docs: reorder links in README.md

Add Kairos LocalAI example to the links (#115 )
docs: Add contributors (#113 )
2026-05-23 08:10:48 -04:00 · 2023-04-28 13:54:11 +02:00 · 2023-04-28 13:52:17 +02:00 · 2023-04-28 10:54:39 +02:00 · 2023-04-27 16:45:24 +00:00 · 2023-04-27 17:46:14 +02:00
42 changed files with 1803 additions and 556 deletions
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,3 @@
+ARG GO_VERSION=1.20
+FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
+RUN apt-get update && apt-get install -y cmake
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,46 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
+{
+	"name": "Existing Docker Compose (Extend)",
+
+	// Update the 'dockerComposeFile' list if you have more compose files or use different names.
+	// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
+	"dockerComposeFile": [
+		"../docker-compose.yaml",
+		"docker-compose.yml"
+	],
+
+	// The 'service' property is the name of the service for the container that VS Code should
+	// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
+	"service": "api",
+
+	// The optional 'workspaceFolder' property is the path VS Code should open by default when
+	// connected. This is typically a file mount in .devcontainer/docker-compose.yml
+	"workspaceFolder": "/workspace",
+
+	"features": {
+		"ghcr.io/devcontainers/features/go:1": {},
+		"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
+	},
+
+	// Features to add to the dev container. More info: https://containers.dev/features.
+	// "features": {},
+
+	// Use 'forwardPorts' to make a list of ports inside the container available locally.
+	// "forwardPorts": [],
+
+	// Uncomment the next line if you want start specific services in your Docker Compose config.
+	// "runServices": [],
+
+	// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
+	// "shutdownAction": "none",
+
+	// Uncomment the next line to run commands after the container is created.
+	"postCreateCommand": "make prepare"
+
+	// Configure tool-specific properties.
+	// "customizations": {},
+
+	// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
+	// "remoteUser": "devcontainer"
+}
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -0,0 +1,26 @@
+version: '3.6'
+services:
+  # Update this to the name of the service you want to work with in your docker-compose.yml file
+  api:
+    # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer 
+    # folder. Note that the path of the Dockerfile and context is relative to the *primary* 
+    # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
+    # array). The sample below assumes your primary file is in the root of your project.
+    #
+    build:
+      context: .
+      dockerfile: .devcontainer/Dockerfile
+
+    volumes:
+      # Update this to wherever you want VS Code to mount the folder of your project
+      - .:/workspace:cached
+
+    # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
+    # cap_add:
+    #   - SYS_PTRACE
+    # security_opt:
+    #   - seccomp:unconfined
+
+    # Overrides default command so things don't shut down after the process ends.
+    command: /bin/sh -c "while sleep 1000; do :; done"
+ 
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,2 @@
 models
+examples/chatbot-ui/models
--- a/.env
+++ b/.env
@@ -1,4 +1,5 @@
-THREADS=14
-CONTEXT_SIZE=512
+# THREADS=14
+# CONTEXT_SIZE=512
 MODELS_PATH=/models
-# DEBUG=true
+# DEBUG=true
+# BUILD_TYPE=generic
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -0,0 +1,44 @@
+---
+name: 'tests'
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+    tags:
+      - '*'
+
+jobs:
+  ubuntu-latest:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+        with: 
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential
+      - name: Test
+        run: |
+          make test
+
+  macOS-latest:
+    runs-on: macOS-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+        with: 
+          submodules: true
+
+      - name: Dependencies
+        run: |
+          brew update
+          brew install sdl2
+      - name: Test
+        run: |
+          make test
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,14 @@
 # go-llama build artifacts
 go-llama
 go-gpt4all-j
+go-gpt2

 # LocalAI build binary
 LocalAI
 local-ai
+# prevent above rules from omitting the helm chart
+!charts/*

 # Ignore models
-models/*.bin
-models/ggml-*
+models/*
+test-models/
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,16 +1,20 @@
 {
    "version": "0.2.0",
    "configurations": [
-    
-    {
-        "name": "Launch Go",
-        "type": "go",
-        "request": "launch",
-        "mode": "debug",
-        "program": "${workspaceFolder}/main.go",
-        "args": [
-            "api"
-        ]
-    }
+        {
+            "name": "Launch Go",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "${workspaceFolder}/main.go",
+            "args": [
+                "api"
+            ],
+            "env": {
+                "C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
+                "LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
+                "DEBUG": "true"
+            }
+        }
    ]
-}
+}
--- a/6
+++ b/6
@@ -1,12 +1,14 @@
 ARG GO_VERSION=1.20
 ARG DEBIAN_VERSION=11
+ARG BUILD_TYPE=
+
 FROM golang:$GO_VERSION as builder
 WORKDIR /build
 RUN apt-get update && apt-get install -y cmake
 COPY . .
-ARG BUILD_TYPE=
-RUN make build${BUILD_TYPE}
+RUN make build

 FROM debian:$DEBIAN_VERSION
 COPY --from=builder /build/local-ai /usr/bin/local-ai
+EXPOSE 8080
 ENTRYPOINT [ "/usr/bin/local-ai" ]
--- a/95
+++ b/95
@@ -2,9 +2,12 @@ GOCMD=go
 GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
-GOLLAMA_VERSION?=llama.cpp-5ecff35
-GOGPT4ALLJ_VERSION?=1f548782d80d48b9a0fac33aae6f129358787bc0
-GOGPT2_VERSION?=1c24f5b86ac428cd5e81dae1f1427b1463bd2b06
+# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
+GOLLAMA_VERSION?=llama.cpp-25d7abb
+# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
+GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
+# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
+GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa

 GREEN  := $(shell tput -Txterm setaf 2)
 YELLOW := $(shell tput -Txterm setaf 3)
@@ -12,6 +15,20 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)

+C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
+LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
+
+# Use this if you want to set the default behavior
+ifndef BUILD_TYPE
+	BUILD_TYPE:=default
+endif
+
+ifeq ($(BUILD_TYPE), "generic")
+	GENERIC_PREFIX:=generic-
+else
+	GENERIC_PREFIX:=
+endif
+
 .PHONY: all test build vendor

 all: help
@@ -19,15 +36,18 @@ all: help
 ## Build:

 build: prepare ## Build the project
-	C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
+	$(info ${GREEN}I local-ai build info:${RESET})
+	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
+	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./

-buildgeneric: prepare-generic ## Build the project
-	C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
+generic-build: ## Build the project using generic
+	BUILD_TYPE="generic" $(MAKE) build

 ## GPT4ALL-J
 go-gpt4all-j:
-	git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j && cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
-# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
+	git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
+	cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
+	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
 	@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@@ -38,58 +58,55 @@ go-gpt4all-j:
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +

 go-gpt4all-j/libgptj.a: go-gpt4all-j
-	$(MAKE) -C go-gpt4all-j libgptj.a
-
-go-gpt4all-j/libgptj.a-generic: go-gpt4all-j
-	$(MAKE) -C go-gpt4all-j generic-libgptj.a
+	$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a

 # CEREBRAS GPT
-go-gpt2.cpp:
-	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2.cpp && cd go-gpt2.cpp && git checkout -b build $(GOGPT2_VERSION)
-# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
-	@find ./go-gpt2.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
-	@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
-	@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
-	@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
-	@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
-	@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
+go-gpt2:
+	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
+	cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
+	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
+	@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +

-go-gpt2.cpp/libgpt2.a: go-gpt2.cpp
-	$(MAKE) -C go-gpt2.cpp libgpt2.a
-
-go-gpt2.cpp/libgpt2.a-generic: go-gpt2.cpp
-	$(MAKE) -C go-gpt2.cpp generic-libgpt2.a
+go-gpt2/libgpt2.a: go-gpt2
+	$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
+	

 go-llama:
 	git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
-	$(MAKE) -C go-llama libbinding.a

-go-llama-generic:
-	git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
-	$(MAKE) -C go-llama generic-libbinding.a
+go-llama/libbinding.a: go-llama
+	$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a

 replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2.cpp
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2

-prepare: go-llama go-gpt4all-j/libgptj.a go-gpt2.cpp/libgpt2.a replace
-
-prepare-generic: go-llama-generic go-gpt4all-j/libgptj.a-generic go-gpt2.cpp/libgpt2.a-generic replace
+prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace

 clean: ## Remove build related file
 	rm -fr ./go-llama
 	rm -rf ./go-gpt4all-j
-	rm -rf ./go-gpt2.cpp
+	rm -rf ./go-gpt2
 	rm -rf $(BINARY_NAME)

 ## Run:
 run: prepare
-	$(GOCMD) run ./ api
+	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go

-## Test:
-test: ## Run the tests of the project
-	$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
+test-models/testmodel:
+	mkdir test-models
+	wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
+	cp tests/fixtures/* test-models
+
+test: prepare test-models/testmodel
+	cp tests/fixtures/* test-models
+	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v -timeout 20m ./...

 ## Help:
 help: ## Show this help.
--- a/README.md
+++ b/README.md
@@ -5,9 +5,11 @@
 <br>
 </h1>

-> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather on the CLI interface. We think that there are already many projects that can be used as a CLI interface already, for instance  [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you are were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
+[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)

-LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.
+[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy) 
+
+**LocalAI** is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.

 - OpenAI compatible API
 - Supports multiple-models
@@ -15,6 +17,17 @@ LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for
 - Support for prompt templates
 - Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).

+LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
+
+### Socials and community chatter
+- Follow [@LocalAI_API](https://twitter.com/LocalAI_API) on twitter.
+
+- [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
+
+- [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project.
+
+- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters.
+
 ## Model compatibility

 It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) supports also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
@@ -43,6 +56,9 @@ git clone https://github.com/go-skynet/LocalAI

 cd LocalAI

+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
 # copy your models to models/
 cp your-model.bin models/

@@ -50,7 +66,7 @@ cp your-model.bin models/
 # vim .env

 # start with docker-compose
-docker compose up -d --build
+docker-compose up -d --build

 # Now API is accessible at localhost:8080
 curl http://localhost:8080/v1/models
@@ -63,15 +79,62 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
   }'
 ```

+### Example: Use GPT4ALL-J model
+
+<details>
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# Use a template from the examples
+cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
+
+# (optional) Edit the .env file to set things like context size and threads
+# vim .env
+
+# start with docker-compose
+docker-compose up -d --build
+
+# Now API is accessible at localhost:8080
+curl http://localhost:8080/v1/models
+# {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
+
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "ggml-gpt4all-j",
+     "messages": [{"role": "user", "content": "How are you?"}],
+     "temperature": 0.9 
+   }'
+
+# {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
+```
+</details>
+
+To build locally, run `make build` (see below).
+
+## Other examples
+
+![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
+
+To see other examples on how to integrate with other projects for instance chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
+
 ## Prompt templates 

 The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.

 <details>
-You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance this can be used with alpaca:
+You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl` which will be used as a default prompt and can be used with alpaca:

 ```
-Below is an instruction that describes a task. Write a response that appropriately completes the request.
+The below instruction describes a task. Write a response that appropriately completes the request.

 ### Instruction:
 {{.Input}}
@@ -79,7 +142,37 @@ Below is an instruction that describes a task. Write a response that appropriate
 ### Response:
 ```

-See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for most popular models.
+See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models.
+
+</details>
+
+## Installation
+
+Currently LocalAI comes as container images and can be used with docker or a containre engine of choice. 
+
+### Run LocalAI in Kubernetes
+
+LocalAI can be installed inside Kubernetes with helm.
+
+<details>
+The local-ai Helm chart supports two options for the LocalAI server's models directory:
+1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
+
+    Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
+
+2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
+
+    First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
+    ```bash
+    helm install local-ai charts/local-ai -n local-ai --create-namespace
+    ```
+    Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
+
+    Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
+    ```bash
+    helm upgrade local-ai -n local-ai charts/local-ai
+    ```
+    This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.

 </details>

@@ -94,7 +187,7 @@ Example of starting the API with `docker`:
 docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
 ```

-And you'll see:
+You should see:
 ```
 ┌───────────────────────────────────────────────────┐ 
 │                   Fiber v2.42.0                   │ 
@@ -120,6 +213,8 @@ The API takes takes the following parameters:
 | threads      | THREADS              | Number of Physical cores     | The number of threads to use for text generation. |
 | address      | ADDRESS              | :8080         | The address and port to listen on. |
 | context-size | CONTEXT_SIZE         | 512           | Default token context size. |
+| debug | DEBUG         | false           | Enable debug mode. |
+| config-file | CONFIG_FILE         | empty           | Path to a LocalAI config file. |

 Once the server is running, you can start making requests to it using HTTP, using the OpenAI API. 

@@ -129,10 +224,16 @@ Once the server is running, you can start making requests to it using HTTP, usin

 You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create). 

-Following the list of endpoints/parameters supported.
+Following the list of endpoints/parameters supported. 
+
+Note:
+
+- You can also specify the model as part of the OpenAI token.
+- If only one model is available, the API will use it for all the requests.

 #### Chat completions

+<details>
 For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:

 ```
@@ -144,10 +245,14 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 ```

 Available additional parameters: `top_p`, `top_k`, `max_tokens`
+</details>

 #### Completions

-For example, to generate a comletion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
+<details>
+
+To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body:
+
 ```
 curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-koala-7b-model-q4_0-r2.bin",
@@ -158,37 +263,94 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d

 Available additional parameters: `top_p`, `top_k`, `max_tokens`

+</details>
+
 #### List models

+<details>
 You can list all the models available with:

 ```
 curl http://localhost:8080/v1/models
 ```

-## Using other models
+</details>

-gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
+## Advanced configuration

-```bash
-wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
-mkdir models
-cp gpt4all.. models/
-git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
-pip install sentencepiece
-python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
-# There will be a new model with the ".tmp" extension, you have to use that one!
+LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
+
+<details>
+You can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
+
+For instance, a configuration file (`gpt-3.5-turbo.yaml`) can be declaring the "gpt-3.5-turbo" model but backed by the "testmodel" model file:
+
+```yaml
+name: gpt-3.5-turbo
+parameters:
+  model: testmodel
+context_size: 512
+threads: 10
+stopwords:
+- "HUMAN:"
+- "### Response:"
+roles:
+  user: "HUMAN:"
+  system: "GPT:"
+template:
+  completion: completion
+  chat: ggml-gpt4all-j
 ```

-### Windows compatibility
+Specifying a `config-file` via CLI allows to declare models in a single file as a list, for instance:
+
+```yaml
+- name: list1
+  parameters:
+    model: testmodel
+  context_size: 512
+  threads: 10
+  stopwords:
+  - "HUMAN:"
+  - "### Response:"
+  roles:
+    user: "HUMAN:"
+    system: "GPT:"
+  template:
+    completion: completion
+    chat: ggml-gpt4all-j
+- name: list2
+  parameters:
+    model: testmodel
+  context_size: 512
+  threads: 10
+  stopwords:
+  - "HUMAN:"
+  - "### Response:"
+  roles:
+    user: "HUMAN:"
+    system: "GPT:"
+  template:
+    completion: completion
+    chat: ggml-gpt4all-j
+```
+
+See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
+
+</details>
+
+
+
+## Blog posts and other articles
+
+- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
+- https://kairos.io/docs/examples/localai/
+
+## Windows compatibility

 It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2

-### Kubernetes
-
-You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
-
-### Build locally
+## Build locally

 Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.

@@ -206,16 +368,91 @@ Or build the binary with `make`:
 make build
 ```

+## Frequently asked questions
+
+Here are answers to some of the most common questions.
+
+
+### How do I get models? 
+
+<details>
+
+Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in lama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml, or models from gpt4all should also work: https://github.com/nomic-ai/gpt4all.
+
+</details>
+
+### What's the difference with Serge, or XXX?
+
+
+<details>
+
+LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference,  easy to set up locally and deploy to Kubernetes.
+
+</details>
+
+
+### Can I use it with a Discord bot, or XXX?
+
+<details>
+
+Yes! If the client uses OpenAI and supports setting a different base URL to send requests to, you can use the LocalAI endpoint. This allows to use this with every application that was supposed to work with OpenAI, but without changing the application!
+
+</details>
+
+
+### Can this leverage GPUs? 
+
+<details>
+
+Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/llama.cpp/discussions/915.
+
+</details>
+
+### Where is the webUI? 
+
+<details> 
+We are working on to have a good out of the box experience - however as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
+
+</details>
+
+### Does it work with AutoGPT? 
+
+<details>
+
+AutoGPT currently doesn't allow to set a different API URL, but there is a PR open for it, so this should be possible soon!
+
+</details>
+
+## Projects already using LocalAI to run local models
+
+Feel free to open up a PR to get your project listed!
+
+- [Kairos](https://github.com/kairos-io/kairos)
+- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)
+
+## Blog posts and other articles on LocalAI
+
+- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
+- https://kairos.io/docs/examples/localai/
+
 ## Short-term roadmap

 - [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
- Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
+- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
+- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
 - [x] Multi-model support
- Have a webUI!
+- [x] Have a webUI!
+- [x] Allow configuration of defaults for models.
+- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models, directly from the webui.
+
+## Star history
+
+[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)

 ## License

+LocalAI is a community-driven project. It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
+
 MIT

 ## Acknowledgements
@@ -224,3 +461,9 @@ MIT
 - https://github.com/tatsu-lab/stanford_alpaca
 - https://github.com/cornelk/llama-go for the initial ideas
 - https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)
+
+## Contributors
+
+<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
+</a>
--- a/api/api.go
+++ b/api/api.go
@@ -1,349 +1,25 @@
 package api

 import (
-	"encoding/json"
 	"errors"
-	"fmt"
-	"strings"
-	"sync"

 	model "github.com/go-skynet/LocalAI/pkg/model"
-	gpt2 "github.com/go-skynet/go-gpt2.cpp"
-	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
-	llama "github.com/go-skynet/go-llama.cpp"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
 	"github.com/gofiber/fiber/v2/middleware/recover"
+	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )

-type OpenAIResponse struct {
-	Created int      `json:"created,omitempty"`
-	Object  string   `json:"chat.completion,omitempty"`
-	ID      string   `json:"id,omitempty"`
-	Model   string   `json:"model,omitempty"`
-	Choices []Choice `json:"choices,omitempty"`
-}
-
-type Choice struct {
-	Index        int      `json:"index,omitempty"`
-	FinishReason string   `json:"finish_reason,omitempty"`
-	Message      *Message `json:"message,omitempty"`
-	Text         string   `json:"text,omitempty"`
-}
-
-type Message struct {
-	Role    string `json:"role,omitempty"`
-	Content string `json:"content,omitempty"`
-}
-
-type OpenAIModel struct {
-	ID     string `json:"id"`
-	Object string `json:"object"`
-}
-
-type OpenAIRequest struct {
-	Model string `json:"model"`
-
-	// Prompt is read only by completion API calls
-	Prompt string `json:"prompt"`
-
-	// Messages is read only by chat/completion API calls
-	Messages []Message `json:"messages"`
-
-	Echo bool `json:"echo"`
-	// Common options between all the API calls
-	TopP        float64 `json:"top_p"`
-	TopK        int     `json:"top_k"`
-	Temperature float64 `json:"temperature"`
-	Maxtokens   int     `json:"max_tokens"`
-
-	N int `json:"n"`
-
-	// Custom parameters - not present in the OpenAI API
-	Batch     int  `json:"batch"`
-	F16       bool `json:"f16kv"`
-	IgnoreEOS bool `json:"ignore_eos"`
-
-	Seed int `json:"seed"`
-}
-
-// https://platform.openai.com/docs/api-reference/completions
-func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		var err error
-		var model *llama.LLama
-		var gptModel *gptj.GPTJ
-		var gpt2Model *gpt2.GPT2
-		var stableLMModel *gpt2.StableLM
-
-		input := new(OpenAIRequest)
-		// Get input data from the request body
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-		modelFile := input.Model
-		received, _ := json.Marshal(input)
-
-		log.Debug().Msgf("Request received: %s", string(received))
-
-		// Set model from bearer token, if available
-		bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
-		bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
-		if modelFile == "" && !bearerExists {
-			return fmt.Errorf("no model specified")
-		}
-
-		if bearerExists { // model specified in bearer token takes precedence
-			log.Debug().Msgf("Using model from bearer token: %s", bearer)
-			modelFile = bearer
-		}
-
-		// Try to load the model with both
-		var llamaerr, gpt2err, gptjerr, stableerr error
-		llamaOpts := []llama.ModelOption{}
-		if ctx != 0 {
-			llamaOpts = append(llamaOpts, llama.SetContext(ctx))
-		}
-		if f16 {
-			llamaOpts = append(llamaOpts, llama.EnableF16Memory)
-		}
-
-		// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
-		model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
-		if llamaerr != nil {
-			gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
-			if gptjerr != nil {
-				gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
-				if gpt2err != nil {
-					stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
-					if stableerr != nil {
-						return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
-					}
-				}
-			}
-		}
-
-		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-		mutexMap.Lock()
-		l, ok := mutexes[modelFile]
-		if !ok {
-			m := &sync.Mutex{}
-			mutexes[modelFile] = m
-			l = m
-		}
-		mutexMap.Unlock()
-		l.Lock()
-		defer l.Unlock()
-
-		// Set the parameters for the language model prediction
-		topP := input.TopP
-		if topP == 0 {
-			topP = 0.7
-		}
-		topK := input.TopK
-		if topK == 0 {
-			topK = 80
-		}
-
-		temperature := input.Temperature
-		if temperature == 0 {
-			temperature = 0.9
-		}
-
-		tokens := input.Maxtokens
-		if tokens == 0 {
-			tokens = 512
-		}
-
-		predInput := input.Prompt
-		if chat {
-			mess := []string{}
-			// TODO: encode roles
-			for _, i := range input.Messages {
-				mess = append(mess, i.Content)
-			}
-
-			predInput = strings.Join(mess, "\n")
-		}
-
-		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
-		templatedInput, err := loader.TemplatePrefix(modelFile, struct {
-			Input string
-		}{Input: predInput})
-		if err == nil {
-			predInput = templatedInput
-			log.Debug().Msgf("Template found, input modified to: %s", predInput)
-		}
-
-		result := []Choice{}
-
-		n := input.N
-
-		if input.N == 0 {
-			n = 1
-		}
-
-		var predFunc func() (string, error)
-		switch {
-		case stableLMModel != nil:
-			predFunc = func() (string, error) {
-				// Generate the prediction using the language model
-				predictOptions := []gpt2.PredictOption{
-					gpt2.SetTemperature(temperature),
-					gpt2.SetTopP(topP),
-					gpt2.SetTopK(topK),
-					gpt2.SetTokens(tokens),
-					gpt2.SetThreads(threads),
-				}
-
-				if input.Batch != 0 {
-					predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
-				}
-
-				if input.Seed != 0 {
-					predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
-				}
-
-				return stableLMModel.Predict(
-					predInput,
-					predictOptions...,
-				)
-			}
-		case gpt2Model != nil:
-			predFunc = func() (string, error) {
-				// Generate the prediction using the language model
-				predictOptions := []gpt2.PredictOption{
-					gpt2.SetTemperature(temperature),
-					gpt2.SetTopP(topP),
-					gpt2.SetTopK(topK),
-					gpt2.SetTokens(tokens),
-					gpt2.SetThreads(threads),
-				}
-
-				if input.Batch != 0 {
-					predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
-				}
-
-				if input.Seed != 0 {
-					predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
-				}
-
-				return gpt2Model.Predict(
-					predInput,
-					predictOptions...,
-				)
-			}
-		case gptModel != nil:
-			predFunc = func() (string, error) {
-				// Generate the prediction using the language model
-				predictOptions := []gptj.PredictOption{
-					gptj.SetTemperature(temperature),
-					gptj.SetTopP(topP),
-					gptj.SetTopK(topK),
-					gptj.SetTokens(tokens),
-					gptj.SetThreads(threads),
-				}
-
-				if input.Batch != 0 {
-					predictOptions = append(predictOptions, gptj.SetBatch(input.Batch))
-				}
-
-				if input.Seed != 0 {
-					predictOptions = append(predictOptions, gptj.SetSeed(input.Seed))
-				}
-
-				return gptModel.Predict(
-					predInput,
-					predictOptions...,
-				)
-			}
-		case model != nil:
-			predFunc = func() (string, error) {
-				// Generate the prediction using the language model
-				predictOptions := []llama.PredictOption{
-					llama.SetTemperature(temperature),
-					llama.SetTopP(topP),
-					llama.SetTopK(topK),
-					llama.SetTokens(tokens),
-					llama.SetThreads(threads),
-				}
-
-				if input.Batch != 0 {
-					predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
-				}
-
-				if input.F16 {
-					predictOptions = append(predictOptions, llama.EnableF16KV)
-				}
-
-				if input.IgnoreEOS {
-					predictOptions = append(predictOptions, llama.IgnoreEOS)
-				}
-
-				if input.Seed != 0 {
-					predictOptions = append(predictOptions, llama.SetSeed(input.Seed))
-				}
-
-				return model.Predict(
-					predInput,
-					predictOptions...,
-				)
-			}
-		}
-
-		for i := 0; i < n; i++ {
-			prediction, err := predFunc()
-			if err != nil {
-				return err
-			}
-
-			if input.Echo {
-				prediction = predInput + prediction
-			}
-
-			if chat {
-				result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
-			} else {
-				result = append(result, Choice{Text: prediction})
-			}
-		}
-
-		jsonResult, _ := json.Marshal(result)
-		log.Debug().Msgf("Response: %s", jsonResult)
-
-		// Return the prediction in the response body
-		return c.JSON(OpenAIResponse{
-			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Choices: result,
-		})
+func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+	if debug {
+		zerolog.SetGlobalLevel(zerolog.DebugLevel)
 	}
-}

-func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
-	return func(c *fiber.Ctx) error {
-		models, err := loader.ListModels()
-		if err != nil {
-			return err
-		}
-
-		dataModels := []OpenAIModel{}
-		for _, m := range models {
-			dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
-		}
-		return c.JSON(struct {
-			Object string        `json:"object"`
-			Data   []OpenAIModel `json:"data"`
-		}{
-			Object: "list",
-			Data:   dataModels,
-		})
-	}
-}
-
-func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f16 bool) error {
 	// Return errors as JSON responses
 	app := fiber.New(fiber.Config{
+		DisableStartupMessage: disableMessage,
 		// Override default error handler
 		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
 			// Status code defaults to 500
@@ -356,31 +32,43 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
 			}

 			// Send custom error page
-			return ctx.Status(code).JSON(struct {
-				Error string `json:"error"`
-			}{Error: err.Error()})
+			return ctx.Status(code).JSON(
+				ErrorResponse{
+					Error: &APIError{Message: err.Error(), Code: code},
+				},
+			)
 		},
 	})

+	cm := make(ConfigMerger)
+	if err := cm.LoadConfigs(loader.ModelPath); err != nil {
+		log.Error().Msgf("error loading config files: %s", err.Error())
+	}
+
+	if configFile != "" {
+		if err := cm.LoadConfigFile(configFile); err != nil {
+			log.Error().Msgf("error loading config file: %s", err.Error())
+		}
+	}
+
+	if debug {
+		for k, v := range cm {
+			log.Debug().Msgf("Model: %s (config: %+v)", k, v)
+		}
+	}
 	// Default middleware config
 	app.Use(recover.New())
 	app.Use(cors.New())

-	// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
-	mu := map[string]*sync.Mutex{}
-	var mumutex = &sync.Mutex{}
-
 	// openAI compatible API endpoint
-	app.Post("/v1/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
-	app.Post("/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
+	app.Post("/v1/chat/completions", openAIEndpoint(cm, true, debug, loader, threads, ctxSize, f16))
+	app.Post("/chat/completions", openAIEndpoint(cm, true, debug, loader, threads, ctxSize, f16))

-	app.Post("/v1/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
-	app.Post("/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
+	app.Post("/v1/completions", openAIEndpoint(cm, false, debug, loader, threads, ctxSize, f16))
+	app.Post("/completions", openAIEndpoint(cm, false, debug, loader, threads, ctxSize, f16))

-	app.Get("/v1/models", listModels(loader))
-	app.Get("/models", listModels(loader))
+	app.Get("/v1/models", listModels(loader, cm))
+	app.Get("/models", listModels(loader, cm))

-	// Start the server
-	app.Listen(listenAddr)
-	return nil
+	return app
 }
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -0,0 +1,121 @@
+package api_test
+
+import (
+	"context"
+	"os"
+
+	. "github.com/go-skynet/LocalAI/api"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/sashabaranov/go-openai"
+)
+
+var _ = Describe("API test", func() {
+
+	var app *fiber.App
+	var modelLoader *model.ModelLoader
+	var client *openai.Client
+	Context("API query", func() {
+		BeforeEach(func() {
+			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
+			app = App("", modelLoader, 1, 512, false, true, true)
+			go app.Listen("127.0.0.1:9090")
+
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
+
+			// Wait for API to be ready
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+		AfterEach(func() {
+			app.Shutdown()
+		})
+		It("returns the models list", func() {
+			models, err := client.ListModels(context.TODO())
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(models.Models)).To(Equal(3))
+			Expect(models.Models[0].ID).To(Equal("testmodel"))
+		})
+		It("can generate completions", func() {
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
+		})
+
+		It("can generate chat completions ", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+
+		It("can generate completions from model configs", func() {
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
+		})
+
+		It("can generate chat completions from model configs", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+
+		It("returns errors", func() {
+			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
+		})
+
+	})
+
+	Context("Config file", func() {
+		BeforeEach(func() {
+			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
+			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
+			go app.Listen("127.0.0.1:9090")
+
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
+
+			// Wait for API to be ready
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+		AfterEach(func() {
+			app.Shutdown()
+		})
+		It("can generate chat completions from config file", func() {
+
+			models, err := client.ListModels(context.TODO())
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(models.Models)).To(Equal(5))
+			Expect(models.Models[0].ID).To(Equal("testmodel"))
+		})
+		It("can generate chat completions from config file", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+		It("can generate chat completions from config file", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+	})
+})
--- a/api/apt_suite_test.go
+++ b/api/apt_suite_test.go
@@ -0,0 +1,13 @@
+package api_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestLocalAI(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "LocalAI test suite")
+}
--- a/api/config.go
+++ b/api/config.go
@@ -0,0 +1,100 @@
+package api
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+type Config struct {
+	OpenAIRequest  `yaml:"parameters"`
+	Name           string            `yaml:"name"`
+	StopWords      []string          `yaml:"stopwords"`
+	Cutstrings     []string          `yaml:"cutstrings"`
+	TrimSpace      []string          `yaml:"trimspace"`
+	ContextSize    int               `yaml:"context_size"`
+	F16            bool              `yaml:"f16"`
+	Threads        int               `yaml:"threads"`
+	Debug          bool              `yaml:"debug"`
+	Roles          map[string]string `yaml:"roles"`
+	TemplateConfig TemplateConfig    `yaml:"template"`
+}
+
+type TemplateConfig struct {
+	Completion string `yaml:"completion"`
+	Chat       string `yaml:"chat"`
+}
+
+type ConfigMerger map[string]Config
+
+func ReadConfigFile(file string) ([]*Config, error) {
+	c := &[]*Config{}
+	f, err := os.ReadFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read config file: %w", err)
+	}
+	if err := yaml.Unmarshal(f, c); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+	}
+
+	return *c, nil
+}
+
+func ReadConfig(file string) (*Config, error) {
+	c := &Config{}
+	f, err := os.ReadFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read config file: %w", err)
+	}
+	if err := yaml.Unmarshal(f, c); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+	}
+
+	return c, nil
+}
+
+func (cm ConfigMerger) LoadConfigFile(file string) error {
+	c, err := ReadConfigFile(file)
+	if err != nil {
+		return fmt.Errorf("cannot load config file: %w", err)
+	}
+
+	for _, cc := range c {
+		cm[cc.Name] = *cc
+	}
+	return nil
+}
+
+func (cm ConfigMerger) LoadConfig(file string) error {
+	c, err := ReadConfig(file)
+	if err != nil {
+		return fmt.Errorf("cannot read config file: %w", err)
+	}
+
+	cm[c.Name] = *c
+	return nil
+}
+
+func (cm ConfigMerger) LoadConfigs(path string) error {
+	files, err := ioutil.ReadDir(path)
+	if err != nil {
+		return err
+	}
+
+	for _, file := range files {
+		// Skip templates, YAML and .keep files
+		if !strings.Contains(file.Name(), ".yaml") {
+			continue
+		}
+		c, err := ReadConfig(filepath.Join(path, file.Name()))
+		if err == nil {
+			cm[c.Name] = *c
+		}
+	}
+
+	return nil
+}
--- a/api/openai.go
+++ b/api/openai.go
@@ -0,0 +1,396 @@
+package api
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"sync"
+
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+	"github.com/valyala/fasthttp"
+)
+
+// APIError provides error information returned by the OpenAI API.
+type APIError struct {
+	Code    any     `json:"code,omitempty"`
+	Message string  `json:"message"`
+	Param   *string `json:"param,omitempty"`
+	Type    string  `json:"type"`
+}
+
+type ErrorResponse struct {
+	Error *APIError `json:"error,omitempty"`
+}
+
+type OpenAIResponse struct {
+	Created int      `json:"created,omitempty"`
+	Object  string   `json:"object,omitempty"`
+	ID      string   `json:"id,omitempty"`
+	Model   string   `json:"model,omitempty"`
+	Choices []Choice `json:"choices,omitempty"`
+}
+
+type Choice struct {
+	Index        int      `json:"index,omitempty"`
+	FinishReason string   `json:"finish_reason,omitempty"`
+	Message      *Message `json:"message,omitempty"`
+	Delta        *Message `json:"delta,omitempty"`
+	Text         string   `json:"text,omitempty"`
+}
+
+type Message struct {
+	Role    string `json:"role,omitempty" yaml:"role"`
+	Content string `json:"content,omitempty" yaml:"content"`
+}
+
+type OpenAIModel struct {
+	ID     string `json:"id"`
+	Object string `json:"object"`
+}
+
+type OpenAIRequest struct {
+	Model string `json:"model" yaml:"model"`
+
+	// Prompt is read only by completion API calls
+	Prompt string `json:"prompt" yaml:"prompt"`
+
+	Stop string `json:"stop" yaml:"stop"`
+
+	// Messages is read only by chat/completion API calls
+	Messages []Message `json:"messages" yaml:"messages"`
+
+	Stream bool `json:"stream"`
+	Echo   bool `json:"echo"`
+	// Common options between all the API calls
+	TopP        float64 `json:"top_p" yaml:"top_p"`
+	TopK        int     `json:"top_k" yaml:"top_k"`
+	Temperature float64 `json:"temperature" yaml:"temperature"`
+	Maxtokens   int     `json:"max_tokens" yaml:"max_tokens"`
+
+	N int `json:"n"`
+
+	// Custom parameters - not present in the OpenAI API
+	Batch         int     `json:"batch" yaml:"batch"`
+	F16           bool    `json:"f16" yaml:"f16"`
+	IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
+	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
+	Keep          int     `json:"n_keep" yaml:"n_keep"`
+
+	Seed int `json:"seed" yaml:"seed"`
+}
+
+func defaultRequest(modelFile string) OpenAIRequest {
+	return OpenAIRequest{
+		TopP:        0.7,
+		TopK:        80,
+		Maxtokens:   512,
+		Temperature: 0.9,
+		Model:       modelFile,
+	}
+}
+
+func updateConfig(config *Config, input *OpenAIRequest) {
+	if input.Echo {
+		config.Echo = input.Echo
+	}
+	if input.TopK != 0 {
+		config.TopK = input.TopK
+	}
+	if input.TopP != 0 {
+		config.TopP = input.TopP
+	}
+
+	if input.Temperature != 0 {
+		config.Temperature = input.Temperature
+	}
+
+	if input.Maxtokens != 0 {
+		config.Maxtokens = input.Maxtokens
+	}
+
+	if input.Stop != "" {
+		config.StopWords = append(config.StopWords, input.Stop)
+	}
+
+	if input.RepeatPenalty != 0 {
+		config.RepeatPenalty = input.RepeatPenalty
+	}
+
+	if input.Keep != 0 {
+		config.Keep = input.Keep
+	}
+
+	if input.Batch != 0 {
+		config.Batch = input.Batch
+	}
+
+	if input.F16 {
+		config.F16 = input.F16
+	}
+
+	if input.IgnoreEOS {
+		config.IgnoreEOS = input.IgnoreEOS
+	}
+
+	if input.Seed != 0 {
+		config.Seed = input.Seed
+	}
+}
+
+var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
+var mu sync.Mutex = sync.Mutex{}
+
+// https://platform.openai.com/docs/api-reference/completions
+func openAIEndpoint(cm ConfigMerger, chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+
+		input := new(OpenAIRequest)
+		// Get input data from the request body
+		if err := c.BodyParser(input); err != nil {
+			return err
+		}
+
+		if input.Stream {
+			log.Debug().Msgf("Stream request received")
+			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
+			c.Set("Content-Type", "text/event-stream; charset=utf-8")
+			c.Set("Cache-Control", "no-cache")
+			c.Set("Connection", "keep-alive")
+			c.Set("Transfer-Encoding", "chunked")
+		}
+
+		modelFile := input.Model
+		received, _ := json.Marshal(input)
+
+		log.Debug().Msgf("Request received: %s", string(received))
+
+		// Set model from bearer token, if available
+		bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
+		bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+		// If no model was specified, take the first available
+		if modelFile == "" && !bearerExists {
+			models, _ := loader.ListModels()
+			if len(models) > 0 {
+				modelFile = models[0]
+				log.Debug().Msgf("No model specified, using: %s", modelFile)
+			} else {
+				log.Debug().Msgf("No model specified, returning error")
+				return fmt.Errorf("no model specified")
+			}
+		}
+
+		// If a model is found in bearer token takes precedence
+		if bearerExists {
+			log.Debug().Msgf("Using model from bearer token: %s", bearer)
+			modelFile = bearer
+		}
+
+		// Load a config file if present after the model name
+		modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
+		if _, err := os.Stat(modelConfig); err == nil {
+			if err := cm.LoadConfig(modelConfig); err != nil {
+				return fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+			}
+		}
+
+		var config *Config
+		cfg, exists := cm[modelFile]
+		if !exists {
+			config = &Config{
+				OpenAIRequest: defaultRequest(modelFile),
+			}
+		} else {
+			config = &cfg
+		}
+
+		// Set the parameters for the language model prediction
+		updateConfig(config, input)
+
+		if threads != 0 {
+			config.Threads = threads
+		}
+		if ctx != 0 {
+			config.ContextSize = ctx
+		}
+		if f16 {
+			config.F16 = true
+		}
+
+		if debug {
+			config.Debug = true
+		}
+
+		log.Debug().Msgf("Parameter Config: %+v", config)
+
+		predInput := input.Prompt
+		if chat {
+			mess := []string{}
+			for _, i := range input.Messages {
+				r := config.Roles[i.Role]
+				if r == "" {
+					r = i.Role
+				}
+
+				content := fmt.Sprint(r, " ", i.Content)
+				mess = append(mess, content)
+			}
+
+			predInput = strings.Join(mess, "\n")
+		}
+
+		templateFile := config.Model
+		if config.TemplateConfig.Chat != "" && chat {
+			templateFile = config.TemplateConfig.Chat
+		}
+
+		if config.TemplateConfig.Completion != "" && !chat {
+			templateFile = config.TemplateConfig.Completion
+		}
+
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		templatedInput, err := loader.TemplatePrefix(templateFile, struct {
+			Input string
+		}{Input: predInput})
+		if err == nil {
+			predInput = templatedInput
+			log.Debug().Msgf("Template found, input modified to: %s", predInput)
+		}
+
+		result := []Choice{}
+
+		n := input.N
+
+		if input.N == 0 {
+			n = 1
+		}
+
+		// get the model function to call for the result
+		predFunc, err := ModelInference(predInput, loader, *config)
+		if err != nil {
+			return err
+		}
+
+		finetunePrediction := func(prediction string) string {
+			if config.Echo {
+				prediction = predInput + prediction
+			}
+
+			for _, c := range config.Cutstrings {
+				mu.Lock()
+				reg, ok := cutstrings[c]
+				if !ok {
+					cutstrings[c] = regexp.MustCompile(c)
+					reg = cutstrings[c]
+				}
+				mu.Unlock()
+				prediction = reg.ReplaceAllString(prediction, "")
+			}
+
+			for _, c := range config.TrimSpace {
+				prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
+			}
+			return prediction
+		}
+
+		for i := 0; i < n; i++ {
+			prediction, err := predFunc()
+			if err != nil {
+				return err
+			}
+
+			prediction = finetunePrediction(prediction)
+
+			if chat {
+				if input.Stream {
+					result = append(result, Choice{Delta: &Message{Role: "assistant", Content: prediction}})
+				} else {
+					result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
+				}
+			} else {
+				result = append(result, Choice{Text: prediction})
+			}
+		}
+
+		resp := &OpenAIResponse{
+			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+			Choices: result,
+		}
+		if input.Stream && chat {
+			resp.Object = "chat.completion.chunk"
+		} else if chat {
+			resp.Object = "chat.completion"
+		} else {
+			resp.Object = "text_completion"
+		}
+
+		jsonResult, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", jsonResult)
+
+		if input.Stream {
+			log.Debug().Msgf("Handling stream request")
+			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
+				fmt.Fprintf(w, "event: data\n")
+				w.Flush()
+
+				fmt.Fprintf(w, "data: %s\n\n", jsonResult)
+				w.Flush()
+
+				fmt.Fprintf(w, "event: data\n")
+				w.Flush()
+
+				resp := &OpenAIResponse{
+					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []Choice{Choice{FinishReason: "stop"}},
+				}
+				respData, _ := json.Marshal(resp)
+
+				fmt.Fprintf(w, "data: %s\n\n", respData)
+				w.Flush()
+
+				//	fmt.Fprintf(w, "data: [DONE]\n\n")
+				//		w.Flush()
+			}))
+			return nil
+		} else {
+			// Return the prediction in the response body
+			return c.JSON(resp)
+		}
+	}
+}
+
+func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		models, err := loader.ListModels()
+		if err != nil {
+			return err
+		}
+		var mm map[string]interface{} = map[string]interface{}{}
+
+		dataModels := []OpenAIModel{}
+		for _, m := range models {
+			mm[m] = nil
+			dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
+		}
+
+		for k := range cm {
+			if _, exists := mm[k]; !exists {
+				dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
+			}
+		}
+
+		return c.JSON(struct {
+			Object string        `json:"object"`
+			Data   []OpenAIModel `json:"data"`
+		}{
+			Object: "list",
+			Data:   dataModels,
+		})
+	}
+}
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -0,0 +1,188 @@
+package api
+
+import (
+	"fmt"
+	"sync"
+
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	gpt2 "github.com/go-skynet/go-gpt2.cpp"
+	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
+	llama "github.com/go-skynet/go-llama.cpp"
+)
+
+// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+var mutexMap sync.Mutex
+var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
+
+func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (string, error), error) {
+	var model *llama.LLama
+	var gptModel *gptj.GPTJ
+	var gpt2Model *gpt2.GPT2
+	var stableLMModel *gpt2.StableLM
+
+	modelFile := c.Model
+
+	// Try to load the model
+	var llamaerr, gpt2err, gptjerr, stableerr error
+	llamaOpts := []llama.ModelOption{}
+	if c.ContextSize != 0 {
+		llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
+	}
+	if c.F16 {
+		llamaOpts = append(llamaOpts, llama.EnableF16Memory)
+	}
+
+	// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
+	model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
+	if llamaerr != nil {
+		gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
+		if gptjerr != nil {
+			gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
+			if gpt2err != nil {
+				stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
+				if stableerr != nil {
+					return nil, fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
+				}
+			}
+		}
+	}
+
+	var fn func() (string, error)
+
+	switch {
+	case stableLMModel != nil:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return stableLMModel.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case gpt2Model != nil:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return gpt2Model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case gptModel != nil:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gptj.PredictOption{
+				gptj.SetTemperature(c.Temperature),
+				gptj.SetTopP(c.TopP),
+				gptj.SetTopK(c.TopK),
+				gptj.SetTokens(c.Maxtokens),
+				gptj.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
+			}
+
+			return gptModel.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case model != nil:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []llama.PredictOption{
+				llama.SetTemperature(c.Temperature),
+				llama.SetTopP(c.TopP),
+				llama.SetTopK(c.TopK),
+				llama.SetTokens(c.Maxtokens),
+				llama.SetThreads(c.Threads),
+			}
+
+			if c.Debug {
+				predictOptions = append(predictOptions, llama.Debug)
+			}
+
+			predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
+
+			if c.RepeatPenalty != 0 {
+				predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
+			}
+
+			if c.Keep != 0 {
+				predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
+			}
+
+			if c.F16 {
+				predictOptions = append(predictOptions, llama.EnableF16KV)
+			}
+
+			if c.IgnoreEOS {
+				predictOptions = append(predictOptions, llama.IgnoreEOS)
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	}
+
+	return func() (string, error) {
+		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+		mutexMap.Lock()
+		l, ok := mutexes[modelFile]
+		if !ok {
+			m := &sync.Mutex{}
+			mutexes[modelFile] = m
+			l = m
+		}
+		mutexMap.Unlock()
+		l.Lock()
+		defer l.Unlock()
+
+		return fn()
+	}, nil
+}
--- a/charts/local-ai/Chart.yaml
+++ b/charts/local-ai/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2
+appVersion: 0.1.0
+description: A Helm chart for LocalAI
+name: local-ai
+type: application
+version: 1.0.0
--- a/charts/local-ai/templates/_helpers.tpl
+++ b/charts/local-ai/templates/_helpers.tpl
@@ -0,0 +1,44 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "local-ai.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "local-ai.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "local-ai.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "local-ai.labels" -}}
+helm.sh/chart: {{ include "local-ai.chart" . }}
+app.kubernetes.io/name: {{ include "local-ai.name" . }}
+app.kubernetes.io/instance: "{{ .Release.Name }}"
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+{{- end }}
--- a/charts/local-ai/templates/data-volume.yaml
+++ b/charts/local-ai/templates/data-volume.yaml
@@ -0,0 +1,39 @@
+{{- if .Values.dataVolume.enabled }}
+apiVersion: cdi.kubevirt.io/v1beta1
+kind: DataVolume
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+spec:
+  contentType: archive
+  source:
+    {{ .Values.dataVolume.source.type }}:
+      url: {{ .Values.dataVolume.source.url }}
+      secretRef: {{ template "local-ai.fullname" . }}
+      {{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
+      secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
+      {{- end }}
+      {{- if .Values.dataVolume.source.caCertConfigMap }}
+      caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
+      {{- end }}
+  pvc:
+    accessModes: {{ .Values.dataVolume.pvc.accessModes }}
+    resources:
+      requests:
+        storage: {{ .Values.dataVolume.pvc.size }}
+---
+{{- if .Values.dataVolume.secret.enabled }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+data:
+  accessKeyId: {{ .Values.dataVolume.secret.username }}
+  secretKey: {{ .Values.dataVolume.secret.password }}
+{{- end }}
+{{- end }}
--- a/charts/local-ai/templates/deployment.yaml
+++ b/charts/local-ai/templates/deployment.yaml
@@ -0,0 +1,39 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: {{ include "local-ai.name" . }}
+      app.kubernetes.io/instance: {{ .Release.Name }}
+  replicas: 1
+  template:
+    metadata:
+      name: {{ template "local-ai.fullname" . }}
+      labels:
+        app.kubernetes.io/name: {{ include "local-ai.name" . }}
+        app.kubernetes.io/instance: {{ .Release.Name }}
+    spec:
+      containers:
+        - name: {{ template "local-ai.fullname" . }}
+          image: {{ .Values.deployment.image }}
+          env:
+          - name: THREADS
+            value: {{ .Values.deployment.env.threads | quote }}
+          - name: CONTEXT_SIZE
+            value: {{ .Values.deployment.env.contextSize | quote }}
+          - name: MODELS_PATH
+            value: {{ .Values.deployment.env.modelsPath }}
+{{- if .Values.deployment.volume.enabled }}
+          volumeMounts:
+          - mountPath: {{ .Values.deployment.env.modelsPath }}
+            name: models
+      volumes:
+      - name: models
+        persistentVolumeClaim:
+          claimName: {{ template "local-ai.fullname" . }}
+{{- end }}
--- a/charts/local-ai/templates/service.yaml
+++ b/charts/local-ai/templates/service.yaml
@@ -0,0 +1,19 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+{{- if .Values.service.annotations }}
+  annotations:
+  {{ toYaml .Values.service.annotations | indent 4 }}
+{{- end }}
+spec:
+  selector:
+    app.kubernetes.io/name: {{ include "local-ai.name" . }}
+  type: "{{ .Values.service.type }}"
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
--- a/charts/local-ai/values.yaml
+++ b/charts/local-ai/values.yaml
@@ -0,0 +1,38 @@
+deployment:
+  image: quay.io/go-skynet/local-ai:latest
+  env:
+    threads: 14
+    contextSize: 512
+    modelsPath: "/models"
+  volume:
+    enabled: false
+
+service:
+  type: ClusterIP
+  annotations: {}
+  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
+  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
+
+# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
+# (requires https://github.com/kubevirt/containerized-data-importer)
+dataVolume:
+  enabled: false
+  source:
+    type: "http" # Source type. One of: [ http | s3 ]
+    url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
+
+    # CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
+    # and a base64 encoded pem certificate
+    caCertConfigMap: ""
+
+    # SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
+    # that may include sensitive information. Only applicable for the http source type.
+    secretExtraHeaders: []
+  pvc:
+    accessModes:
+    - ReadWriteOnce
+    size: 5Gi
+  secret:
+    enabled: false
+    username: "" # base64 encoded
+    password: "" # base64 encoded
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -6,14 +6,10 @@ services:
    build:
      context: .
      dockerfile: Dockerfile
-      # args:
-        # BUILD_TYPE: generic # Uncomment to build CPU generic code that works on most HW
    ports:
      - 8080:8080
-    environment:
-      - MODELS_PATH=$MODELS_PATH
-      - CONTEXT_SIZE=$CONTEXT_SIZE
-      - THREADS=$THREADS
-      - DEBUG=$DEBUG
+    env_file:
+      - .env
    volumes:
-      - ./models:/models:cached
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
--- a/examples/README.md
+++ b/examples/README.md
@@ -0,0 +1,11 @@
+# Examples
+
+Here is a list of projects that can easily be integrated with the LocalAI backend. 
+
+## Projects
+
+- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mkellerman](https://github.com/mkellerman))
+
+## Want to contribute?
+
+Create an issue, and put `Example: <description>` in the title! We will post your examples here.
--- a/examples/chatbot-ui/README.md
+++ b/examples/chatbot-ui/README.md
@@ -0,0 +1,26 @@
+# chatbot-ui
+
+Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
+
+![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
+
+## Setup
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/chatbot-ui
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+Open http://localhost:3000 for the Web UI.
+
--- a/examples/chatbot-ui/docker-compose.yaml
+++ b/examples/chatbot-ui/docker-compose.yaml
@@ -0,0 +1,24 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
+
+  chatgpt:
+    image: ghcr.io/mckaywrigley/chatbot-ui:main
+    ports:
+      - 3000:3000
+    environment:
+      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+      - 'OPENAI_API_HOST=http://api:8080'
--- a/examples/chatbot-ui/models/completion.tmpl
+++ b/examples/chatbot-ui/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
--- a/examples/chatbot-ui/models/gpt-3.5-turbo.yaml
+++ b/examples/chatbot-ui/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,17 @@
+name: gpt-3.5-turbo
+parameters:
+  model: ggml-gpt4all-j
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 14
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+template:
+  completion: completion
+  chat: gpt4all
--- a/examples/chatbot-ui/models/gpt4all.tmpl
+++ b/examples/chatbot-ui/models/gpt4all.tmpl
@@ -0,0 +1,4 @@
+The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+### Prompt:
+{{.Input}}
+### Response:
--- a/go.mod
+++ b/go.mod
@@ -3,41 +3,52 @@ module github.com/go-skynet/LocalAI
 go 1.19

 require (
-	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4
-	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
-	github.com/gofiber/fiber/v2 v2.42.0
+	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
+	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04
+	github.com/gofiber/fiber/v2 v2.44.0
 	github.com/jaypipes/ghw v0.10.0
+	github.com/onsi/ginkgo/v2 v2.9.2
+	github.com/onsi/gomega v1.27.6
 	github.com/rs/zerolog v1.29.1
-	github.com/urfave/cli/v2 v2.25.0
+	github.com/sashabaranov/go-openai v1.9.0
+	github.com/urfave/cli/v2 v2.25.1
 )

 require (
 	github.com/StackExchange/wmi v1.2.1 // indirect
-	github.com/andybalholm/brotli v1.0.4 // indirect
+	github.com/andybalholm/brotli v1.0.5 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
 	github.com/ghodss/yaml v1.0.0 // indirect
+	github.com/go-logr/logr v1.2.3 // indirect
 	github.com/go-ole/go-ole v1.2.6 // indirect
+	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
+	github.com/google/go-cmp v0.5.9 // indirect
+	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
 	github.com/google/uuid v1.3.0 // indirect
 	github.com/jaypipes/pcidb v1.0.0 // indirect
-	github.com/klauspost/compress v1.15.9 // indirect
+	github.com/klauspost/compress v1.16.3 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
-	github.com/mattn/go-isatty v0.0.17 // indirect
+	github.com/mattn/go-isatty v0.0.18 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
 	github.com/mitchellh/go-homedir v1.1.0 // indirect
-	github.com/philhofer/fwd v1.1.1 // indirect
+	github.com/philhofer/fwd v1.1.2 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
-	github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d // indirect
-	github.com/tinylib/msgp v1.1.6 // indirect
+	github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
+	github.com/tinylib/msgp v1.1.8 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
-	github.com/valyala/fasthttp v1.44.0 // indirect
+	github.com/valyala/fasthttp v1.45.0 // indirect
 	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
-	golang.org/x/sys v0.6.0 // indirect
+	golang.org/x/net v0.8.0 // indirect
+	golang.org/x/sys v0.7.0 // indirect
+	golang.org/x/text v0.8.0 // indirect
+	golang.org/x/tools v0.7.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
 	howett.net/plist v1.0.0 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -2,30 +2,52 @@ github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDO
 github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
 github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
 github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
+github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
+github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
+github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
 github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
 github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
 github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
 github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
+github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
+github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04 h1:NPiv7mcIjU71MhEv3v4RRWKSBNXnnCyu6VX4CaaHz2I=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
+github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
 github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
+github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
+github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
+github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
 github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
 github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
@@ -33,6 +55,8 @@ github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLR
 github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
 github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
 github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
+github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
+github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
@@ -43,16 +67,24 @@ github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
 github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
+github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
 github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
+github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
 github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
+github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
 github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
 github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
+github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
+github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
@@ -60,68 +92,106 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
 github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
+github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
+github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
+github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
 github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
+github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
+github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
 github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
 github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
+github.com/urfave/cli/v2 v2.25.1 h1:zw8dSP7ghX0Gmm8vugrs6q9Ku0wzweqPyshy+syu9Gw=
+github.com/urfave/cli/v2 v2.25.1/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
 github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
 github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
+github.com/valyala/fasthttp v1.45.0 h1:zPkkzpIn8tdHZUrVa6PzYd0i5verqiPSkgTd3bSUcpA=
+github.com/valyala/fasthttp v1.45.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
 github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
 github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
+golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
 golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
+golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
+golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
+golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
 golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
+golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
 gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
 howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
--- a/kubernetes/data-volume.yaml
+++ b/kubernetes/data-volume.yaml
@@ -1,28 +0,0 @@
-# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
-# (requires https://github.com/kubevirt/containerized-data-importer)
-apiVersion: cdi.kubevirt.io/v1beta1
-kind: DataVolume
-metadata:
-  name: models
-  namespace: local-ai
-spec:
-  contentType: archive
-  source:
-    http:
-      url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
-      secretRef: model-secret
-  pvc:
-    accessModes:
-    - ReadWriteOnce
-    resources:
-      requests:
-        storage: 5Gi
---
-apiVersion: v1
-kind: Secret
-metadata:
-  name: model-secret
-  namespace: local-ai
-data:
-  accessKeyId: <model_server_username_base64_encoded>
-  secretKey: <model_server_password_base64_encoded>
--- a/kubernetes/deployment.yaml
+++ b/kubernetes/deployment.yaml
@@ -1,57 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: local-ai
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: local-ai
-  namespace: local-ai
-  labels:
-    app: local-ai
-spec:
-  selector:
-    matchLabels:
-      app: local-ai
-  replicas: 1
-  template:
-    metadata:
-      labels:
-        app: local-ai
-      name: local-ai
-    spec:
-      containers:
-        - name: local-ai
-          image: quay.io/go-skynet/local-ai:latest
-          env:
-          - name: THREADS
-            value: "14"
-          - name: CONTEXT_SIZE
-            value: "512"
-          - name: MODELS_PATH
-            value: /models
-          volumeMounts:
-          - mountPath: /models
-            name: models
-      volumes:
-      - name: models
-        persistentVolumeClaim:
-          claimName: models
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: local-ai
-  namespace: local-ai
-  # If using AWS, you'll need to override the default 60s load balancer idle timeout
-  # annotations:
-  #   service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
-spec:
-  selector:
-    app: local-ai
-  type: LoadBalancer
-  ports:
-    - protocol: TCP
-      port: 8080
-      targetPort: 8080
--- a/main.go
+++ b/main.go
@@ -50,6 +50,11 @@ func main() {
 				EnvVars:     []string{"MODELS_PATH"},
 				Value:       path,
 			},
+			&cli.StringFlag{
+				Name:        "config-file",
+				DefaultText: "Config file",
+				EnvVars:     []string{"CONFIG_FILE"},
+			},
 			&cli.StringFlag{
 				Name:        "address",
 				DefaultText: "Bind address for the API server.",
@@ -80,11 +85,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
 		UsageText: `local-ai [options]`,
 		Copyright: "go-skynet authors",
 		Action: func(ctx *cli.Context) error {
-			zerolog.SetGlobalLevel(zerolog.InfoLevel)
-			if ctx.Bool("debug") {
-				zerolog.SetGlobalLevel(zerolog.DebugLevel)
-			}
-			return api.Start(model.NewModelLoader(ctx.String("models-path")), ctx.String("address"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"))
+			return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
 		},
 	}

--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -18,7 +18,7 @@ import (
 )

 type ModelLoader struct {
-	modelPath string
+	ModelPath string
 	mu        sync.Mutex

 	models            map[string]*llama.LLama
@@ -31,7 +31,7 @@ type ModelLoader struct {

 func NewModelLoader(modelPath string) *ModelLoader {
 	return &ModelLoader{
-		modelPath:         modelPath,
+		ModelPath:         modelPath,
 		gpt2models:        make(map[string]*gpt2.GPT2),
 		gptmodels:         make(map[string]*gptj.GPTJ),
 		gptstablelmmodels: make(map[string]*gpt2.StableLM),
@@ -41,12 +41,12 @@ func NewModelLoader(modelPath string) *ModelLoader {
 }

 func (ml *ModelLoader) ExistsInModelPath(s string) bool {
-	_, err := os.Stat(filepath.Join(ml.modelPath, s))
+	_, err := os.Stat(filepath.Join(ml.ModelPath, s))
 	return err == nil
 }

 func (ml *ModelLoader) ListModels() ([]string, error) {
-	files, err := ioutil.ReadDir(ml.modelPath)
+	files, err := ioutil.ReadDir(ml.ModelPath)
 	if err != nil {
 		return []string{}, err
 	}
@@ -70,7 +70,19 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,

 	m, ok := ml.promptsTemplates[modelName]
 	if !ok {
-		return "", fmt.Errorf("no prompt template available")
+		modelFile := filepath.Join(ml.ModelPath, modelName)
+		if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
+			return "", err
+		}
+
+		t, exists := ml.promptsTemplates[modelName]
+		if exists {
+			m = t
+		}
+
+	}
+	if m == nil {
+		return "", nil
 	}

 	var buf bytes.Buffer
@@ -88,14 +100,14 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
 	}

 	// Check if the model path exists
-	// skip any error here - we run anyway if a template is not exist
+	// skip any error here - we run anyway if a template does not exist
 	modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)

 	if !ml.ExistsInModelPath(modelTemplateFile) {
 		return nil
 	}

-	dat, err := os.ReadFile(filepath.Join(ml.modelPath, modelTemplateFile))
+	dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
 	if err != nil {
 		return err
 	}
@@ -125,7 +137,7 @@ func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, erro
 	}

 	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.modelPath, modelName)
+	modelFile := filepath.Join(ml.ModelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

 	model, err := gpt2.NewStableLM(modelFile)
@@ -164,7 +176,7 @@ func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
 	}

 	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.modelPath, modelName)
+	modelFile := filepath.Join(ml.ModelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

 	model, err := gpt2.New(modelFile)
@@ -207,7 +219,7 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
 	}

 	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.modelPath, modelName)
+	modelFile := filepath.Join(ml.ModelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

 	model, err := gptj.New(modelFile)
@@ -256,7 +268,7 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
 	}

 	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.modelPath, modelName)
+	modelFile := filepath.Join(ml.ModelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

 	model, err := llama.New(modelFile, opts...)
--- a/renovate.json
+++ b/renovate.json
@@ -0,0 +1,17 @@
+{
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  "extends": [
+    "config:base"
+  ],
+  "regexManagers": [
+    {
+      "fileMatch": [
+        "^Makefile$"
+      ],
+      "matchStrings": [
+        "#\\s*renovate:\\s*datasource=(?<datasource>.*?) depName=(?<depName>.*?)( datasourceTemplate=(?<datasourceTemplate>.*?))?( packageNameTemplate=(?<packageNameTemplate>.*?))?( depNameTemplate=(?<depNameTemplate>.*?))?( valueTemplate=(?<currentValueTemplate>.*?))?( versioning=(?<versioning>.*?))?\\s+.+_VERSION=(?<currentValue>.*?)\\s"
+      ],
+      "versioningTemplate": "{{#if versioning}}{{versioning}}{{/if}}"
+    }
+  ]
+}
--- a/tests/fixtures/completion.tmpl
+++ b/tests/fixtures/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
--- a/tests/fixtures/config.yaml
+++ b/tests/fixtures/config.yaml
@@ -0,0 +1,28 @@
+- name: list1
+  parameters:
+    model: testmodel
+  context_size: 512
+  threads: 10
+  stopwords:
+  - "HUMAN:"
+  - "### Response:"
+  roles:
+    user: "HUMAN:"
+    system: "GPT:"
+  template:
+    completion: completion
+    chat: ggml-gpt4all-j
+- name: list2
+  parameters:
+    model: testmodel
+  context_size: 512
+  threads: 10
+  stopwords:
+  - "HUMAN:"
+  - "### Response:"
+  roles:
+    user: "HUMAN:"
+    system: "GPT:"
+  template:
+    completion: completion
+    chat: ggml-gpt4all-j
--- a/tests/fixtures/ggml-gpt4all-j.tmpl
+++ b/tests/fixtures/ggml-gpt4all-j.tmpl
@@ -0,0 +1,4 @@
+The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+### Prompt:
+{{.Input}}
+### Response:
--- a/tests/fixtures/gpt4.yaml
+++ b/tests/fixtures/gpt4.yaml
@@ -0,0 +1,14 @@
+name: gpt4all
+parameters:
+  model: testmodel
+context_size: 512
+threads: 10
+stopwords:
+- "HUMAN:"
+- "### Response:"
+roles:
+  user: "HUMAN:"
+  system: "GPT:"
+template:
+  completion: completion
+  chat: ggml-gpt4all-j
--- a/tests/fixtures/gpt4_2.yaml
+++ b/tests/fixtures/gpt4_2.yaml
@@ -0,0 +1,14 @@
+name: gpt4all-2
+parameters:
+  model: testmodel
+context_size: 1024
+threads: 5
+stopwords:
+- "HUMAN:"
+- "### Response:"
+roles:
+  user: "HUMAN:"
+  system: "GPT:"
+template:
+  completion: completion
+  chat: ggml-gpt4all-j
Author	SHA1	Message	Date
Ettore Di Giacinto	bd7c2ff110	docs: reorder links in README.md	2023-04-28 13:54:11 +02:00
Mauro Morales	d1d55d29a0	Add Kairos LocalAI example to the links (#115 )	2023-04-28 13:52:17 +02:00
Ettore Di Giacinto	e07dba7ad6	docs: Add contributors (#113 ) Signed-off-by: mudler <mudler@mocaccino.org>	2023-04-28 10:54:39 +02:00
Matthieu Talbot	062f832510	Add EXPOSE to Dockerfile (#107 )	2023-04-27 16:45:24 +00:00
Ettore Di Giacinto	d0330bb64b	docs: update example README.md (#104 )	2023-04-27 17:46:14 +02:00
antongisli	91a23ec6ec	Anton readme (#99 ) Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2023-04-27 17:17:03 +02:00
Ron Evans	0b000dd043	examples: correct typo in README (#103 ) Signed-off-by: deadprogram <ron@hybridgroup.com>	2023-04-27 17:14:38 +02:00
Ettore Di Giacinto	c73ba91a66	docs: update README	2023-04-27 15:39:48 +02:00
Ettore Di Giacinto	dfc00f8bc1	docs: update README.md (#98 )	2023-04-27 15:06:55 +02:00
Ettore Di Giacinto	a18ff9c9b3	docs: move api docs (#96 )	2023-04-27 10:42:50 +02:00
Ettore Di Giacinto	d0199279ad	docs: update, add config docs (#94 )	2023-04-27 10:39:01 +02:00
Ettore Di Giacinto	9ede1e12d8	few typos and clarity changes (#91 ) (#92 ) Co-authored-by: antongisli <anton@huge.geek.nz>	2023-04-27 07:47:39 +02:00
Ettore Di Giacinto	c806eae0de	feat: config files and SSE (#83 ) Signed-off-by: mudler <mudler@mocaccino.org> Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com> Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com>	2023-04-26 21:18:18 -07:00
renovate[bot]	4e2061636e	chore(deps): update actions/checkout action to v3 (#82 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-25 07:46:29 +02:00
renovate[bot]	e3ef171968	fix(deps): update module github.com/gofiber/fiber/v2 to v2.44.0 (#81 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-25 07:46:14 +02:00
Ettore Di Giacinto	12d83a4184	feat: Return OpenAI errors and update docs (#80 ) Signed-off-by: mudler <mudler@mocaccino.org>	2023-04-24 23:42:03 +02:00
renovate[bot]	045412e8dd	fix(deps): update module github.com/urfave/cli/v2 to v2.25.1 (#78 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 18:16:23 +02:00
renovate[bot]	9896a9a58b	fix(deps): update github.com/go-skynet/go-llama.cpp digest to e45cebe (#77 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 18:16:10 +02:00
Ettore Di Giacinto	b9011bda59	feat: automatic updates with renovate, docs updates (#76 )	2023-04-24 18:10:58 +02:00
Ettore Di Giacinto	2b2f5fa36a	feat: update llama.cpp (#72 )	2023-04-24 14:15:49 +02:00
renovate[bot]	43c557dc5c	fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 1f7bff5 (#74 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 14:14:21 +02:00
renovate[bot]	7abb2c9bd7	fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 245a5bf (#73 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 14:13:04 +02:00
renovate[bot]	7a9ea4480a	Configure Renovate (#71 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 14:11:39 +02:00
Vladimir Malyutin	31bcc558de	Update README.md (#62 )	2023-04-22 14:42:30 +02:00
Ettore Di Giacinto	676e15f785	fix: make MacOS builds work (#61 )	2023-04-22 11:05:23 +02:00
Marc R Kellerman	3e71c90949	feature: add devcontainer for live debugging (#60 )	2023-04-22 01:20:03 +02:00
Ettore Di Giacinto	550ae9c968	docs: add Discord channel link (#59 )	2023-04-22 00:46:17 +02:00
Ettore Di Giacinto	1c872ec326	feat: add CI/tests (#58 ) Signed-off-by: mudler <mudler@mocaccino.org>	2023-04-22 00:44:52 +02:00
Marc R Kellerman	05f35b182c	fix(makefile): fix go-gpt2 folder and add verification before git clone (#51 ) Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>	2023-04-22 00:29:32 +02:00
Ettore Di Giacinto	79791438fe	Use the first available model if not specified (#55 ) Signed-off-by: mudler <mudler@mocaccino.org>	2023-04-21 22:54:43 +02:00
Tyler Gillson	bf20cc34f6	feat: Add helm chart (#56 )	2023-04-21 13:22:03 -07:00
Ettore Di Giacinto	5cba71de70	Add stopwords, debug mode, and other API enhancements (#54 ) Signed-off-by: mudler <mudler@mocaccino.org>	2023-04-21 19:46:59 +02:00
Ettore Di Giacinto	4b7e83056d	Update .env	2023-04-21 01:47:35 +02:00