Compare commits

...

116 Commits

Author SHA1 Message Date
mudler
18f5a1f7db example: add langchain agent 2023-05-07 00:15:12 +02:00
ci-robbot [bot]
cbdcc839f3 ⬆️ Update go-skynet/go-llama.cpp (#201)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-06 22:49:44 +02:00
mudler
e1c8f087f4 Update README 2023-05-06 19:18:03 +02:00
mudler
3a90ea44a5 Update readme and examples 2023-05-06 19:15:22 +02:00
renovate[bot]
e55492475d fix(deps): update github.com/go-skynet/go-llama.cpp digest to 691d479 (#189)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-06 00:42:31 +02:00
Dave
07ec2e441d mini fix - OpenAI documentation url (#200) 2023-05-06 00:42:08 +02:00
ci-robbot [bot]
38d7e0b43c ⬆️ Update go-skynet/go-llama.cpp (#198)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-06 00:21:48 +02:00
Dave
3411bfd00d Langchain Example Updates (#199) 2023-05-06 00:21:06 +02:00
Ettore Di Giacinto
7e5fe35ae4 Mixed enhancements (#196) 2023-05-06 00:00:58 +02:00
mudler
8c8cf38d4d tests: use 1 core 2023-05-05 23:29:34 +02:00
mudler
75b25297fd tests: run with ginkgo 2023-05-05 22:51:30 +02:00
mudler
009ee47fe2 Don't allow 0 as thread count 2023-05-05 22:51:20 +02:00
mudler
ec2adc2c03 tests: use 3 cores 2023-05-05 22:07:01 +02:00
mudler
ad301e6ed7 example(add): document query example 2023-05-05 21:56:31 +02:00
mudler
d094381e5d ci: lower fixtures spec 2023-05-05 21:28:38 +02:00
mudler
3ff9bbd217 examples: add rwkv script folder 2023-05-05 19:04:52 +02:00
mudler
e62ee2bc06 fix: remove trailing 0s from embeddings
This happens when no max_tokens are set, so by default go-llama
allocates more space for the slice and padding happens.
2023-05-05 18:35:03 +02:00
mudler
b49721cdd1 fix: respect config from file for backends settings 2023-05-05 18:05:10 +02:00
mudler
64c0a7967f fix: pass prediction options when using the model 2023-05-05 15:56:02 +02:00
mudler
e96eadab40 feat: support deprecated embeddings API 2023-05-05 15:55:19 +02:00
mudler
e73283121b feat: support arrays for prompt and input
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-05 15:54:59 +02:00
mudler
857d13e8d6 debug: wire up go-fiber debugger 2023-05-05 15:53:57 +02:00
ci-robbot [bot]
91db3d4d5c ⬆️ Update go-skynet/go-llama.cpp (#194)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-05 13:45:50 +02:00
Ettore Di Giacinto
961cf29217 feat: expose mirostat to config (#193) 2023-05-05 13:45:37 +02:00
Ettore Di Giacinto
c839b334eb feat: add embeddings for go-llama.cpp backend (#190) 2023-05-05 11:20:06 +02:00
Ettore Di Giacinto
714bfcd45b fix: missing returning error and free callback stream (#187) 2023-05-04 19:49:43 +02:00
renovate[bot]
77ce8b953e fix(deps): update github.com/donomii/go-rwkv.cpp digest to af62fcc (#171)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:48 +02:00
renovate[bot]
01ada95941 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 2e6ae12 (#172)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:11 +02:00
ci-robbot [bot]
eabdc5042a ⬆️ Update go-skynet/go-llama.cpp (#184)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-04 18:28:49 +02:00
Dhruv Gera
96267d9437 localai: Include the WebUI project example (#130)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-04 18:27:58 +02:00
Ettore Di Giacinto
9497a24127 fix: hardcode default number of cores to '4' (#186) 2023-05-04 18:14:58 +02:00
Ettore Di Giacinto
fdf75c6d0e rwkv fixes and examples (#185) 2023-05-04 17:32:23 +02:00
mudler
6352308882 ci: minor fixups 2023-05-04 15:08:20 +02:00
mudler
a8172a0f4e ci: fix typo 2023-05-04 15:04:41 +02:00
mudler
ebcd10d66f ci: manually update deps 2023-05-04 15:01:29 +02:00
mudler
885642915f ci: add renovate suffix 2023-05-04 12:26:59 +02:00
mudler
2e424491c0 ci: lookupNameTemplate -> depNameTemplate 2023-05-04 12:23:05 +02:00
mudler
aa6faef8f7 ci: versioning -> versioningTemplate 2023-05-04 12:07:29 +02:00
mudler
b3254baf60 ci: add versioning 2023-05-04 12:05:39 +02:00
mudler
0a43d27f0e ci: update renovate 2023-05-04 12:02:19 +02:00
Ettore Di Giacinto
3fe11fe24d ci: attempt to configure renovate with custom regexes (#178) 2023-05-04 11:55:14 +02:00
renovate[bot]
af18fdc749 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.3 (#174)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:44:02 +02:00
renovate[bot]
32b5eddd7d fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.4 (#173)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:41:51 +02:00
Dave
07c3aa1869 Dockerized Langchain / PY example (#175) 2023-05-04 08:41:13 +02:00
renovate[bot]
e59bad89e7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.2 (#164)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 23:05:50 +02:00
Jeremy Price
b971807980 Looks for models in $CWD/models/ dir by default (#169) 2023-05-03 23:03:31 +02:00
Ettore Di Giacinto
c974dad799 Return usage in the API responses (#166) 2023-05-03 17:29:18 +02:00
Ettore Di Giacinto
4eae570ef5 Update docs (#163) 2023-05-03 15:51:54 +02:00
Ettore Di Giacinto
67992a7d99 feat: support slices or strings in the prompt completion endpoint (#162)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 13:13:31 +02:00
renovate[bot]
0a4899f366 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 8ceb616 (#150)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:48:06 +02:00
renovate[bot]
1eb02f6c91 fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.3 (#161)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:47:54 +02:00
mudler
575874e4fb readme: minor update 2023-05-03 11:46:29 +02:00
Ettore Di Giacinto
751b7eca62 feat: add rwkv support (#158)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 11:45:22 +02:00
Ettore Di Giacinto
1ae7150810 feat: allow to specify default backend for model (#156)
Signed-off-by: mudler <mudler@c3os.io>
2023-05-03 00:31:28 +02:00
Ettore Di Giacinto
70caf9bf8c feat: support stopwords both string and arrays (#154) 2023-05-02 23:30:00 +02:00
Dave
0b226ac027 Stop parameter of OpenAIRequest changed to String Array (#153) 2023-05-02 22:02:45 +02:00
Ettore Di Giacinto
220d6fd59b feat: add stream events (#152) 2023-05-02 20:03:35 +02:00
antongisli
0a00a4b58e adding mac build and example (#151)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-02 19:24:45 +02:00
Ettore Di Giacinto
156e15a4fa Bump llama.cpp, downgrade gpt4all-j (#149) 2023-05-02 16:07:18 +02:00
renovate[bot]
271d3f6673 fix(deps): update module github.com/valyala/fasthttp to v1.47.0 (#143)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 23:36:58 +02:00
Ettore Di Giacinto
fec4ab93c5 docs: Add langchain to the example index (#147)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:21:07 +02:00
renovate[bot]
38a7a7a54d fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 77bf8c1 (#141)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-01 23:18:41 +02:00
Ettore Di Giacinto
0db0704e2c docs: Add slack-bot example (#145)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:18:24 +02:00
Dave
88f472e5d2 Add LangchainJS Examples (#146) 2023-05-01 23:18:14 +02:00
Ettore Di Giacinto
92452d46da feat: add new gpt4all-j binding (#142) 2023-05-01 20:00:15 +02:00
Ettore Di Giacinto
ac70252d70 drop: remove helm charts, now in separate repo (#134)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 18:07:41 +02:00
renovate[bot]
f6451d2518 fix(deps): update module github.com/urfave/cli/v2 to v2.25.3 (#140)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 18:07:29 +02:00
Ettore Di Giacinto
2473f9d19b docs: add discord-bot preview (#137) 2023-05-01 11:03:34 +02:00
renovate[bot]
bc583385a9 fix(deps): update module github.com/urfave/cli/v2 to v2.25.2 (#136)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:53:48 +02:00
renovate[bot]
8286bfbab7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.1 (#135)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:52:20 +02:00
Ettore Di Giacinto
d129fabe3b docs: enhancements (#133) 2023-04-30 23:27:02 +02:00
renovate[bot]
2539867247 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 377fd24 (#129)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-30 11:09:48 +02:00
renovate[bot]
69fedb92d9 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 361b9f8 (#127)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-30 08:47:27 +02:00
Ettore Di Giacinto
54b5eadcc4 docs: add discord-bot example (#126) 2023-04-30 00:31:28 +02:00
Ettore Di Giacinto
16773e2a35 feat: make images to build sources on start (#124)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-29 20:38:37 +02:00
renovate[bot]
78503c62b7 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 9bf702f (#125)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-29 16:53:39 +02:00
Ettore Di Giacinto
a330c9cee5 update: bump llama.cpp to 7f15c5c (#122)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-29 15:20:50 +02:00
Ettore Di Giacinto
ff0867996e tests: increase timeout (#121) 2023-04-29 14:56:00 +02:00
Ettore Di Giacinto
1bf8f996d1 docs: clarify GPT4ALL-J licensing (#120) 2023-04-29 14:50:22 +02:00
Ettore Di Giacinto
52f4d993c1 feat: add /edit endpoint (#119) 2023-04-29 09:22:09 +02:00
renovate[bot]
d0ceebc5d7 fix(deps): update module github.com/valyala/fasthttp to v1.46.0 (#118)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-28 22:44:29 +02:00
renovate[bot]
9122af3ae1 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 3d084e4 (#108)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-28 19:24:49 +02:00
Ettore Di Giacinto
b8533428bc bump: update llama.cpp (#117)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-28 19:24:28 +02:00
Ettore Di Giacinto
677905334c docs: reorder section (#116) 2023-04-28 13:55:23 +02:00
Mauro Morales
d1d55d29a0 Add Kairos LocalAI example to the links (#115) 2023-04-28 13:52:17 +02:00
Ettore Di Giacinto
e07dba7ad6 docs: Add contributors (#113)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-28 10:54:39 +02:00
Matthieu Talbot
062f832510 Add EXPOSE to Dockerfile (#107) 2023-04-27 16:45:24 +00:00
Ettore Di Giacinto
d0330bb64b docs: update example README.md (#104) 2023-04-27 17:46:14 +02:00
antongisli
91a23ec6ec Anton readme (#99)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-27 17:17:03 +02:00
Ron Evans
0b000dd043 examples: correct typo in README (#103)
Signed-off-by: deadprogram <ron@hybridgroup.com>
2023-04-27 17:14:38 +02:00
Ettore Di Giacinto
c73ba91a66 docs: update README 2023-04-27 15:39:48 +02:00
Ettore Di Giacinto
dfc00f8bc1 docs: update README.md (#98) 2023-04-27 15:06:55 +02:00
Ettore Di Giacinto
a18ff9c9b3 docs: move api docs (#96) 2023-04-27 10:42:50 +02:00
Ettore Di Giacinto
d0199279ad docs: update, add config docs (#94) 2023-04-27 10:39:01 +02:00
Ettore Di Giacinto
9ede1e12d8 few typos and clarity changes (#91) (#92)
Co-authored-by: antongisli <anton@huge.geek.nz>
2023-04-27 07:47:39 +02:00
Ettore Di Giacinto
c806eae0de feat: config files and SSE (#83)
Signed-off-by: mudler <mudler@mocaccino.org>
Signed-off-by: Tyler Gillson <tyler.gillson@gmail.com>
Co-authored-by: Tyler Gillson <tyler.gillson@gmail.com>
2023-04-26 21:18:18 -07:00
renovate[bot]
4e2061636e chore(deps): update actions/checkout action to v3 (#82)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-25 07:46:29 +02:00
renovate[bot]
e3ef171968 fix(deps): update module github.com/gofiber/fiber/v2 to v2.44.0 (#81)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-25 07:46:14 +02:00
Ettore Di Giacinto
12d83a4184 feat: Return OpenAI errors and update docs (#80)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-24 23:42:03 +02:00
renovate[bot]
045412e8dd fix(deps): update module github.com/urfave/cli/v2 to v2.25.1 (#78)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:23 +02:00
renovate[bot]
9896a9a58b fix(deps): update github.com/go-skynet/go-llama.cpp digest to e45cebe (#77)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:10 +02:00
Ettore Di Giacinto
b9011bda59 feat: automatic updates with renovate, docs updates (#76) 2023-04-24 18:10:58 +02:00
Ettore Di Giacinto
2b2f5fa36a feat: update llama.cpp (#72) 2023-04-24 14:15:49 +02:00
renovate[bot]
43c557dc5c fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 1f7bff5 (#74)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:14:21 +02:00
renovate[bot]
7abb2c9bd7 fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 245a5bf (#73)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:13:04 +02:00
renovate[bot]
7a9ea4480a Configure Renovate (#71)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:11:39 +02:00
Vladimir Malyutin
31bcc558de Update README.md (#62) 2023-04-22 14:42:30 +02:00
Ettore Di Giacinto
676e15f785 fix: make MacOS builds work (#61) 2023-04-22 11:05:23 +02:00
Marc R Kellerman
3e71c90949 feature: add devcontainer for live debugging (#60) 2023-04-22 01:20:03 +02:00
Ettore Di Giacinto
550ae9c968 docs: add Discord channel link (#59) 2023-04-22 00:46:17 +02:00
Ettore Di Giacinto
1c872ec326 feat: add CI/tests (#58)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-22 00:44:52 +02:00
Marc R Kellerman
05f35b182c fix(makefile): fix go-gpt2 folder and add verification before git clone (#51)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-22 00:29:32 +02:00
Ettore Di Giacinto
79791438fe Use the first available model if not specified (#55)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 22:54:43 +02:00
Tyler Gillson
bf20cc34f6 feat: Add helm chart (#56) 2023-04-21 13:22:03 -07:00
Ettore Di Giacinto
5cba71de70 Add stopwords, debug mode, and other API enhancements (#54)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 19:46:59 +02:00
Ettore Di Giacinto
4b7e83056d Update .env 2023-04-21 01:47:35 +02:00
93 changed files with 5507 additions and 711 deletions

3
.devcontainer/Dockerfile Normal file

@@ -0,0 +1,3 @@
ARG GO_VERSION=1.20
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
RUN apt-get update && apt-get install -y cmake

46
.devcontainer/devcontainer.json Normal file

@@ -0,0 +1,46 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
"name": "Existing Docker Compose (Extend)",
// Update the 'dockerComposeFile' list if you have more compose files or use different names.
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
"dockerComposeFile": [
"../docker-compose.yaml",
"docker-compose.yml"
],
// The 'service' property is the name of the service for the container that VS Code should
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
"service": "api",
// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/workspace",
"features": {
"ghcr.io/devcontainers/features/go:1": {},
"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
},
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Uncomment the next line if you want start specific services in your Docker Compose config.
// "runServices": [],
// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
// "shutdownAction": "none",
// Uncomment the next line to run commands after the container is created.
"postCreateCommand": "make prepare"
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "devcontainer"
}

26
.devcontainer/docker-compose.yml Normal file

@@ -0,0 +1,26 @@
version: '3.6'
services:
  # Update this to the name of the service you want to work with in your docker-compose.yml file
  api:
    # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
    # folder. Note that the path of the Dockerfile and context is relative to the *primary*
    # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
    # array). The sample below assumes your primary file is in the root of your project.
    #
    build:
      context: .
      dockerfile: .devcontainer/Dockerfile
    volumes:
      # Update this to wherever you want VS Code to mount the folder of your project
      - .:/workspace:cached
    # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
    # cap_add:
    #   - SYS_PTRACE
    # security_opt:
    #   - seccomp:unconfined
    # Overrides default command so things don't shut down after the process ends.
    command: /bin/sh -c "while sleep 1000; do :; done"
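For reference, the same setup can be exercised outside VS Code with the devcontainer CLI (a sketch, assuming `@devcontainers/cli` is installed):

```bash
# build the compose services with the overrides above and start the dev container
devcontainer up --workspace-folder .
```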

2
.dockerignore

@@ -1 +1,2 @@
models
examples/chatbot-ui/models

7
.env

@@ -1,4 +1,5 @@
THREADS=14
CONTEXT_SIZE=512
# THREADS=14
# CONTEXT_SIZE=512
MODELS_PATH=/models
# DEBUG=true
# BUILD_TYPE=generic

9
.github/bump_deps.sh vendored Executable file

@@ -0,0 +1,9 @@
#!/bin/bash
set -xe
REPO=$1
BRANCH=$2
VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
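The nightly workflow below invokes this script once per dependency; the same call works locally, with arguments taken from the workflow matrix:

```bash
# pin the Makefile's GOLLAMA_VERSION to the latest commit on go-llama.cpp's master branch
bash .github/bump_deps.sh go-skynet/go-llama.cpp master GOLLAMA_VERSION
```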

42
.github/workflows/bump_deps.yaml vendored Normal file

@@ -0,0 +1,42 @@
name: Bump dependencies
on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
jobs:
  bump:
    strategy:
      fail-fast: false
      matrix:
        include:
          - repository: "go-skynet/go-gpt4all-j.cpp"
            variable: "GOGPT4ALLJ_VERSION"
            branch: "master"
          - repository: "go-skynet/go-llama.cpp"
            variable: "GOLLAMA_VERSION"
            branch: "master"
          - repository: "go-skynet/go-gpt2.cpp"
            variable: "GOGPT2_VERSION"
            branch: "master"
          - repository: "donomii/go-rwkv.cpp"
            variable: "RWKV_VERSION"
            branch: "main"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Bump dependencies 🔧
        run: |
          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
          title: ':arrow_up: Update ${{ matrix.repository }}'
          branch: "update/${{ matrix.variable }}"
          body: Bump of ${{ matrix.repository }} version
          signoff: true

4
.github/workflows/image.yml vendored

@@ -54,8 +54,8 @@ jobs:
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
- name: Build
if: github.event_name != 'pull_request'
uses: docker/build-push-action@v4

44
.github/workflows/test.yml vendored Normal file

@@ -0,0 +1,44 @@
---
name: 'tests'
on:
  pull_request:
  push:
    branches:
      - master
    tags:
      - '*'
jobs:
  ubuntu-latest:
    runs-on: ubuntu-latest
    steps:
      - name: Clone
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential
      - name: Test
        run: |
          make test
  macOS-latest:
    runs-on: macOS-latest
    steps:
      - name: Clone
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Dependencies
        run: |
          brew update
          brew install sdl2
      - name: Test
        run: |
          make test

8
.gitignore vendored

@@ -1,11 +1,15 @@
# go-llama build artifacts
go-llama
go-gpt4all-j
go-gpt2
go-rwkv
# LocalAI build binary
LocalAI
local-ai
# prevent above rules from omitting the helm chart
!charts/*
# Ignore models
models/*.bin
models/ggml-*
models/*
test-models/

28
.vscode/launch.json vendored

@@ -1,16 +1,20 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
]
}
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
],
"env": {
"C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"DEBUG": "true"
}
}
]
}

Dockerfile

@@ -1,12 +1,9 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
FROM golang:$GO_VERSION as builder
ARG BUILD_TYPE=
FROM golang:$GO_VERSION
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
ARG BUILD_TYPE=
RUN make build${BUILD_TYPE}
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
ENTRYPOINT [ "/usr/bin/local-ai" ]
RUN make prepare-sources
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]

14
Dockerfile.dev Normal file

@@ -0,0 +1,14 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
ARG BUILD_TYPE=
FROM golang:$GO_VERSION as builder
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
RUN make build
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
EXPOSE 8080
ENTRYPOINT [ "/usr/bin/local-ai" ]
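A sketch of using the new file; the `-f` flag is standard Docker usage and the tag is arbitrary:

```bash
# multi-stage build that compiles local-ai and copies the binary into a slim Debian image
docker build -f Dockerfile.dev -t local-ai:dev .
docker run -p 8080:8080 -v $PWD/models:/models local-ai:dev --models-path /models
```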

135
Makefile

@@ -2,9 +2,12 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=llama.cpp-5ecff35
GOGPT4ALLJ_VERSION?=1f548782d80d48b9a0fac33aae6f129358787bc0
GOGPT2_VERSION?=1c24f5b86ac428cd5e81dae1f1427b1463bd2b06
GOLLAMA_VERSION?=cf9b522db63898dcc5eb86e37c979ab85cbd583e
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
@@ -12,22 +15,29 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
# Use this if you want to set the default behavior
ifndef BUILD_TYPE
BUILD_TYPE:=default
endif
ifeq ($(BUILD_TYPE), "generic")
GENERIC_PREFIX:=generic-
else
GENERIC_PREFIX:=
endif
.PHONY: all test build vendor
all: help
## Build:
build: prepare ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
buildgeneric: prepare-generic ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
## GPT4ALL-J
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j && cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@@ -37,59 +47,90 @@ go-gpt4all-j:
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
## RWKV
go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
@find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
@find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
@find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
go-rwkv/librwkv.a: go-rwkv
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j libgptj.a
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
go-gpt4all-j/libgptj.a-generic: go-gpt4all-j
$(MAKE) -C go-gpt4all-j generic-libgptj.a
## CEREBRAS GPT
go-gpt2:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
# CEREBRAS GPT
go-gpt2.cpp:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2.cpp && cd go-gpt2.cpp && git checkout -b build $(GOGPT2_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2.cpp/libgpt2.a: go-gpt2.cpp
$(MAKE) -C go-gpt2.cpp libgpt2.a
go-gpt2.cpp/libgpt2.a-generic: go-gpt2.cpp
$(MAKE) -C go-gpt2.cpp generic-libgpt2.a
go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
go-llama:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama libbinding.a
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
go-llama-generic:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama generic-libbinding.a
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
prepare: go-llama go-gpt4all-j/libgptj.a go-gpt2.cpp/libgpt2.a replace
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv
$(GOCMD) mod download
prepare-generic: go-llama-generic go-gpt4all-j/libgptj.a-generic go-gpt2.cpp/libgpt2.a-generic replace
## GENERIC
rebuild: ## Rebuilds the project
$(MAKE) -C go-llama clean
$(MAKE) -C go-gpt4all-j clean
$(MAKE) -C go-gpt2 clean
$(MAKE) -C go-rwkv clean
$(MAKE) build
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a replace ## Prepares for building
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./go-gpt2.cpp
rm -rf ./go-gpt2
rm -rf ./go-rwkv
rm -rf $(BINARY_NAME)
## Run:
run: prepare
$(GOCMD) run ./ api
## Build:
## Test:
test: ## Run the tests of the project
$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
build: prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
## Run
run: prepare ## run local-ai
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
test-models/testmodel:
mkdir test-models
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
cp tests/fixtures/* test-models
test: prepare test-models/testmodel
cp tests/fixtures/* test-models
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./...
## Help:
help: ## Show this help.
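
Taken together, the targets above support a workflow along these lines (a sketch based only on the targets shown):

```bash
make build          # clone the binding repos, build their libraries, then build local-ai
make generic-build  # same, but via the generic- prefixed library targets
make test           # fetch a small test model into test-models/ and run the ginkgo suite
make clean          # remove the cloned sources and the binary
```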

535
README.md

@@ -5,15 +5,44 @@
<br>
</h1>
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather than on the CLI interface. We think there are already many projects that can be used as a CLI interface, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)
LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is Apache 2.0 licensed and can be used for commercial purposes.
[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy)
**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows you to run models locally or on-prem with consumer-grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is licensed under Apache 2.0.
- OpenAI compatible API
- Supports multiple-models
- Supports multiple models
- Once loaded the first time, it keeps models loaded in memory for faster inference
- Support for prompt templates
- Doesn't shell-out, but uses C bindings for faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
- Doesn't shell-out, but uses C bindings for faster inference and better performance.
LocalAI is a community-driven project, focused on making AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
## News
- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for the `/edits` endpoint
- 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )
Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it)
### Blogs and articles
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - an excellent use case for LocalAI, using AI to analyse Kubernetes clusters.
## Contribute and help
To help the project you can:
- Upvote the [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
- [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project.
- If you have technological skills and want to contribute to development, have a look at the open issues. If you are new you can have a look at the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels.
- If you don't have technological skills you can still help by improving documentation, adding examples, or sharing your user stories with our community; any help and contribution is welcome!
## Model compatibility
@@ -22,15 +51,41 @@ It is compatible with the models supported by [llama.cpp](https://github.com/gge
Tested with:
- Vicuna
- Alpaca
- [GPT4ALL](https://github.com/nomic-ai/gpt4all)
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
- [GPT4ALL](https://github.com/nomic-ai/gpt4all) (changes required, see below)
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin) (no changes required)
- Koala
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
- WizardLM
- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
### GPT4ALL
Note: You might need to convert older models to the new format; see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all), for instance, to run `gpt4all`.
### RWKV
<details>
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
Note: rwkv models have an associated tokenizer that needs to be provided along with them:
```
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
```
</details>
### Others
It should also be compatible with StableLM and GPTNeoX ggml models (untested).
### Hardware requirements
Depending on the model, you might need more RAM or CPU resources to run it. Also check out [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml`-based backends. `rwkv` is less expensive on resources.
## Usage
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
@@ -43,6 +98,9 @@ git clone https://github.com/go-skynet/LocalAI
cd LocalAI
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# copy your models to models/
cp your-model.bin models/
@@ -50,7 +108,7 @@ cp your-model.bin models/
# vim .env
# start with docker-compose
docker compose up -d --build
docker-compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
@@ -63,15 +121,150 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
}'
```
## Prompt templates
### Example: Use GPT4ALL-J model
<details>
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Use a template from the examples
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
# (optional) Edit the .env file to set things like context size and threads
# vim .env
# start with docker-compose
docker-compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-gpt4all-j",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9
}'
# {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
```
</details>
To build locally, run `make build` (see below).
### Other examples
To see other examples of how to integrate with other projects, for instance for question answering or for using it with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
### Advanced configuration
LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
<details>
You can create multiple `yaml` files in the models path or specify a single YAML configuration file.
Consider the following `models` folder in the `example/chatbot-ui`:
```
ls -liah examples/chatbot-ui/models
36487587 drwxr-xr-x 2 mudler mudler 4.0K May 3 12:27 .
36487586 drwxr-xr-x 3 mudler mudler 4.0K May 3 10:42 ..
36465214 -rw-r--r-- 1 mudler mudler 10 Apr 27 07:46 completion.tmpl
36464855 -rw-r--r-- 1 mudler mudler 3.6G Apr 27 00:08 ggml-gpt4all-j
36464537 -rw-r--r-- 1 mudler mudler 245 May 3 10:42 gpt-3.5-turbo.yaml
36467388 -rw-r--r-- 1 mudler mudler 180 Apr 27 07:46 gpt4all.tmpl
```
In the `gpt-3.5-turbo.yaml` file, the `gpt-3.5-turbo` model is defined as an alias that uses `gpt4all-j` with pre-defined options.
For instance, consider the following, which declares `gpt-3.5-turbo` backed by the `ggml-gpt4all-j` model:
```yaml
name: gpt-3.5-turbo
# Default model parameters
parameters:
  # Relative to the models path
  model: ggml-gpt4all-j
  # temperature
  temperature: 0.3
  # all the OpenAI request options here..
# Default context size
context_size: 512
threads: 10
# Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with.
backend: gptj # available: llama, stablelm, gpt2, gptj rwkv
# stopwords (if supported by the backend)
stopwords:
- "HUMAN:"
- "### Response:"
# define chat roles
roles:
  user: "HUMAN:"
  system: "GPT:"
template:
  # template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
  completion: completion
  chat: ggml-gpt4all-j
```
Specifying a `config-file` via CLI allows you to declare models in a single file as a list, for instance:
```yaml
- name: list1
  parameters:
    model: testmodel
  context_size: 512
  threads: 10
  stopwords:
  - "HUMAN:"
  - "### Response:"
  roles:
    user: "HUMAN:"
    system: "GPT:"
  template:
    completion: completion
    chat: ggml-gpt4all-j
- name: list2
  parameters:
    model: testmodel
  context_size: 512
  threads: 10
  stopwords:
  - "HUMAN:"
  - "### Response:"
  roles:
    user: "HUMAN:"
    system: "GPT:"
  template:
    completion: completion
    chat: ggml-gpt4all-j
```
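For instance, assuming the list above is saved as `config.yaml`, the server can be pointed at it via the `config-file` flag or the `CONFIG_FILE` environment variable (paths here are illustrative):

```bash
./local-ai --models-path ./models --config-file ./config.yaml

# equivalent form using environment variables
CONFIG_FILE=./config.yaml MODELS_PATH=./models ./local-ai
```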
See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
</details>
### Prompt templates
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
<details>
You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance this can be used with alpaca:
You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl` which will be used as a default prompt and can be used with alpaca:
```
Below is an instruction that describes a task. Write a response that appropriately completes the request.
The below instruction describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Input}}
@@ -79,13 +272,53 @@ Below is an instruction that describes a task. Write a response that appropriate
### Response:
```
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for most popular models.
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models.
For the edit endpoint, an example template for alpaca-based models can be:
```yaml
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{{.Instruction}}
### Input:
{{.Input}}
### Response:
```
</details>
## API
### CLI
`LocalAI` provides an API for running text generation as a service that follows the OpenAI reference and can be used as a drop-in replacement. Models are kept in memory once loaded the first time.
You can control LocalAI with command-line arguments to specify a binding address or the number of threads.
<details>
Usage:
```
local-ai --models-path <model_path> [--address <address>] [--threads <num_threads>]
```
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
| debug | DEBUG | false | Enable debug mode. |
| config-file | CONFIG_FILE | empty | Path to a LocalAI config file. |
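For illustration, here is the same server configuration expressed once with flags and once with the equivalent environment variables (values are arbitrary):

```bash
./local-ai --models-path ./models --address ":8080" --threads 4 --context-size 512 --debug

# equivalent form using environment variables
MODELS_PATH=./models ADDRESS=":8080" THREADS=4 CONTEXT_SIZE=512 DEBUG=true ./local-ai
```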
</details>
## Setup
Currently LocalAI comes as a container image and can be used with Docker or a container engine of your choice. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
### Docker
<details>
Example of starting the API with `docker`:
@@ -94,7 +327,7 @@ Example of starting the API with `docker`:
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
```
And you'll see:
You should see:
```
┌───────────────────────────────────────────────────┐
│ Fiber v2.42.0 │
@@ -106,33 +339,136 @@ And you'll see:
└───────────────────────────────────────────────────┘
```
You can control the API server options with command line arguments:
</details>
### Build locally
<details>
In order to build the `LocalAI` container image locally you can use `docker`:
```
local-api --models-path <model_path> [--address <address>] [--threads <num_threads>]
# build the image
docker build -t LocalAI .
docker run LocalAI
```
The API takes the following parameters:
Or you can build the binary with `make`:
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
Once the server is running, you can start making requests to it over HTTP, using the OpenAI API.
```
make build
```
</details>
### Supported OpenAI API endpoints
### Build on mac
Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`.
<details>
The following has been tested by one Mac user and found to work. Note that this doesn't use Docker to run the server:
```
# install build dependencies
brew install cmake
brew install go
# clone the repo
git clone https://github.com/go-skynet/LocalAI.git
cd LocalAI
# build the binary
make build
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Use a template from the examples
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
# Run LocalAI
./local-ai --models-path ./models/ --debug
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-gpt4all-j",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9
}'
```
</details>
### Windows compatibility
It should work; however, you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
### Run LocalAI in Kubernetes
LocalAI can be installed inside Kubernetes with helm.
<details>
1. Add the helm repo
```bash
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
```
2. Create a values file with your settings:
```bash
cat <<EOF > values.yaml
deployment:
  image: quay.io/go-skynet/local-ai:latest
  env:
    threads: 4
    contextSize: 1024
    modelsPath: "/models"
# Optionally create a PVC, mount the PV to the LocalAI Deployment,
# and download a model to prepopulate the models directory
modelsVolume:
  enabled: true
  url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
  pvc:
    size: 6Gi
    accessModes:
    - ReadWriteOnce
  auth:
    # Optional value for HTTP basic access authentication header
    basic: "" # 'username:password' base64 encoded
service:
  type: ClusterIP
  annotations: {}
  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
EOF
```
3. Install the helm chart:
```bash
helm repo update
helm install local-ai go-skynet/local-ai -f values.yaml
```
Check out also the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts).
</details>
## Supported OpenAI API endpoints
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
The following is the list of supported endpoints and parameters.
#### Chat completions
Note:
- You can also specify the model as part of the OpenAI token (see the sketch after this list).
- If only one model is available, the API will use it for all the requests.
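For example, a minimal sketch of selecting the model through the bearer token; the model name here is illustrative and must exist in the models path:

```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer ggml-gpt4all-j" \
  -d '{"messages": [{"role": "user", "content": "How are you?"}], "temperature": 0.9}'
```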
### Chat completions
<details>
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```
@@ -144,10 +480,32 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
#### Completions
### Edit completions
<details>
To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"instruction": "rephrase",
"input": "Black cat jumped out of the window",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`.
</details>
### Completions
<details>
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body:
For example, to generate a comletion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
@@ -158,69 +516,122 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
Available additional parameters: `top_p`, `top_k`, `max_tokens`
#### List models
</details>
### List models
<details>
You can list all the models available with:
```
curl http://localhost:8080/v1/models
```
## Using other models
</details>
gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
## Frequently asked questions
```bash
wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
mkdir models
cp gpt4all.. models/
git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
pip install sentencepiece
python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
# There will be a new model with the ".tmp" extension, you have to use that one!
```
Here are answers to some of the most common questions.
### Windows compatibility
It should work; however, you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
### How do I get models?
### Kubernetes
<details>
You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml, or models from gpt4all should also work: https://github.com/nomic-ai/gpt4all.
### Build locally
</details>
Pre-built images might fit well for most modern hardware; however, you can, and might need to, build the images manually.
### What's the difference with Serge, or XXX?
In order to build the `LocalAI` container image locally you can use `docker`:
```
# build the image
docker build -t LocalAI .
docker run LocalAI
```
<details>
Or build the binary with `make`:
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp); it handles all of these internally, which makes for faster inference and makes it easy to set up locally and deploy to Kubernetes.
```
make build
```
</details>
### Can I use it with a Discord bot, or XXX?
<details>
Yes! If the client uses OpenAI and supports setting a different base URL for requests, you can use the LocalAI endpoint. This allows you to use LocalAI with every application that was built to work with OpenAI, without changing the application!
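For instance, clients that honour the standard OpenAI environment variables (an assumption about the client, not a LocalAI feature) only need the base URL overridden:

```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-placeholder   # LocalAI does not validate the key, so any placeholder works
```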
</details>
### Can this leverage GPUs?
<details>
Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/llama.cpp/discussions/915.
</details>
### Where is the webUI?
<details>
localai-webui and chatbot-ui are available in the examples section and can be set up as per the instructions. However, as LocalAI is an API, you can already plug it into existing projects that provide UI interfaces to OpenAI's APIs. There are several already on GitHub, and they should be compatible with LocalAI already (as it mimics the OpenAI API)
</details>
### Does it work with AutoGPT?
<details>
AutoGPT currently doesn't allow setting a different API URL, but there is a PR open for it, so this should be possible soon!
</details>
## Projects already using LocalAI to run local models
Feel free to open up a PR to get your project listed!
- [Kairos](https://github.com/kairos-io/kairos)
- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)
## Blog posts and other articles
- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
- https://kairos.io/docs/examples/localai/
## Short-term roadmap
- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
- Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
- [x] Multi-model support
- Have a webUI!
- [x] Have a webUI!
- [x] Allow configuration of defaults for models.
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models, directly from the webui.
## Star history
[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)
## License
LocalAI is a community-driven project. It was initially created by [Ettore Di Giacinto](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
MIT
## Golang bindings used
- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
## Acknowledgements
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- https://github.com/tatsu-lab/stanford_alpaca
- https://github.com/cornelk/llama-go for the initial ideas
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)
## Contributors
<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
<img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
</a>

api/api.go

@@ -1,349 +1,26 @@
package api

import (
    "encoding/json"
    "errors"
    "fmt"
    "strings"
    "sync"

    model "github.com/go-skynet/LocalAI/pkg/model"
    gpt2 "github.com/go-skynet/go-gpt2.cpp"
    gptj "github.com/go-skynet/go-gpt4all-j.cpp"
    llama "github.com/go-skynet/go-llama.cpp"
    "github.com/gofiber/fiber/v2"
    "github.com/gofiber/fiber/v2/middleware/cors"
    "github.com/gofiber/fiber/v2/middleware/logger"
    "github.com/gofiber/fiber/v2/middleware/recover"
    "github.com/rs/zerolog"
    "github.com/rs/zerolog/log"
)

type OpenAIResponse struct {
    Created int      `json:"created,omitempty"`
    Object  string   `json:"chat.completion,omitempty"`
    ID      string   `json:"id,omitempty"`
    Model   string   `json:"model,omitempty"`
    Choices []Choice `json:"choices,omitempty"`
}

type Choice struct {
    Index        int      `json:"index,omitempty"`
    FinishReason string   `json:"finish_reason,omitempty"`
    Message      *Message `json:"message,omitempty"`
    Text         string   `json:"text,omitempty"`
}

type Message struct {
    Role    string `json:"role,omitempty"`
    Content string `json:"content,omitempty"`
}

type OpenAIModel struct {
    ID     string `json:"id"`
    Object string `json:"object"`
}

type OpenAIRequest struct {
    Model string `json:"model"`

    // Prompt is read only by completion API calls
    Prompt string `json:"prompt"`

    // Messages is read only by chat/completion API calls
    Messages []Message `json:"messages"`

    Echo bool `json:"echo"`
    // Common options between all the API calls
    TopP        float64 `json:"top_p"`
    TopK        int     `json:"top_k"`
    Temperature float64 `json:"temperature"`
    Maxtokens   int     `json:"max_tokens"`

    N int `json:"n"`

    // Custom parameters - not present in the OpenAI API
    Batch     int  `json:"batch"`
    F16       bool `json:"f16kv"`
    IgnoreEOS bool `json:"ignore_eos"`
    Seed      int  `json:"seed"`
}

// https://platform.openai.com/docs/api-reference/completions
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
    return func(c *fiber.Ctx) error {
        var err error
        var model *llama.LLama
        var gptModel *gptj.GPTJ
        var gpt2Model *gpt2.GPT2
        var stableLMModel *gpt2.StableLM

        input := new(OpenAIRequest)
        // Get input data from the request body
        if err := c.BodyParser(input); err != nil {
            return err
        }

        modelFile := input.Model
        received, _ := json.Marshal(input)

        log.Debug().Msgf("Request received: %s", string(received))

        // Set model from bearer token, if available
        bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
        bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)

        if modelFile == "" && !bearerExists {
            return fmt.Errorf("no model specified")
        }

        if bearerExists { // model specified in bearer token takes precedence
            log.Debug().Msgf("Using model from bearer token: %s", bearer)
            modelFile = bearer
        }

        // Try to load the model with both
        var llamaerr, gpt2err, gptjerr, stableerr error
        llamaOpts := []llama.ModelOption{}
        if ctx != 0 {
            llamaOpts = append(llamaOpts, llama.SetContext(ctx))
        }
        if f16 {
            llamaOpts = append(llamaOpts, llama.EnableF16Memory)
        }

        // TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
        model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
        if llamaerr != nil {
            gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
            if gptjerr != nil {
                gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
                if gpt2err != nil {
                    stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
                    if stableerr != nil {
                        return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
                    }
                }
            }
        }

        // This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
        mutexMap.Lock()
        l, ok := mutexes[modelFile]
        if !ok {
            m := &sync.Mutex{}
            mutexes[modelFile] = m
            l = m
        }
        mutexMap.Unlock()
        l.Lock()
        defer l.Unlock()

        // Set the parameters for the language model prediction
        topP := input.TopP
        if topP == 0 {
            topP = 0.7
        }
        topK := input.TopK
        if topK == 0 {
            topK = 80
        }

        temperature := input.Temperature
        if temperature == 0 {
            temperature = 0.9
        }

        tokens := input.Maxtokens
        if tokens == 0 {
            tokens = 512
        }

        predInput := input.Prompt
        if chat {
            mess := []string{}
            // TODO: encode roles
            for _, i := range input.Messages {
                mess = append(mess, i.Content)
            }

            predInput = strings.Join(mess, "\n")
        }

        // A model can have a "file.bin.tmpl" file associated with a prompt template prefix
        templatedInput, err := loader.TemplatePrefix(modelFile, struct {
            Input string
        }{Input: predInput})
        if err == nil {
            predInput = templatedInput
            log.Debug().Msgf("Template found, input modified to: %s", predInput)
        }

        result := []Choice{}

        n := input.N

        if input.N == 0 {
            n = 1
        }

        var predFunc func() (string, error)
        switch {
        case stableLMModel != nil:
            predFunc = func() (string, error) {
                // Generate the prediction using the language model
                predictOptions := []gpt2.PredictOption{
                    gpt2.SetTemperature(temperature),
                    gpt2.SetTopP(topP),
                    gpt2.SetTopK(topK),
                    gpt2.SetTokens(tokens),
                    gpt2.SetThreads(threads),
                }

                if input.Batch != 0 {
                    predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
}
return stableLMModel.Predict(
predInput,
predictOptions...,
)
}
case gpt2Model != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(temperature),
gpt2.SetTopP(topP),
gpt2.SetTopK(topK),
gpt2.SetTokens(tokens),
gpt2.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
}
return gpt2Model.Predict(
predInput,
predictOptions...,
)
}
case gptModel != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gptj.PredictOption{
gptj.SetTemperature(temperature),
gptj.SetTopP(topP),
gptj.SetTopK(topK),
gptj.SetTokens(tokens),
gptj.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gptj.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gptj.SetSeed(input.Seed))
}
return gptModel.Predict(
predInput,
predictOptions...,
)
}
case model != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []llama.PredictOption{
llama.SetTemperature(temperature),
llama.SetTopP(topP),
llama.SetTopK(topK),
llama.SetTokens(tokens),
llama.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
}
if input.F16 {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if input.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if input.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(input.Seed))
}
return model.Predict(
predInput,
predictOptions...,
)
}
}
for i := 0; i < n; i++ {
prediction, err := predFunc()
if err != nil {
return err
}
if input.Echo {
prediction = predInput + prediction
}
if chat {
result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
} else {
result = append(result, Choice{Text: prediction})
}
}
jsonResult, _ := json.Marshal(result)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
})
func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
}
func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := loader.ListModels()
if err != nil {
return err
}
dataModels := []OpenAIModel{}
for _, m := range models {
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
}
return c.JSON(struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
}
func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f16 bool) error {
// Return errors as JSON responses
app := fiber.New(fiber.Config{
DisableStartupMessage: disableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -356,31 +33,59 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
}
// Send custom error page
return ctx.Status(code).JSON(struct {
Error string `json:"error"`
}{Error: err.Error()})
return ctx.Status(code).JSON(
ErrorResponse{
Error: &APIError{Message: err.Error(), Code: code},
},
)
},
})
if debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
}
cm := make(ConfigMerger)
if err := cm.LoadConfigs(loader.ModelPath); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
}
if configFile != "" {
if err := cm.LoadConfigFile(configFile); err != nil {
log.Error().Msgf("error loading config file: %s", err.Error())
}
}
if debug {
for k, v := range cm {
log.Debug().Msgf("Model: %s (config: %+v)", k, v)
}
}
// Default middleware config
app.Use(recover.New())
app.Use(cors.New())
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mu := map[string]*sync.Mutex{}
var mumutex = &sync.Mutex{}
// openAI compatible API endpoint
app.Post("/v1/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Get("/v1/models", listModels(loader))
app.Get("/models", listModels(loader))
app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
// Start the server
app.Listen(listenAddr)
return nil
app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
// /v1/engines/{engine_id}/embeddings
app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Get("/v1/models", listModels(loader, cm))
app.Get("/models", listModels(loader, cm))
return app
}
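With the routes above wired up, the server exposes the OpenAI-compatible surface on both the `/v1`-prefixed and bare paths, and a bearer token can stand in for the `model` field. A minimal smoke test against a local instance (the model name `ggml-gpt4all-j` is an assumption; substitute whatever is in your models path):

```bash
# list the models the loader discovered
curl http://localhost:8080/v1/models

# select the model via bearer token instead of the request body
curl http://localhost:8080/chat/completions \
  -H "Authorization: Bearer ggml-gpt4all-j" \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "How are you?"}]}'
```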

api/api_test.go Normal file

@@ -0,0 +1,138 @@
package api_test
import (
"context"
"os"
. "github.com/go-skynet/LocalAI/api"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
)
var _ = Describe("API test", func() {
var app *fiber.App
var modelLoader *model.ModelLoader
var client *openai.Client
var client2 *openaigo.Client
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App("", modelLoader, 1, 512, false, true, true)
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
app.Shutdown()
})
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(3))
Expect(models.Models[0].ID).To(Equal("testmodel"))
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions ", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate completions from model configs", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions from model configs", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 5 errors occurred:"))
})
})
Context("Config file", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
app.Shutdown()
})
It("can generate chat completions from config file", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(5))
Expect(models.Models[0].ID).To(Equal("testmodel"))
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate edit completions from config file", func() {
request := openaigo.EditCreateRequestBody{
Model: "list2",
Instruction: "foo",
Input: "bar",
}
resp, err := client2.CreateEdit(context.Background(), request)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
})
})

api/apt_suite_test.go Normal file

@@ -0,0 +1,13 @@
package api_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI test suite")
}
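The specs above boot a real server, so they need the environment that the `BeforeEach` blocks read: `MODELS_PATH`, plus `CONFIG_FILE` for the config-file context. A sketch of a local run (the fixture paths are hypothetical); since these are ginkgo v2 specs, plain `go test` works as well as the ginkgo runner:

```bash
# hypothetical fixture paths - point these at your own test models
export MODELS_PATH=$PWD/test-models
export CONFIG_FILE=$PWD/test-models/config.yaml

go test -v ./api/
# or, equivalently, with the ginkgo CLI
go run github.com/onsi/ginkgo/v2/ginkgo -v ./api/
```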

api/config.go Normal file

@@ -0,0 +1,281 @@
package api
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
type Config struct {
OpenAIRequest `yaml:"parameters"`
Name string `yaml:"name"`
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
TrimSpace []string `yaml:"trimspace"`
ContextSize int `yaml:"context_size"`
F16 bool `yaml:"f16"`
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
MirostatETA float64 `yaml:"mirostat_eta"`
MirostatTAU float64 `yaml:"mirostat_tau"`
Mirostat int `yaml:"mirostat"`
PromptStrings, InputStrings []string
}
type TemplateConfig struct {
Completion string `yaml:"completion"`
Chat string `yaml:"chat"`
Edit string `yaml:"edit"`
}
type ConfigMerger map[string]Config
func ReadConfigFile(file string) ([]*Config, error) {
c := &[]*Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return *c, nil
}
func ReadConfig(file string) (*Config, error) {
c := &Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return c, nil
}
func (cm ConfigMerger) LoadConfigFile(file string) error {
c, err := ReadConfigFile(file)
if err != nil {
return fmt.Errorf("cannot load config file: %w", err)
}
for _, cc := range c {
cm[cc.Name] = *cc
}
return nil
}
func (cm ConfigMerger) LoadConfig(file string) error {
c, err := ReadConfig(file)
if err != nil {
return fmt.Errorf("cannot read config file: %w", err)
}
cm[c.Name] = *c
return nil
}
func (cm ConfigMerger) LoadConfigs(path string) error {
files, err := ioutil.ReadDir(path)
if err != nil {
return err
}
for _, file := range files {
// Skip templates, YAML and .keep files
if !strings.Contains(file.Name(), ".yaml") {
continue
}
c, err := ReadConfig(filepath.Join(path, file.Name()))
if err == nil {
cm[c.Name] = *c
}
}
return nil
}
func updateConfig(config *Config, input *OpenAIRequest) {
if input.Echo {
config.Echo = input.Echo
}
if input.TopK != 0 {
config.TopK = input.TopK
}
if input.TopP != 0 {
config.TopP = input.TopP
}
if input.Temperature != 0 {
config.Temperature = input.Temperature
}
if input.Maxtokens != 0 {
config.Maxtokens = input.Maxtokens
}
switch stop := input.Stop.(type) {
case string:
if stop != "" {
config.StopWords = append(config.StopWords, stop)
}
case []interface{}:
for _, pp := range stop {
if s, ok := pp.(string); ok {
config.StopWords = append(config.StopWords, s)
}
}
}
if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
if input.Keep != 0 {
config.Keep = input.Keep
}
if input.Batch != 0 {
config.Batch = input.Batch
}
if input.F16 {
config.F16 = input.F16
}
if input.IgnoreEOS {
config.IgnoreEOS = input.IgnoreEOS
}
if input.Seed != 0 {
config.Seed = input.Seed
}
if input.Mirostat != 0 {
config.Mirostat = input.Mirostat
}
if input.MirostatETA != 0 {
config.MirostatETA = input.MirostatETA
}
if input.MirostatTAU != 0 {
config.MirostatTAU = input.MirostatTAU
}
switch inputs := input.Input.(type) {
case string:
if inputs != "" {
config.InputStrings = append(config.InputStrings, inputs)
}
case []interface{}:
for _, pp := range inputs {
if s, ok := pp.(string); ok {
config.InputStrings = append(config.InputStrings, s)
}
}
}
switch p := input.Prompt.(type) {
case string:
config.PromptStrings = append(config.PromptStrings, p)
case []interface{}:
for _, pp := range p {
if s, ok := pp.(string); ok {
config.PromptStrings = append(config.PromptStrings, s)
}
}
}
}
func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
input := new(OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return nil, nil, err
}
modelFile := input.Model
if c.Params("model") != "" {
modelFile = c.Params("model")
}
received, _ := json.Marshal(input)
log.Debug().Msgf("Request received: %s", string(received))
// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelFile == "" && !bearerExists {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
} else {
log.Debug().Msgf("No model specified, returning error")
return nil, nil, fmt.Errorf("no model specified")
}
}
// If a model is found in bearer token takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
// Load a config file if present after the model name
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
if _, err := os.Stat(modelConfig); err == nil {
if err := cm.LoadConfig(modelConfig); err != nil {
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
}
}
var config *Config
cfg, exists := cm[modelFile]
if !exists {
config = &Config{
OpenAIRequest: defaultRequest(modelFile),
ContextSize: ctx,
Threads: threads,
F16: f16,
Debug: debug,
}
} else {
config = &cfg
}
// Set the parameters for the language model prediction
updateConfig(config, input)
// Don't allow 0 as setting
if config.Threads == 0 {
if threads != 0 {
config.Threads = threads
} else {
config.Threads = 4
}
}
return config, input, nil
}
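As the `modelFile+".yaml"` lookup above implies, per-model defaults can be set by dropping a YAML file next to the model binary; `readConfig` merges it before applying the request overrides. A minimal sketch (field names follow the `Config` struct above; the model name is hypothetical):

```bash
# hypothetical model name - the file must be named <model>.yaml
# and its "name" field must match for the config to be picked up
cat > models/ggml-gpt4all-j.yaml <<'EOF'
name: ggml-gpt4all-j
parameters:
  model: ggml-gpt4all-j
  temperature: 0.2
  top_p: 0.7
context_size: 1024
threads: 4
EOF
```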

api/openai.go Normal file

@@ -0,0 +1,403 @@
package api
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"strings"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
// APIError provides error information returned by the OpenAI API.
type APIError struct {
Code any `json:"code,omitempty"`
Message string `json:"message"`
Param *string `json:"param,omitempty"`
Type string `json:"type"`
}
type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type Item struct {
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
Object string `json:"object,omitempty"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Data []Item `json:"data,omitempty"`
Usage OpenAIUsage `json:"usage"`
}
type Choice struct {
Index int `json:"index,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
Message *Message `json:"message,omitempty"`
Delta *Message `json:"delta,omitempty"`
Text string `json:"text,omitempty"`
}
type Message struct {
Role string `json:"role,omitempty" yaml:"role"`
Content string `json:"content,omitempty" yaml:"content"`
}
type OpenAIModel struct {
ID string `json:"id"`
Object string `json:"object"`
}
type OpenAIRequest struct {
Model string `json:"model" yaml:"model"`
// Prompt is read only by completion API calls
Prompt interface{} `json:"prompt" yaml:"prompt"`
// Edit endpoint
Instruction string `json:"instruction" yaml:"instruction"`
Input interface{} `json:"input" yaml:"input"`
Stop interface{} `json:"stop" yaml:"stop"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages" yaml:"messages"`
Stream bool `json:"stream"`
Echo bool `json:"echo"`
// Common options between all the API calls
TopP float64 `json:"top_p" yaml:"top_p"`
TopK int `json:"top_k" yaml:"top_k"`
Temperature float64 `json:"temperature" yaml:"temperature"`
Maxtokens int `json:"max_tokens" yaml:"max_tokens"`
N int `json:"n"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch" yaml:"batch"`
F16 bool `json:"f16" yaml:"f16"`
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
Keep int `json:"n_keep" yaml:"n_keep"`
MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
Mirostat int `json:"mirostat" yaml:"mirostat"`
Seed int `json:"seed" yaml:"seed"`
}
func defaultRequest(modelFile string) OpenAIRequest {
return OpenAIRequest{
TopP: 0.7,
TopK: 80,
Maxtokens: 512,
Temperature: 0.9,
Model: modelFile,
}
}
// https://platform.openai.com/docs/api-reference/completions
func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request: %w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := config.Model
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
var result []Choice
for _, i := range config.PromptStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: i})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
return err
}
result = append(result, r...)
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "text_completion",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
// https://platform.openai.com/docs/api-reference/embeddings
func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request: %w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
items := []Item{}
for i, s := range config.InputStrings {
// get the model function to call for the result
embedFn, err := ModelEmbedding(s, loader, *config)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Data: items,
Object: "list",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
resp := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
Object: "chat.completion.chunk",
}
log.Debug().Msgf("Sending goroutine: %s", s)
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request: %w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
var predInput string
mess := []string{}
for _, i := range input.Messages {
r := config.Roles[i.Role]
if r == "" {
r = i.Role
}
content := fmt.Sprint(r, " ", i.Content)
mess = append(mess, content)
}
predInput = strings.Join(mess, "\n")
if input.Stream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
// c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
}
templateFile := config.Model
if config.TemplateConfig.Chat != "" {
templateFile = config.TemplateConfig.Chat
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: predInput})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
if input.Stream {
responses := make(chan OpenAIResponse)
go process(predInput, input, config, loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
for ev := range responses {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
fmt.Fprintf(w, "event: data\n\n")
fmt.Fprintf(w, "data: %v\n\n", buf.String())
log.Debug().Msgf("Sending chunk: %s", buf.String())
w.Flush()
}
w.WriteString("event: data\n\n")
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{FinishReason: "stop"}},
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.Flush()
}))
return nil
}
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
}, nil)
if err != nil {
return err
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "chat.completion",
}
respData, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", respData)
// Return the prediction in the response body
return c.JSON(resp)
}
}
func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request: %w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := config.Model
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
}
var result []Choice
for _, i := range config.InputStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
Instruction string
}{Input: i})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
return err
}
result = append(result, r...)
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "edit",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := loader.ListModels()
if err != nil {
return err
}
var mm map[string]interface{} = map[string]interface{}{}
dataModels := []OpenAIModel{}
for _, m := range models {
mm[m] = nil
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
}
for k := range cm {
if _, exists := mm[k]; !exists {
dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
}
}
return c.JSON(struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
}
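Taken together, the handlers above cover the completion, chat, edit, embedding, and model-listing surfaces, so each can be exercised with plain `curl`. A few sketches against a local instance (model names are placeholders, and the embeddings call only works for a model whose config sets `embeddings: true`, as enforced in `ModelEmbedding`):

```bash
# completion
curl http://localhost:8080/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "ggml-gpt4all-j", "prompt": "A long time ago", "max_tokens": 64}'

# chat, streamed as server-sent events when "stream" is true
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "ggml-gpt4all-j", "messages": [{"role": "user", "content": "Hi"}], "stream": true}'

# edit
curl http://localhost:8080/v1/edits \
  -H "Content-Type: application/json" \
  -d '{"model": "ggml-gpt4all-j", "instruction": "rephrase", "input": "the quick brown fox"}'

# embeddings (requires embeddings: true in the model config)
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "my-embedding-model", "input": "A STRANGE GAME."}'
```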

api/prediction.go Normal file

@@ -0,0 +1,358 @@
package api
import (
"fmt"
"regexp"
"strings"
"sync"
"github.com/donomii/go-rwkv.cpp"
model "github.com/go-skynet/LocalAI/pkg/model"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
)
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
var mutexMap sync.Mutex
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
func defaultLLamaOpts(c Config) []llama.ModelOption {
llamaOpts := []llama.ModelOption{}
if c.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
}
if c.F16 {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if c.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
return llamaOpts
}
func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
if !c.Embeddings {
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
}
modelFile := c.Model
llamaOpts := defaultLLamaOpts(c)
var inferenceModel interface{}
var err error
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
} else {
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
}
if err != nil {
return nil, err
}
var fn func() ([]float32, error)
switch model := inferenceModel.(type) {
case *llama.LLama:
fn = func() ([]float32, error) {
predictOptions := buildLLamaPredictOptions(c)
return model.Embeddings(s, predictOptions...)
}
default:
fn = func() ([]float32, error) {
return nil, fmt.Errorf("embeddings not supported by the backend")
}
}
return func() ([]float32, error) {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
embeds, err := fn()
if err != nil {
return embeds, err
}
// Remove trailing 0s
for i := len(embeds) - 1; i >= 0; i-- {
if embeds[i] == 0.0 {
embeds = embeds[:i]
} else {
break
}
}
return embeds, nil
}, nil
}
func buildLLamaPredictOptions(c Config) []llama.PredictOption {
// Generate the prediction using the language model
predictOptions := []llama.PredictOption{
llama.SetTemperature(c.Temperature),
llama.SetTopP(c.TopP),
llama.SetTopK(c.TopK),
llama.SetTokens(c.Maxtokens),
llama.SetThreads(c.Threads),
}
if c.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
}
if c.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
}
if c.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
}
if c.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
if c.RepeatPenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
}
if c.Keep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
}
if c.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
}
if c.F16 {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if c.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if c.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
}
return predictOptions
}
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
supportStreams := false
modelFile := c.Model
llamaOpts := defaultLLamaOpts(c)
var inferenceModel interface{}
var err error
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
} else {
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
}
if err != nil {
return nil, err
}
var fn func() (string, error)
switch model := inferenceModel.(type) {
case *rwkv.RwkvState:
supportStreams = true
fn = func() (string, error) {
stopWord := "\n"
if len(c.StopWords) > 0 {
stopWord = c.StopWords[0]
}
if err := model.ProcessInput(s); err != nil {
return "", err
}
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
return response, nil
}
case *gpt2.StableLM:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.GPT2:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gptj.GPTJ:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gptj.PredictOption{
gptj.SetTemperature(c.Temperature),
gptj.SetTopP(c.TopP),
gptj.SetTopK(c.TopK),
gptj.SetTokens(c.Maxtokens),
gptj.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *llama.LLama:
supportStreams = true
fn = func() (string, error) {
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}
predictOptions := buildLLamaPredictOptions(c)
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
}
return func() (string, error) {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
res, err := fn()
if tokenCallback != nil && !supportStreams {
tokenCallback(res)
}
return res, err
}, nil
}
func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
result := []Choice{}
n := input.N
if input.N == 0 {
n = 1
}
// get the model function to call for the result
predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
if err != nil {
return result, err
}
for i := 0; i < n; i++ {
prediction, err := predFunc()
if err != nil {
return result, err
}
prediction = Finetune(*config, predInput, prediction)
cb(prediction, &result)
//result = append(result, Choice{Text: prediction})
}
return result, err
}
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
var mu sync.Mutex = sync.Mutex{}
func Finetune(config Config, input, prediction string) string {
if config.Echo {
prediction = input + prediction
}
for _, c := range config.Cutstrings {
mu.Lock()
reg, ok := cutstrings[c]
if !ok {
cutstrings[c] = regexp.MustCompile(c)
reg = cutstrings[c]
}
mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}
return prediction
}
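`Finetune` is driven entirely by the `cutstrings` and `trimspace` fields of the model config, so output post-processing can be adjusted without code changes: each `cutstrings` entry is compiled as a regular expression and deleted from the prediction, and each `trimspace` entry is trimmed as a prefix along with surrounding whitespace. A hypothetical fragment appended to a model config:

```bash
# hypothetical config file name; cutstrings entries are Go regexes
cat >> models/ggml-gpt4all-j.yaml <<'EOF'
cutstrings:
  - "^Assistant: "
trimspace:
  - "`"
EOF
```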


@@ -5,15 +5,11 @@ services:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
# args:
# BUILD_TYPE: generic # Uncomment to build CPU generic code that works on most HW
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- MODELS_PATH=$MODELS_PATH
- CONTEXT_SIZE=$CONTEXT_SIZE
- THREADS=$THREADS
- DEBUG=$DEBUG
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

entrypoint.sh Executable file

@@ -0,0 +1,7 @@
#!/bin/bash
cd /build
make build
./local-ai "$@"
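Because the entrypoint forwards `"$@"` to the freshly built binary, any runtime flags appended to the container command reach `local-ai` directly. A sketch, assuming an image that uses this entrypoint and the usual flag names (verify `--models-path` and friends against `local-ai --help` before relying on them):

```bash
docker run -ti --rm -p 8080:8080 \
  -v $PWD/models:/models \
  quay.io/go-skynet/local-ai:latest \
  --models-path /models --context-size 512 --threads 4
```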

examples/README.md Normal file

@@ -0,0 +1,78 @@
# Examples
Here is a list of projects that can easily be integrated with the LocalAI backend.
### Projects
### Chatbot-UI
_by [@mkellerman](https://github.com/mkellerman)_
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/)
### Discord bot
_by [@mudler](https://github.com/mudler)_
Run a discord bot which lets you talk directly with a model
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our discord server.
### Langchain
_by [@dave-gray101](https://github.com/dave-gray101)_
A ready-to-use example showing end to end how to integrate LocalAI with langchain
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/)
### Langchain Python
_by [@mudler](https://github.com/mudler)_
A ready-to-use example showing end to end how to integrate LocalAI with langchain
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/)
### LocalAI WebUI
_by [@dhruvgera](https://github.com/dhruvgera)_
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
A light, community-maintained web interface for LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/)
### How to run rwkv models
_by [@mudler](https://github.com/mudler)_
A full example on how to run RWKV models with LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/)
### Slack bot
_by [@mudler](https://github.com/mudler)_
Run a slack bot which lets you talk directly with a model
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)
### Question answering on documents
_by [@mudler](https://github.com/mudler)_
Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
## Want to contribute?
Create an issue, and put `Example: <description>` in the title! We will post your examples here.


@@ -0,0 +1,46 @@
# chatbot-ui
Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/chatbot-ui
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
```
## Pointing chatbot-ui to a separately managed LocalAI service
If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the example below. The file is smaller because the section that would normally start the LocalAI service has been removed. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to reach (marked `<<LOCALAI_IP>>` below):
```
version: '3.6'
services:
chatgpt:
image: ghcr.io/mckaywrigley/chatbot-ui:main
ports:
- 3000:3000
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
```
Once you've edited the compose file, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
## Accessing chatbot-ui
Open http://localhost:3000 for the Web UI.


@@ -0,0 +1,24 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
chatgpt:
image: ghcr.io/mckaywrigley/chatbot-ui:main
ports:
- 3000:3000
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://api:8080'


@@ -0,0 +1 @@
{{.Input}}


@@ -0,0 +1,17 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: gpt4all


@@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:


@@ -0,0 +1,6 @@
OPENAI_API_KEY=x
DISCORD_BOT_TOKEN=x
DISCORD_CLIENT_ID=x
OPENAI_API_BASE=http://api:8080
ALLOWED_SERVER_IDS=x
SERVER_TO_MODERATION_CHANNEL=1:1


@@ -0,0 +1,76 @@
# discord-bot
![Screenshot from 2023-05-01 07-58-19](https://user-images.githubusercontent.com/2420543/235413924-0cb2e75b-f2d6-4119-8610-44386e44afb8.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/discord-bot
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup)
cp -rfv .env.example .env
vim .env
# start with docker-compose
docker-compose up -d --build
```
Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup
Open the URL printed in the console and give the bot permission in your server. Then start a thread with `/chat ..`
## Kubernetes
- install the local-ai chart first
- change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest:
```yaml
apiVersion: v1
kind: Namespace
metadata:
name: discord-bot
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: localai
namespace: discord-bot
labels:
app: localai
spec:
selector:
matchLabels:
app: localai
replicas: 1
template:
metadata:
labels:
app: localai
name: localai
spec:
containers:
- name: localai-discord
env:
- name: OPENAI_API_KEY
value: "x"
- name: DISCORD_BOT_TOKEN
value: ""
- name: DISCORD_CLIENT_ID
value: ""
- name: OPENAI_API_BASE
value: "http://local-ai.default.svc.cluster.local:8080"
- name: ALLOWED_SERVER_IDS
value: "xx"
- name: SERVER_TO_MODERATION_CHANNEL
value: "1:1"
image: quay.io/go-skynet/gpt-discord-bot:main
```
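A sketch of the two installation steps; the chart repository URL and the manifest file name are assumptions to adapt to your cluster:

```bash
# assumed chart repo for the local-ai chart
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
helm repo update
helm install local-ai go-skynet/local-ai

# save the manifest above as discord-bot.yaml, then apply it
kubectl apply -f discord-bot.yaml
```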


@@ -0,0 +1,21 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
bot:
image: quay.io/go-skynet/gpt-discord-bot:main
env_file:
- .env

examples/discord-bot/models Symbolic link

@@ -0,0 +1 @@
../chatbot-ui/models/


@@ -0,0 +1,47 @@
## Loosely based on https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
from io import StringIO
import sys
import os
from typing import Dict, Optional
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
class PythonREPL:
"""Simulates a standalone Python REPL."""
def __init__(self):
pass
def run(self, command: str) -> str:
"""Run command and returns anything printed."""
# sys.stderr.write("EXECUTING PYTHON CODE:\n---\n" + command + "\n---\n")
old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()
try:
exec(command, globals())
sys.stdout = old_stdout
output = mystdout.getvalue()
except Exception as e:
sys.stdout = old_stdout
output = str(e)
# sys.stderr.write("PYTHON OUTPUT: \"" + output + "\"\n")
return output
llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
python_repl = Tool(
"Python REPL",
PythonREPL().run,
"""A Python shell. Use this to execute python commands. Input should be a valid python command.
If you expect output it should be printed out.""",
)
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
agent.run("What is the 10th fibonacci number?")


@@ -0,0 +1,33 @@
## Langchain-python
Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
To point langchain at LocalAI, just set the `OPENAI_API_BASE` URL to your instance and provide any random string as the API token.
See the example below:
```
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain-python
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
pip install langchain
pip install openai
export OPENAI_API_BASE=http://localhost:8080
export OPENAI_API_KEY=sk-
python test.py
# A good company name for a company that makes colorful socks would be "Colorsocks".
```


@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]


@@ -0,0 +1 @@
../chatbot-ui/models


@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

examples/langchain/.gitignore vendored Normal file

@@ -0,0 +1,2 @@
models/ggml-koala-13B-4bit-128g
models/ggml-gpt4all-j


@@ -0,0 +1,6 @@
FROM node:latest
COPY ./langchainjs-localai-example /app
WORKDIR /app
RUN npm install
RUN npm run build
ENTRYPOINT [ "npm", "run", "start" ]


@@ -0,0 +1,5 @@
FROM python:3.10-bullseye
COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT [ "python", "./full_demo.py" ]


@@ -0,0 +1,30 @@
# langchain
Example of using langchain, with the standard OpenAI llm module, and LocalAI. Has docker compose profiles for both the Typescript and Python versions.
**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain
# (optional) - Edit the example code in typescript.
# vi ./langchainjs-localai-example/index.ts
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose for typescript!
docker-compose --profile ts up --build
# or start with docker-compose for python!
docker-compose --profile py up --build
```
## Copyright
Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.


@@ -0,0 +1,43 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
js:
build:
context: .
dockerfile: JS.Dockerfile
profiles:
- js
- ts
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' # alternatives: ggml-gpt4all-j, ggml-koala-13B-4bit-128g
py:
build:
context: .
dockerfile: PY.Dockerfile
profiles:
- py
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' # alternatives: ggml-gpt4all-j, ggml-koala-13B-4bit-128g


@@ -0,0 +1,2 @@
node_modules/
dist/


@@ -0,0 +1,20 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
// "skipFiles": [
// "<node_internals>/**"
// ],
"program": "${workspaceFolder}\\dist\\index.mjs",
"outFiles": [
"${workspaceFolder}/**/*.js"
]
}
]
}


File diff suppressed because it is too large


@@ -0,0 +1,21 @@
{
"name": "langchainjs-localai-example",
"version": "0.1.0",
"description": "Trivial Example of using langchain + the OpenAI API + LocalAI together",
"main": "index.mjs",
"scripts": {
"build": "tsc --build",
"clean": "tsc --build --clean",
"start": "node --trace-warnings dist/index.mjs"
},
"author": "dave@gray101.com",
"license": "MIT",
"devDependencies": {
"@types/node": "^18.16.4",
"typescript": "^5.0.4"
},
"dependencies": {
"langchain": "^0.0.67",
"typeorm": "^0.3.15"
}
}


@@ -0,0 +1,79 @@
import { OpenAIChat } from "langchain/llms/openai";
import { loadQAStuffChain } from "langchain/chains";
import { Document } from "langchain/document";
import { initializeAgentExecutorWithOptions } from "langchain/agents";
import {Calculator} from "langchain/tools/calculator";
const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
function getModel(): OpenAIChat {
return new OpenAIChat({
prefixMessages: [
{
role: "system",
content: "You are a helpful assistant that answers in pirate language",
},
],
modelName: modelName,
maxTokens: 50,
openAIApiKey: fakeApiKey,
maxRetries: 2
}, {
basePath: pathToLocalAi,
apiKey: fakeApiKey,
});
}
// Minimal example.
export const run = async () => {
const model = getModel();
console.log(`about to model.call at ${new Date().toUTCString()}`);
const res = await model.call(
"What would be a good company name a company that makes colorful socks?"
);
console.log(`${new Date().toUTCString()}`);
console.log({ res });
};
await run();
// This example uses the `StuffDocumentsChain`
export const run2 = async () => {
const model = getModel();
const chainA = loadQAStuffChain(model);
const docs = [
new Document({ pageContent: "Harrison went to Harvard." }),
new Document({ pageContent: "Ankush went to Princeton." }),
];
const resA = await chainA.call({
input_documents: docs,
question: "Where did Harrison go to college?",
});
console.log({ resA });
};
await run2();
// Quickly thrown together example of using tools + agents.
// This seems like it should work, but it doesn't yet.
export const temporarilyBrokenToolTest = async () => {
const model = getModel();
const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
agentType: "zero-shot-react-description",
});
console.log("Loaded agent.");
const input = `What is the value of (500 *2) + 350 - 13?`;
console.log(`Executing with input "${input}"...`);
const result = await executor.call({ input });
console.log(`Got output ${result.output}`);
}
await temporarilyBrokenToolTest();


@@ -0,0 +1,15 @@
{
"compilerOptions": {
"target": "es2022",
"lib": ["ES2022", "DOM"],
"module": "ES2022",
"moduleResolution": "node",
"strict": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"isolatedModules": true,
"outDir": "./dist"
},
"include": ["src", "test"],
"exclude": ["node_modules", "dist"]
}


@@ -0,0 +1,24 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"redirectOutput": true,
"justMyCode": false
},
{
"name": "Python: Attach to Port 5678",
"type": "python",
"request": "attach",
"connect": {
"host": "localhost",
"port": 5678
},
"justMyCode": false
}
]
}


@@ -0,0 +1,3 @@
{
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
}


@@ -0,0 +1,46 @@
import os
import logging
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
# This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI.
logging.basicConfig(level=logging.DEBUG)
print('Langchain + LocalAI PYTHON Tests')
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
key = os.environ.get('OPENAI_API_KEY', '-')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
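# ChatOpenAI sends its requests to openai_api_base, so everything below goes to the LocalAI endpoint configured above.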
chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
print("Created ChatOpenAI for ", chat.model_name)
template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!"
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
print("ABOUT to execute")
# get a chat completion from the formatted messages
response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
print(response)
print(".");

View File

@@ -0,0 +1,32 @@
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2022.12.7
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
idna==3.4
langchain==0.0.159
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
mypy-extensions==1.0.0
numexpr==2.8.4
numpy==1.24.3
openai==0.27.6
openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
requests==2.29.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
urllib3==1.26.15
yarl==1.9.2

View File

@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
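# Relies on OPENAI_API_BASE / OPENAI_API_KEY being set in the environment to reach LocalAI.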
llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

View File

@@ -0,0 +1 @@
{{.Input}}

View File

@@ -0,0 +1,18 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 4
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
backend: "gptj"
template:
completion: completion
chat: gpt4all

View File

@@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -0,0 +1,26 @@
# localai-webui
Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/localai-webui
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download any desired models to models/ in the parent LocalAI project dir
# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin
# start with docker-compose
docker-compose up -d --build
```
Open http://localhost:3000 for the Web UI.
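Once everything is up, you can sanity-check that the API is reachable by listing the available models. A minimal sketch using Python's `requests` package (any HTTP client works) against the OpenAI-compatible `/v1/models` endpoint:
```python
import requests

# List the models the LocalAI container currently serves.
resp = requests.get("http://localhost:8080/v1/models")
resp.raise_for_status()
for model in resp.json().get("data", []):
    print(model["id"])
```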

View File

@@ -0,0 +1,20 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]
frontend:
image: quay.io/go-skynet/localai-frontend:master
ports:
- 3000:3000

1
examples/query_data/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
storage/

View File

@@ -0,0 +1,49 @@
# Data query example
This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
## Requirements
For this example to work, you will need a model compatible with the `llama.cpp` backend. It will not work with gpt4all.
The example uses `WizardLM`. Edit the config files in `models/` to point at the model you use (replace `HERE`).
You will also need a set of documents to index. Copy them into the `data` directory.
## Setup
Start the API:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/query_data
# Copy your models, edit config files accordingly
# start with docker-compose
docker-compose up -d --build
```
### Create the index storage:
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python store.py
```
After it finishes, a `storage` directory will be created containing the vector index database.
## Query
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python query.py
```
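As a quick sanity check before indexing, you can hit the OpenAI-compatible embeddings endpoint directly and confirm the `text-embedding-ada-002` config above is wired up. A minimal sketch using the `requests` package:
```python
import requests

# Request an embedding from the locally configured text-embedding-ada-002 model.
resp = requests.post(
    "http://localhost:8080/v1/embeddings",
    json={"model": "text-embedding-ada-002", "input": "sanity check"},
)
resp.raise_for_status()
print(len(resp.json()["data"][0]["embedding"]), "dimensions")
```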

View File

@@ -0,0 +1,15 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]

View File

@@ -0,0 +1 @@
{{.Input}}

View File

@@ -0,0 +1,18 @@
name: text-embedding-ada-002
parameters:
model: HERE
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
embeddings: true
template:
completion: completion
chat: gpt4all

View File

@@ -0,0 +1,18 @@
name: gpt-3.5-turbo
parameters:
model: HERE
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
embeddings: true
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: wizardlm

View File

@@ -0,0 +1,3 @@
{{.Input}}
### Response:

View File

@@ -0,0 +1,33 @@
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or set the corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# This example uses gpt-3.5-turbo by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 1024
num_output = 256
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Set up the service context with the local predictor and prompt helper
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir='./storage')
# load index
index = load_index_from_storage(storage_context, service_context=service_context, )
query_engine = index.as_query_engine()
response = query_engine.query("XXXXXX your question here XXXXX")
print(response)

View File

@@ -0,0 +1,27 @@
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or set the corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# This example uses gpt-3.5-turbo by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 256
num_output = 256
max_chunk_overlap = 10
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
documents = SimpleDirectoryReader('data').load_data()
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 257)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")

View File

@@ -0,0 +1,10 @@
FROM python
# convert the model (one-off)
RUN pip3 install torch numpy
WORKDIR /build
COPY ./scripts/ .
RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
ENTRYPOINT [ "/build/build.sh" ]

59
examples/rwkv/README.md Normal file
View File

@@ -0,0 +1,59 @@
# rwkv
Example of how to run rwkv models.
## Run models
Setup:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/rwkv
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# build the tooling image to convert an rwkv model locally:
docker build -t rwkv-converter -f Dockerfile.build .
# download and convert a model (one-off) - the conversion is fast even on CPU!
docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
# Get the tokenizer
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
# start with docker-compose
docker-compose up -d --build
```
Test it out:
```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"prompt": "A long time ago, in a galaxy far away",
"max_tokens": 100,
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```
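The same chat request can also be issued from Python with the `openai` package (the 0.x-style API pinned in the other examples); the API key is a placeholder since LocalAI does not check it:
```python
import openai

# Point the client at the local instance instead of api.openai.com.
openai.api_base = "http://localhost:8080/v1"
openai.api_key = "sk-"

completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "How are you?"}],
    temperature=0.9,
)
print(completion.choices[0].message.content)
```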
### Fine tuning
See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).
## See also
- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)

View File

@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

View File

@@ -0,0 +1,19 @@
name: gpt-3.5-turbo
parameters:
model: rwkv
top_k: 80
temperature: 0.9
max_tokens: 100
top_p: 0.8
context_size: 1024
threads: 14
backend: "rwkv"
cutwords:
- "Bob:.*"
roles:
user: "Bob:"
system: "Alice:"
assistant: "Alice:"
template:
completion: rwkv_completion
chat: rwkv_chat

View File

@@ -0,0 +1,13 @@
The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
Bob: Hello Alice, how are you doing?
Alice: Hi Bob! Thanks, I'm fine. What about you?
Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
Alice: Not at all! I'm listening.
{{.Input}}
Alice:

View File

@@ -0,0 +1 @@
Complete the following sentence: {{.Input}}

11
examples/rwkv/scripts/build.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
set -ex
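# Usage: build.sh <model-url> <output-path>
# Downloads a PyTorch rwkv checkpoint, converts it to a float16 ggml model, then quantizes it to Q4_2.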
URL=$1
OUT=$2
FILENAME=$(basename $URL)
wget -nc $URL -O /build/$FILENAME
python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16
python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2

View File

@@ -0,0 +1,11 @@
SLACK_APP_TOKEN=xapp-1-...
SLACK_BOT_TOKEN=xoxb-...
OPENAI_API_KEY=sk-...
OPENAI_API_BASE=http://api:8080
OPENAI_MODEL=gpt-3.5-turbo
OPENAI_TIMEOUT_SECONDS=60
#OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check
#for mistakes and make suggestions to improve the language of the given text"
USE_SLACK_LANGUAGE=true
SLACK_APP_LOG_LEVEL=INFO
TRANSLATE_MARKDOWN=true

View File

@@ -0,0 +1,27 @@
# Slack bot
Slackbot using: https://github.com/seratch/ChatGPT-in-Slack
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/slack-bot
git clone https://github.com/seratch/ChatGPT-in-Slack
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Set the Slack bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
cp -rfv .env.example .env
vim .env
# start with docker-compose
docker-compose up -d --build
```

View File

@@ -0,0 +1,23 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
bot:
build:
context: ./ChatGPT-in-Slack
dockerfile: Dockerfile
env_file:
- .env

1
examples/slack-bot/models Symbolic link
View File

@@ -0,0 +1 @@
../chatbot-ui/models

60
go.mod
View File

@@ -3,41 +3,57 @@ module github.com/go-skynet/LocalAI
go 1.19
require (
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
github.com/gofiber/fiber/v2 v2.42.0
github.com/jaypipes/ghw v0.10.0
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675
github.com/gofiber/fiber/v2 v2.44.0
github.com/hashicorp/go-multierror v1.1.1
github.com/onsi/ginkgo/v2 v2.9.4
github.com/onsi/gomega v1.27.6
github.com/otiai10/openaigo v1.1.0
github.com/rs/zerolog v1.29.1
github.com/urfave/cli/v2 v2.25.0
github.com/sashabaranov/go-openai v1.9.3
github.com/swaggo/swag v1.16.1
github.com/urfave/cli/v2 v2.25.3
github.com/valyala/fasthttp v1.47.0
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/StackExchange/wmi v1.2.1 // indirect
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.6 // indirect
github.com/go-openapi/spec v0.20.4 // indirect
github.com/go-openapi/swag v0.19.15 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/jaypipes/pcidb v1.0.0 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/klauspost/compress v1.16.3 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/philhofer/fwd v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d // indirect
github.com/tinylib/msgp v1.1.6 // indirect
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
github.com/tinylib/msgp v1.1.8 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/fasthttp v1.44.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/sys v0.6.0 // indirect
golang.org/x/net v0.9.0 // indirect
golang.org/x/sys v0.7.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/tools v0.8.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
howett.net/plist v1.0.0 // indirect
)

176
go.sum
View File

@@ -1,58 +1,96 @@
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230504223241-67ff6a4db244/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675 h1:plXywr95RghidIHPHl+O/zpcNXenEeS6w/6WftFNr9E=
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y=
github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
@@ -60,68 +98,100 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg=
github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto=
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY=
github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c=
github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,28 +0,0 @@
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
# (requires https://github.com/kubevirt/containerized-data-importer)
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
name: models
namespace: local-ai
spec:
contentType: archive
source:
http:
url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
secretRef: model-secret
pvc:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Secret
metadata:
name: model-secret
namespace: local-ai
data:
accessKeyId: <model_server_username_base64_encoded>
secretKey: <model_server_password_base64_encoded>

View File

@@ -1,57 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: local-ai
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: local-ai
namespace: local-ai
labels:
app: local-ai
spec:
selector:
matchLabels:
app: local-ai
replicas: 1
template:
metadata:
labels:
app: local-ai
name: local-ai
spec:
containers:
- name: local-ai
image: quay.io/go-skynet/local-ai:latest
env:
- name: THREADS
value: "14"
- name: CONTEXT_SIZE
value: "512"
- name: MODELS_PATH
value: /models
volumeMounts:
- mountPath: /models
name: models
volumes:
- name: models
persistentVolumeClaim:
claimName: models
---
apiVersion: v1
kind: Service
metadata:
name: local-ai
namespace: local-ai
# If using AWS, you'll need to override the default 60s load balancer idle timeout
# annotations:
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
spec:
selector:
app: local-ai
type: LoadBalancer
ports:
- protocol: TCP
port: 8080
targetPort: 8080

25
main.go
View File

@@ -1,11 +1,12 @@
package main
import (
"fmt"
"os"
"path/filepath"
api "github.com/go-skynet/LocalAI/api"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/jaypipes/ghw"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
@@ -20,12 +21,6 @@ func main() {
os.Exit(1)
}
threads := 4
cpu, err := ghw.CPU()
if err == nil {
threads = int(cpu.TotalCores)
}
app := &cli.App{
Name: "LocalAI",
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
@@ -42,13 +37,18 @@ func main() {
Name: "threads",
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
EnvVars: []string{"THREADS"},
Value: threads,
Value: 4,
},
&cli.StringFlag{
Name: "models-path",
DefaultText: "Path containing models used for inferencing",
EnvVars: []string{"MODELS_PATH"},
Value: path,
Value: filepath.Join(path, "models"),
},
&cli.StringFlag{
Name: "config-file",
DefaultText: "Config file",
EnvVars: []string{"CONFIG_FILE"},
},
&cli.StringFlag{
Name: "address",
@@ -80,11 +80,8 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
UsageText: `local-ai [options]`,
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if ctx.Bool("debug") {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
return api.Start(model.NewModelLoader(ctx.String("models-path")), ctx.String("address"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"))
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
},
}

View File

@@ -10,43 +10,46 @@ import (
"sync"
"text/template"
"github.com/hashicorp/go-multierror"
"github.com/rs/zerolog/log"
rwkv "github.com/donomii/go-rwkv.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
)
type ModelLoader struct {
modelPath string
ModelPath string
mu sync.Mutex
models map[string]*llama.LLama
gptmodels map[string]*gptj.GPTJ
gpt2models map[string]*gpt2.GPT2
gptstablelmmodels map[string]*gpt2.StableLM
promptsTemplates map[string]*template.Template
rwkv map[string]*rwkv.RwkvState
promptsTemplates map[string]*template.Template
}
func NewModelLoader(modelPath string) *ModelLoader {
return &ModelLoader{
modelPath: modelPath,
ModelPath: modelPath,
gpt2models: make(map[string]*gpt2.GPT2),
gptmodels: make(map[string]*gptj.GPTJ),
gptstablelmmodels: make(map[string]*gpt2.StableLM),
models: make(map[string]*llama.LLama),
rwkv: make(map[string]*rwkv.RwkvState),
promptsTemplates: make(map[string]*template.Template),
}
}
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
_, err := os.Stat(filepath.Join(ml.modelPath, s))
_, err := os.Stat(filepath.Join(ml.ModelPath, s))
return err == nil
}
func (ml *ModelLoader) ListModels() ([]string, error) {
files, err := ioutil.ReadDir(ml.modelPath)
files, err := ioutil.ReadDir(ml.ModelPath)
if err != nil {
return []string{}, err
}
@@ -70,7 +73,18 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,
m, ok := ml.promptsTemplates[modelName]
if !ok {
return "", fmt.Errorf("no prompt template available")
modelFile := filepath.Join(ml.ModelPath, modelName)
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return "", err
}
t, exists := ml.promptsTemplates[modelName]
if exists {
m = t
}
}
if m == nil {
return "", fmt.Errorf("failed loading any template")
}
var buf bytes.Buffer
@@ -88,14 +102,14 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
}
// Check if the model path exists
// skip any error here - we run anyway if a template is not exist
// skip any error here - we run anyway if a template does not exist
modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)
if !ml.ExistsInModelPath(modelTemplateFile) {
return nil
}
dat, err := os.ReadFile(filepath.Join(ml.modelPath, modelTemplateFile))
dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
if err != nil {
return err
}
@@ -125,7 +139,7 @@ func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, erro
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.NewStableLM(modelFile)
@@ -156,15 +170,8 @@ func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.New(modelFile)
@@ -195,19 +202,8 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
if _, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model is GPT2: %s", modelName)
return nil, fmt.Errorf("this model is a GPT2 one")
}
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gptj.New(modelFile)
@@ -224,6 +220,36 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
return model, err
}
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
log.Debug().Msgf("Loading model name: %s", modelName)
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.rwkv[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
tokenPath := filepath.Join(ml.ModelPath, tokenFile)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model := rwkv.LoadFiles(modelFile, tokenPath, threads)
if model == nil {
return nil, fmt.Errorf("could not load model")
}
ml.rwkv[modelName] = model
return model, nil
}
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
@@ -240,23 +266,8 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPTJ model loaded instead - if we do we return an error so the API tries with GPTJ
if _, ok := ml.gptmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTJ: %s", modelName)
return nil, fmt.Errorf("this model is a GPTJ one")
}
if _, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model is GPT2: %s", modelName)
return nil, fmt.Errorf("this model is a GPT2 one")
}
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := llama.New(modelFile, opts...)
@@ -272,3 +283,83 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
ml.models[modelName] = model
return model, err
}
const tokenizerSuffix = ".tokenizer.json"
var loadedModels map[string]interface{} = map[string]interface{}{}
var muModels sync.Mutex
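// BackendLoader loads a model using the backend explicitly named in the
// model's config file (llama, stablelm, gpt2, gptj or rwkv).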
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
switch strings.ToLower(backendString) {
case "llama":
return ml.LoadLLaMAModel(modelFile, llamaOpts...)
case "stablelm":
return ml.LoadStableLMModel(modelFile)
case "gpt2":
return ml.LoadGPT2Model(modelFile)
case "gptj":
return ml.LoadGPTJModel(modelFile)
case "rwkv":
return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
default:
return nil, fmt.Errorf("backend unsupported: %s", backendString)
}
}
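// GreedyLoader has no backend hint, so it tries each backend in turn and
// returns the first one that successfully loads the model file; successful
// loads are cached in loadedModels so later requests reuse the same instance.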
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
updateModels := func(model interface{}) {
muModels.Lock()
defer muModels.Unlock()
loadedModels[modelFile] = model
}
muModels.Lock()
m, exists := loadedModels[modelFile]
if exists {
muModels.Unlock()
return m, nil
}
muModels.Unlock()
model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadGPTJModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadGPT2Model(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadStableLMModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}

View File

@@ -0,0 +1,3 @@
{{.Input}}
### Response:

4
renovate.json Normal file
View File

@@ -0,0 +1,4 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": ["config:base"]
}

1
tests/fixtures/completion.tmpl vendored Normal file
View File

@@ -0,0 +1 @@
{{.Input}}

32
tests/fixtures/config.yaml vendored Normal file
View File

@@ -0,0 +1,32 @@
- name: list1
parameters:
model: testmodel
top_p: 80
top_k: 0.9
temperature: 0.1
context_size: 10
stopwords:
- "HUMAN:"
- "### Response:"
roles:
user: "HUMAN:"
system: "GPT:"
template:
completion: completion
chat: ggml-gpt4all-j
- name: list2
parameters:
top_p: 80
top_k: 0.9
temperature: 0.1
model: testmodel
context_size: 10
stopwords:
- "HUMAN:"
- "### Response:"
roles:
user: "HUMAN:"
system: "GPT:"
template:
completion: completion
chat: ggml-gpt4all-j

4
tests/fixtures/ggml-gpt4all-j.tmpl vendored Normal file
View File

@@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

16
tests/fixtures/gpt4.yaml vendored Normal file
View File

@@ -0,0 +1,16 @@
name: gpt4all
parameters:
model: testmodel
top_p: 80
top_k: 0.9
temperature: 0.1
context_size: 10
stopwords:
- "HUMAN:"
- "### Response:"
roles:
user: "HUMAN:"
system: "GPT:"
template:
completion: completion
chat: ggml-gpt4all-j

16
tests/fixtures/gpt4_2.yaml vendored Normal file
View File

@@ -0,0 +1,16 @@
name: gpt4all-2
parameters:
model: testmodel
top_p: 80
top_k: 0.9
temperature: 0.1
context_size: 10
stopwords:
- "HUMAN:"
- "### Response:"
roles:
user: "HUMAN:"
system: "GPT:"
template:
completion: completion
chat: ggml-gpt4all-j