Minor fixes (#285 )

⬆️ Update go-skynet/go-bert.cpp (#274 )
Signed-off-by: GitHub <noreply@github.com> Co-authored-by: mudler <mudler@users.noreply.github.com>
2026-02-03 11:13:31 -05:00 · 2023-05-17 21:01:46 +02:00 · 2023-05-17 11:56:32 +02:00 · 2023-05-17 11:52:54 +02:00 · 2023-05-17 11:52:37 +02:00 · 2023-05-17 09:47:22 +02:00
121 changed files with 7433 additions and 908 deletions
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,3 @@
+ARG GO_VERSION=1.20
+FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
+RUN apt-get update && apt-get install -y cmake
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,46 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
+{
+	"name": "Existing Docker Compose (Extend)",
+
+	// Update the 'dockerComposeFile' list if you have more compose files or use different names.
+	// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
+	"dockerComposeFile": [
+		"../docker-compose.yaml",
+		"docker-compose.yml"
+	],
+
+	// The 'service' property is the name of the service for the container that VS Code should
+	// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
+	"service": "api",
+
+	// The optional 'workspaceFolder' property is the path VS Code should open by default when
+	// connected. This is typically a file mount in .devcontainer/docker-compose.yml
+	"workspaceFolder": "/workspace",
+
+	"features": {
+		"ghcr.io/devcontainers/features/go:1": {},
+		"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
+	},
+
+	// Features to add to the dev container. More info: https://containers.dev/features.
+	// "features": {},
+
+	// Use 'forwardPorts' to make a list of ports inside the container available locally.
+	// "forwardPorts": [],
+
+	// Uncomment the next line if you want start specific services in your Docker Compose config.
+	// "runServices": [],
+
+	// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
+	// "shutdownAction": "none",
+
+	// Uncomment the next line to run commands after the container is created.
+	"postCreateCommand": "make prepare"
+
+	// Configure tool-specific properties.
+	// "customizations": {},
+
+	// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
+	// "remoteUser": "devcontainer"
+}
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -0,0 +1,26 @@
+version: '3.6'
+services:
+  # Update this to the name of the service you want to work with in your docker-compose.yml file
+  api:
+    # Uncomment if you want to override the service's Dockerfile to one in the .devcontainer 
+    # folder. Note that the path of the Dockerfile and context is relative to the *primary* 
+    # docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
+    # array). The sample below assumes your primary file is in the root of your project.
+    #
+    build:
+      context: .
+      dockerfile: .devcontainer/Dockerfile
+
+    volumes:
+      # Update this to wherever you want VS Code to mount the folder of your project
+      - .:/workspace:cached
+
+    # Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
+    # cap_add:
+    #   - SYS_PTRACE
+    # security_opt:
+    #   - seccomp:unconfined
+
+    # Overrides default command so things don't shut down after the process ends.
+    command: /bin/sh -c "while sleep 1000; do :; done"
+ 
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,4 @@
+models
+examples/chatbot-ui/models
+examples/rwkv/models
+examples/**/models
--- a/.env
+++ b/.env
@@ -0,0 +1,5 @@
+# THREADS=14
+# CONTEXT_SIZE=512
+MODELS_PATH=/models
+# DEBUG=true
+# BUILD_TYPE=generic
--- a/.github/bump_deps.sh
+++ b/.github/bump_deps.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+set -xe
+REPO=$1
+BRANCH=$2
+VAR=$3
+
+LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
+
+sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -0,0 +1,51 @@
+name: Bump dependencies
+on:
+  schedule:
+    - cron: 0 20 * * *
+  workflow_dispatch:
+jobs:
+  bump:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - repository: "go-skynet/go-llama.cpp"
+            variable: "GOLLAMA_VERSION"
+            branch: "master"
+          - repository: "go-skynet/go-gpt2.cpp"
+            variable: "GOGPT2_VERSION"
+            branch: "master"
+          - repository: "donomii/go-rwkv.cpp"
+            variable: "RWKV_VERSION"
+            branch: "main"
+          - repository: "ggerganov/whisper.cpp"
+            variable: "WHISPER_CPP_VERSION"
+            branch: "master"
+          - repository: "go-skynet/go-bert.cpp"
+            variable: "BERT_VERSION"
+            branch: "master"
+          - repository: "go-skynet/bloomz.cpp"
+            variable: "BLOOMZ_VERSION"
+            branch: "main"
+          - repository: "nomic-ai/gpt4all"
+            variable: "GPT4ALL_VERSION"
+            branch: "main"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Bump dependencies 🔧
+        run: |
+          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v5
+        with:
+          token: ${{ secrets.UPDATE_BOT_TOKEN }}
+          push-to-fork: ci-forks/LocalAI
+          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
+          title: ':arrow_up: Update ${{ matrix.repository }}'
+          branch: "update/${{ matrix.variable }}"
+          body: Bump of ${{ matrix.repository }} version
+          signoff: true
+
+
+
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -2,6 +2,7 @@
 name: 'build container images'

 on:
+  pull_request:
  push:
    branches:
      - master
@@ -12,78 +13,66 @@ jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
      - name: Checkout
        uses: actions/checkout@v3
+
      - name: Prepare
        id: prep
        run: |
-          DOCKER_IMAGE=quay.io/go-skynet/llama-cli
-          VERSION=latest
+          DOCKER_IMAGE=quay.io/go-skynet/local-ai
+          VERSION=master
          SHORTREF=${GITHUB_SHA::8}
+
          # If this is git tag, use the tag name as a docker tag
          if [[ $GITHUB_REF == refs/tags/* ]]; then
            VERSION=${GITHUB_REF#refs/tags/}
          fi
          TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
+
          # If the VERSION looks like a version number, assume that
          # this is the most recent version of the image and also
          # tag it 'latest'.
-          if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+          if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
            TAGS="$TAGS,${DOCKER_IMAGE}:latest"
          fi
+
          # Set output parameters.
          echo ::set-output name=tags::${TAGS}
          echo ::set-output name=docker_image::${DOCKER_IMAGE}
-          echo ::set-output name=image::${DOCKER_IMAGE}:${VERSION}
+
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all
+
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master
+
      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: quay.io
-          username: ${{ secrets.QUAY_USERNAME }}
-          password: ${{ secrets.QUAY_PASSWORD }}
-      - uses: earthly/actions/setup-earthly@v1
+          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
      - name: Build
-        run: |
-            earthly config "global.conversion_parallelism" "1"
-            earthly config "global.buildkit_max_parallelism" "1"
-            earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.prep.outputs.tags }}
+      - name: Build PRs
+        if: github.event_name == 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: false
+          tags: ${{ steps.prep.outputs.tags }}
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -0,0 +1,44 @@
+---
+name: 'tests'
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+    tags:
+      - '*'
+
+jobs:
+  ubuntu-latest:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+        with: 
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+      - name: Test
+        run: |
+          make test
+
+  macOS-latest:
+    runs-on: macOS-latest
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+        with: 
+          submodules: true
+
+      - name: Dependencies
+        run: |
+          brew update
+          brew install sdl2 ffmpeg
+      - name: Test
+        run: |
+          make test
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,20 @@
+# go-llama build artifacts
+go-llama
+gpt4all
+go-stable-diffusion
+go-gpt2
+go-rwkv
+whisper.cpp
+
+# LocalAI build binary
+LocalAI
+local-ai
+# prevent above rules from omitting the helm chart
+!charts/*
+
+# Ignore models
+models/*
+test-models/
+
+# just in case
+.DS_Store
--- a/.goreleaser.yaml
+++ b/.goreleaser.yaml
@@ -1,5 +1,5 @@
 # Make sure to check the documentation at http://goreleaser.com
-project_name: llama-cli
+project_name: local-ai
 builds:
  - ldflags:
      - -w -s
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,20 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Launch Go",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "${workspaceFolder}/main.go",
+            "args": [
+                "api"
+            ],
+            "env": {
+                "C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
+                "LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
+                "DEBUG": "true"
+            }
+        }
+    ]
+}
--- a/10
+++ b/10
@@ -0,0 +1,10 @@
+ARG GO_VERSION=1.20
+ARG BUILD_TYPE=
+FROM golang:$GO_VERSION
+WORKDIR /build
+RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 
+COPY . .
+RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2
+RUN make prepare-sources
+EXPOSE 8080
+ENTRYPOINT [ "/build/entrypoint.sh" ]
--- a/Dockerfile.dev
+++ b/Dockerfile.dev
@@ -0,0 +1,15 @@
+ARG GO_VERSION=1.20
+ARG DEBIAN_VERSION=11
+ARG BUILD_TYPE=
+
+FROM golang:$GO_VERSION as builder
+WORKDIR /build
+RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 
+RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2
+COPY . .
+RUN make build
+
+FROM debian:$DEBIAN_VERSION
+COPY --from=builder /build/local-ai /usr/bin/local-ai
+EXPOSE 8080
+ENTRYPOINT [ "/usr/bin/local-ai" ]
--- a/46
+++ b/46
@@ -1,47 +1,5 @@
 VERSION 0.7

-go-deps:
-    ARG GO_VERSION=1.20
-    FROM golang:$GO_VERSION
-    WORKDIR /build
-    COPY go.mod ./
-    COPY go.sum ./
-    RUN go mod download
-    RUN apt-get update
-    SAVE ARTIFACT go.mod AS LOCAL go.mod
-    SAVE ARTIFACT go.sum AS LOCAL go.sum
-
-model-image:
-    ARG MODEL_IMAGE=quay.io/go-skynet/models:ggml2-alpaca-7b-v0.2
-    FROM $MODEL_IMAGE
-    SAVE ARTIFACT /models/model.bin
-
 build:
-    FROM +go-deps
-    WORKDIR /build
-    RUN git clone https://github.com/go-skynet/llama
-    RUN cd llama && make libllama.a
-    COPY . .
-    RUN C_INCLUDE_PATH=/build/llama LIBRARY_PATH=/build/llama go build -o llama-cli ./
-    SAVE ARTIFACT llama-cli AS LOCAL llama-cli
-
-image:
-    FROM +go-deps
-    ARG IMAGE=alpaca-cli
-    COPY +model-image/model.bin /model.bin
-    COPY +build/llama-cli /llama-cli
-    ENV MODEL_PATH=/model.bin
-    ENTRYPOINT [ "/llama-cli" ]
-    SAVE IMAGE --push $IMAGE
-
-lite-image:
-    FROM +go-deps
-    ARG IMAGE=alpaca-cli-nomodel
-    COPY +build/llama-cli /llama-cli
-    ENV MODEL_PATH=/model.bin
-    ENTRYPOINT [ "/llama-cli" ]
-    SAVE IMAGE --push $IMAGE-lite
-
-image-all:
-    BUILD --platform=linux/amd64 --platform=linux/arm64 +image
-    BUILD --platform=linux/amd64 --platform=linux/arm64 +lite-image
+    FROM DOCKERFILE -f Dockerfile .
+    SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
--- a/230
+++ b/230
@@ -0,0 +1,230 @@
+GOCMD=go
+GOTEST=$(GOCMD) test
+GOVET=$(GOCMD) vet
+BINARY_NAME=local-ai
+
+GOLLAMA_VERSION?=7f9ae4246088f0c08ed322acbae21d69cd2eb547
+GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
+GPT4ALL_VERSION?=a07237e54fcdfdb351913587052ac061a2a7bdff
+GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
+RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
+RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
+WHISPER_CPP_VERSION?=95b02d76b04d18e4ce37ed8353a1f0797f1717ea
+BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551
+BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
+BUILD_TYPE?=
+CGO_LDFLAGS?=
+CUDA_LIBPATH?=/usr/local/cuda/lib64/
+STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc
+
+GO_TAGS?=
+
+OPTIONAL_TARGETS?=
+
+GREEN  := $(shell tput -Txterm setaf 2)
+YELLOW := $(shell tput -Txterm setaf 3)
+WHITE  := $(shell tput -Txterm setaf 7)
+CYAN   := $(shell tput -Txterm setaf 6)
+RESET  := $(shell tput -Txterm sgr0)
+
+C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
+LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
+
+ifeq ($(BUILD_TYPE),openblas)
+	CGO_LDFLAGS+=-lopenblas
+endif
+
+ifeq ($(BUILD_TYPE),cublas)
+	CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
+endif
+
+
+ifeq ($(GO_TAGS),stablediffusion)
+	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
+endif
+
+.PHONY: all test build vendor
+
+all: help
+
+## GPT4ALL
+gpt4all:
+	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
+	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
+	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
+	@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
+	mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
+
+## BERT embeddings
+go-bert:
+	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
+	cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+	@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+	@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+
+## stable diffusion
+go-stable-diffusion:
+	git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion go-stable-diffusion
+	cd go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
+
+go-stable-diffusion/libstablediffusion.a:
+	$(MAKE) -C go-stable-diffusion libstablediffusion.a
+
+## RWKV
+go-rwkv:
+	git clone --recurse-submodules $(RWKV_REPO) go-rwkv
+	cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
+	@find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
+	@find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
+
+go-rwkv/librwkv.a: go-rwkv
+	cd go-rwkv && cd rwkv.cpp &&	cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF &&	cmake --build . && 	cp librwkv.a .. && cp ggml/src/libggml.a ..
+
+## bloomz
+bloomz:
+	git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
+	@find ./bloomz -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
+	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
+	@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
+	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
+	@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
+
+bloomz/libbloomz.a: bloomz
+	cd bloomz && make libbloomz.a
+
+go-bert/libgobert.a: go-bert
+	$(MAKE) -C go-bert libgobert.a
+
+gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
+	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
+
+## CEREBRAS GPT
+go-gpt2: 
+	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
+	cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
+	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
+	@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
+	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
+	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
+
+go-gpt2/libgpt2.a: go-gpt2
+	$(MAKE) -C go-gpt2 libgpt2.a
+
+whisper.cpp:
+	git clone https://github.com/ggerganov/whisper.cpp.git
+	cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./whisper.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
+	@find ./whisper.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
+	@find ./whisper.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
+
+whisper.cpp/libwhisper.a: whisper.cpp
+	cd whisper.cpp && make libwhisper.a
+
+go-llama:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
+	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
+
+go-llama/libbinding.a: go-llama 
+	$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
+
+replace:
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
+	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
+	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
+	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
+	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
+	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
+
+prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
+	$(GOCMD) mod download
+
+## GENERIC
+rebuild: ## Rebuilds the project
+	$(MAKE) -C go-llama clean
+	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
+	$(MAKE) -C go-gpt2 clean
+	$(MAKE) -C go-rwkv clean
+	$(MAKE) -C whisper.cpp clean
+	$(MAKE) -C go-stable-diffusion clean
+	$(MAKE) -C go-bert clean
+	$(MAKE) -C bloomz clean
+	$(MAKE) build
+
+prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a  ## Prepares for building
+
+clean: ## Remove build related file
+	rm -fr ./go-llama
+	rm -rf ./gpt4all
+	rm -rf ./go-stable-diffusion
+	rm -rf ./go-gpt2
+	rm -rf ./go-rwkv
+	rm -rf ./go-bert
+	rm -rf ./bloomz
+	rm -rf ./whisper.cpp
+	rm -rf $(BINARY_NAME)
+
+## Build:
+
+build: prepare ## Build the project
+	$(info ${GREEN}I local-ai build info:${RESET})
+	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
+	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -tags "$(GO_TAGS)" -x -o $(BINARY_NAME) ./
+
+generic-build: ## Build the project using generic
+	BUILD_TYPE="generic" $(MAKE) build
+
+## Run
+run: prepare ## run local-ai
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
+
+test-models/testmodel:
+	mkdir test-models
+	mkdir test-dir
+	wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
+	wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
+	wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
+	wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
+	wget https://huggingface.co/imxcstar/rwkv-4-raven-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096-16_Q4_2.bin -O test-models/rwkv
+	wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
+	cp tests/fixtures/* test-models
+
+test: prepare test-models/testmodel
+	cp tests/fixtures/* test-models
+	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
+
+## Help:
+help: ## Show this help.
+	@echo ''
+	@echo 'Usage:'
+	@echo '  ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
+	@echo ''
+	@echo 'Targets:'
+	@awk 'BEGIN {FS = ":.*?## "} { \
+		if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf "    ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
+		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
+		}' $(MAKEFILE_LIST)
--- a/README.md
+++ b/README.md
--- a/api.go
+++ b/api.go
@@ -1,78 +0,0 @@
-package main
-
-import (
-	"strconv"
-
-	llama "github.com/go-skynet/llama/go"
-	"github.com/gofiber/fiber/v2"
-)
-
-func api(l *llama.LLama, listenAddr string, threads int) error {
-	app := fiber.New()
-
-	/*
-		curl --location --request POST 'http://localhost:8080/predict' --header 'Content-Type: application/json' --data-raw '{
-		    "text": "What is an alpaca?",
-		    "topP": 0.8,
-		    "topK": 50,
-		    "temperature": 0.7,
-		    "tokens": 100
-		}'
-	*/
-
-	// Endpoint to generate the prediction
-	app.Post("/predict", func(c *fiber.Ctx) error {
-		// Get input data from the request body
-		input := new(struct {
-			Text string `json:"text"`
-		})
-		if err := c.BodyParser(input); err != nil {
-			return err
-		}
-
-		// Set the parameters for the language model prediction
-		topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64) // Default value of topP is 0.9
-		if err != nil {
-			return err
-		}
-
-		topK, err := strconv.Atoi(c.Query("topK", "40")) // Default value of topK is 40
-		if err != nil {
-			return err
-		}
-
-		temperature, err := strconv.ParseFloat(c.Query("temperature", "0.5"), 64) // Default value of temperature is 0.5
-		if err != nil {
-			return err
-		}
-
-		tokens, err := strconv.Atoi(c.Query("tokens", "128")) // Default value of tokens is 128
-		if err != nil {
-			return err
-		}
-
-		// Generate the prediction using the language model
-		prediction, err := l.Predict(
-			input.Text,
-			llama.SetTemperature(temperature),
-			llama.SetTopP(topP),
-			llama.SetTopK(topK),
-			llama.SetTokens(tokens),
-			llama.SetThreads(threads),
-		)
-		if err != nil {
-			return err
-		}
-
-		// Return the prediction in the response body
-		return c.JSON(struct {
-			Prediction string `json:"prediction"`
-		}{
-			Prediction: prediction,
-		})
-	})
-
-	// Start the server
-	app.Listen(":8080")
-	return nil
-}
--- a/api/api.go
+++ b/api/api.go
@@ -0,0 +1,100 @@
+package api
+
+import (
+	"errors"
+
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/gofiber/fiber/v2/middleware/cors"
+	"github.com/gofiber/fiber/v2/middleware/logger"
+	"github.com/gofiber/fiber/v2/middleware/recover"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool, imageDir string) *fiber.App {
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+	if debug {
+		zerolog.SetGlobalLevel(zerolog.DebugLevel)
+	}
+
+	// Return errors as JSON responses
+	app := fiber.New(fiber.Config{
+		BodyLimit:             uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+		DisableStartupMessage: disableMessage,
+		// Override default error handler
+		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
+			// Status code defaults to 500
+			code := fiber.StatusInternalServerError
+
+			// Retrieve the custom status code if it's a *fiber.Error
+			var e *fiber.Error
+			if errors.As(err, &e) {
+				code = e.Code
+			}
+
+			// Send custom error page
+			return ctx.Status(code).JSON(
+				ErrorResponse{
+					Error: &APIError{Message: err.Error(), Code: code},
+				},
+			)
+		},
+	})
+
+	if debug {
+		app.Use(logger.New(logger.Config{
+			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
+		}))
+	}
+
+	cm := make(ConfigMerger)
+	if err := cm.LoadConfigs(loader.ModelPath); err != nil {
+		log.Error().Msgf("error loading config files: %s", err.Error())
+	}
+
+	if configFile != "" {
+		if err := cm.LoadConfigFile(configFile); err != nil {
+			log.Error().Msgf("error loading config file: %s", err.Error())
+		}
+	}
+
+	if debug {
+		for k, v := range cm {
+			log.Debug().Msgf("Model: %s (config: %+v)", k, v)
+		}
+	}
+	// Default middleware config
+	app.Use(recover.New())
+	app.Use(cors.New())
+
+	// openAI compatible API endpoint
+	app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
+	app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
+	app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
+	app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
+	app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
+	app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
+	app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
+	app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
+	// /v1/engines/{engine_id}/embeddings
+
+	app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
+	app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16))
+
+	app.Post("/v1/images/generations", imageEndpoint(cm, debug, loader, imageDir))
+
+	if imageDir != "" {
+		app.Static("/generated-images", imageDir)
+	}
+
+	app.Get("/v1/models", listModels(loader, cm))
+	app.Get("/models", listModels(loader, cm))
+
+	return app
+}
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -0,0 +1,191 @@
+package api_test
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"runtime"
+
+	. "github.com/go-skynet/LocalAI/api"
+	"github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	openaigo "github.com/otiai10/openaigo"
+	"github.com/sashabaranov/go-openai"
+)
+
+var _ = Describe("API test", func() {
+
+	var app *fiber.App
+	var modelLoader *model.ModelLoader
+	var client *openai.Client
+	var client2 *openaigo.Client
+	Context("API query", func() {
+		BeforeEach(func() {
+			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
+			app = App("", modelLoader, 15, 1, 512, false, true, true, "")
+			go app.Listen("127.0.0.1:9090")
+
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
+
+			client2 = openaigo.NewClient("")
+			client2.BaseURL = defaultConfig.BaseURL
+
+			// Wait for API to be ready
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+		AfterEach(func() {
+			app.Shutdown()
+		})
+		It("returns the models list", func() {
+			models, err := client.ListModels(context.TODO())
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(models.Models)).To(Equal(10))
+		})
+		It("can generate completions", func() {
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
+		})
+
+		It("can generate chat completions ", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+
+		It("can generate completions from model configs", func() {
+			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
+		})
+
+		It("can generate chat completions from model configs", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+
+		It("returns errors", func() {
+			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
+		})
+		It("transcribes audio", func() {
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
+			resp, err := client.CreateTranscription(
+				context.Background(),
+				openai.AudioRequest{
+					Model:    openai.Whisper1,
+					FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
+				},
+			)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
+		})
+
+		It("calculate embeddings", func() {
+			if runtime.GOOS != "linux" {
+				Skip("test supported only on linux")
+			}
+			resp, err := client.CreateEmbeddings(
+				context.Background(),
+				openai.EmbeddingRequest{
+					Model: openai.AdaEmbeddingV2,
+					Input: []string{"sun", "cat"},
+				},
+			)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
+			Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
+
+			sunEmbedding := resp.Data[0].Embedding
+			resp2, err := client.CreateEmbeddings(
+				context.Background(),
+				openai.EmbeddingRequest{
+					Model: openai.AdaEmbeddingV2,
+					Input: []string{"sun"},
+				},
+			)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
+		})
+
+		Context("backends", func() {
+			It("runs rwkv", func() {
+				if runtime.GOOS != "linux" {
+					Skip("test supported only on linux")
+				}
+				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
+				Expect(err).ToNot(HaveOccurred())
+				Expect(len(resp.Choices) > 0).To(BeTrue())
+				Expect(resp.Choices[0].Text).To(Equal(" five."))
+			})
+		})
+	})
+
+	Context("Config file", func() {
+		BeforeEach(func() {
+			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
+			app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true, "")
+			go app.Listen("127.0.0.1:9090")
+
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
+			client2 = openaigo.NewClient("")
+			client2.BaseURL = defaultConfig.BaseURL
+			// Wait for API to be ready
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m").ShouldNot(HaveOccurred())
+		})
+		AfterEach(func() {
+			app.Shutdown()
+		})
+		It("can generate chat completions from config file", func() {
+
+			models, err := client.ListModels(context.TODO())
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(models.Models)).To(Equal(12))
+		})
+		It("can generate chat completions from config file", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+		It("can generate chat completions from config file", func() {
+			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
+		})
+		It("can generate edit completions from config file", func() {
+			request := openaigo.EditCreateRequestBody{
+				Model:       "list2",
+				Instruction: "foo",
+				Input:       "bar",
+			}
+			resp, err := client2.CreateEdit(context.Background(), request)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(len(resp.Choices)).To(Equal(1))
+			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
+		})
+
+	})
+})
--- a/api/apt_suite_test.go
+++ b/api/apt_suite_test.go
@@ -0,0 +1,13 @@
+package api_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestLocalAI(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "LocalAI test suite")
+}
--- a/api/config.go
+++ b/api/config.go
@@ -0,0 +1,297 @@
+package api
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strings"
+
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+	"gopkg.in/yaml.v3"
+)
+
+type Config struct {
+	OpenAIRequest               `yaml:"parameters"`
+	Name                        string            `yaml:"name"`
+	StopWords                   []string          `yaml:"stopwords"`
+	Cutstrings                  []string          `yaml:"cutstrings"`
+	TrimSpace                   []string          `yaml:"trimspace"`
+	ContextSize                 int               `yaml:"context_size"`
+	F16                         bool              `yaml:"f16"`
+	Threads                     int               `yaml:"threads"`
+	Debug                       bool              `yaml:"debug"`
+	Roles                       map[string]string `yaml:"roles"`
+	Embeddings                  bool              `yaml:"embeddings"`
+	Backend                     string            `yaml:"backend"`
+	TemplateConfig              TemplateConfig    `yaml:"template"`
+	MirostatETA                 float64           `yaml:"mirostat_eta"`
+	MirostatTAU                 float64           `yaml:"mirostat_tau"`
+	Mirostat                    int               `yaml:"mirostat"`
+	NGPULayers                  int               `yaml:"gpu_layers"`
+	ImageGenerationAssets       string            `yaml:"asset_dir"`
+	PromptStrings, InputStrings []string
+	InputToken                  [][]int
+}
+
+type TemplateConfig struct {
+	Completion string `yaml:"completion"`
+	Chat       string `yaml:"chat"`
+	Edit       string `yaml:"edit"`
+}
+
+type ConfigMerger map[string]Config
+
+func ReadConfigFile(file string) ([]*Config, error) {
+	c := &[]*Config{}
+	f, err := os.ReadFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read config file: %w", err)
+	}
+	if err := yaml.Unmarshal(f, c); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+	}
+
+	return *c, nil
+}
+
+func ReadConfig(file string) (*Config, error) {
+	c := &Config{}
+	f, err := os.ReadFile(file)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read config file: %w", err)
+	}
+	if err := yaml.Unmarshal(f, c); err != nil {
+		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
+	}
+
+	return c, nil
+}
+
+func (cm ConfigMerger) LoadConfigFile(file string) error {
+	c, err := ReadConfigFile(file)
+	if err != nil {
+		return fmt.Errorf("cannot load config file: %w", err)
+	}
+
+	for _, cc := range c {
+		cm[cc.Name] = *cc
+	}
+	return nil
+}
+
+func (cm ConfigMerger) LoadConfig(file string) error {
+	c, err := ReadConfig(file)
+	if err != nil {
+		return fmt.Errorf("cannot read config file: %w", err)
+	}
+
+	cm[c.Name] = *c
+	return nil
+}
+
+func (cm ConfigMerger) LoadConfigs(path string) error {
+	files, err := ioutil.ReadDir(path)
+	if err != nil {
+		return err
+	}
+
+	for _, file := range files {
+		// Skip templates, YAML and .keep files
+		if !strings.Contains(file.Name(), ".yaml") {
+			continue
+		}
+		c, err := ReadConfig(filepath.Join(path, file.Name()))
+		if err == nil {
+			cm[c.Name] = *c
+		}
+	}
+
+	return nil
+}
+
+func updateConfig(config *Config, input *OpenAIRequest) {
+	if input.Echo {
+		config.Echo = input.Echo
+	}
+	if input.TopK != 0 {
+		config.TopK = input.TopK
+	}
+	if input.TopP != 0 {
+		config.TopP = input.TopP
+	}
+
+	if input.Temperature != 0 {
+		config.Temperature = input.Temperature
+	}
+
+	if input.Maxtokens != 0 {
+		config.Maxtokens = input.Maxtokens
+	}
+
+	switch stop := input.Stop.(type) {
+	case string:
+		if stop != "" {
+			config.StopWords = append(config.StopWords, stop)
+		}
+	case []interface{}:
+		for _, pp := range stop {
+			if s, ok := pp.(string); ok {
+				config.StopWords = append(config.StopWords, s)
+			}
+		}
+	}
+
+	if input.RepeatPenalty != 0 {
+		config.RepeatPenalty = input.RepeatPenalty
+	}
+
+	if input.Keep != 0 {
+		config.Keep = input.Keep
+	}
+
+	if input.Batch != 0 {
+		config.Batch = input.Batch
+	}
+
+	if input.F16 {
+		config.F16 = input.F16
+	}
+
+	if input.IgnoreEOS {
+		config.IgnoreEOS = input.IgnoreEOS
+	}
+
+	if input.Seed != 0 {
+		config.Seed = input.Seed
+	}
+
+	if input.Mirostat != 0 {
+		config.Mirostat = input.Mirostat
+	}
+
+	if input.MirostatETA != 0 {
+		config.MirostatETA = input.MirostatETA
+	}
+
+	if input.MirostatTAU != 0 {
+		config.MirostatTAU = input.MirostatTAU
+	}
+
+	switch inputs := input.Input.(type) {
+	case string:
+		if inputs != "" {
+			config.InputStrings = append(config.InputStrings, inputs)
+		}
+	case []interface{}:
+		for _, pp := range inputs {
+			switch i := pp.(type) {
+			case string:
+				config.InputStrings = append(config.InputStrings, i)
+			case []interface{}:
+				tokens := []int{}
+				for _, ii := range i {
+					tokens = append(tokens, int(ii.(float64)))
+				}
+				config.InputToken = append(config.InputToken, tokens)
+			}
+		}
+	}
+
+	switch p := input.Prompt.(type) {
+	case string:
+		config.PromptStrings = append(config.PromptStrings, p)
+	case []interface{}:
+		for _, pp := range p {
+			if s, ok := pp.(string); ok {
+				config.PromptStrings = append(config.PromptStrings, s)
+			}
+		}
+	}
+}
+func readInput(c *fiber.Ctx, loader *model.ModelLoader, randomModel bool) (string, *OpenAIRequest, error) {
+	input := new(OpenAIRequest)
+	// Get input data from the request body
+	if err := c.BodyParser(input); err != nil {
+		return "", nil, err
+	}
+
+	modelFile := input.Model
+
+	if c.Params("model") != "" {
+		modelFile = c.Params("model")
+	}
+
+	received, _ := json.Marshal(input)
+
+	log.Debug().Msgf("Request received: %s", string(received))
+
+	// Set model from bearer token, if available
+	bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
+	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
+
+	// If no model was specified, take the first available
+	if modelFile == "" && !bearerExists && randomModel {
+		models, _ := loader.ListModels()
+		if len(models) > 0 {
+			modelFile = models[0]
+			log.Debug().Msgf("No model specified, using: %s", modelFile)
+		} else {
+			log.Debug().Msgf("No model specified, returning error")
+			return "", nil, fmt.Errorf("no model specified")
+		}
+	}
+
+	// If a model is found in bearer token takes precedence
+	if bearerExists {
+		log.Debug().Msgf("Using model from bearer token: %s", bearer)
+		modelFile = bearer
+	}
+	return modelFile, input, nil
+}
+
+func readConfig(modelFile string, input *OpenAIRequest, cm ConfigMerger, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
+	// Load a config file if present after the model name
+	modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
+	if _, err := os.Stat(modelConfig); err == nil {
+		if err := cm.LoadConfig(modelConfig); err != nil {
+			return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
+		}
+	}
+
+	var config *Config
+	cfg, exists := cm[modelFile]
+	if !exists {
+		config = &Config{
+			OpenAIRequest: defaultRequest(modelFile),
+			ContextSize:   ctx,
+			Threads:       threads,
+			F16:           f16,
+			Debug:         debug,
+		}
+	} else {
+		config = &cfg
+	}
+
+	// Set the parameters for the language model prediction
+	updateConfig(config, input)
+
+	// Don't allow 0 as setting
+	if config.Threads == 0 {
+		if threads != 0 {
+			config.Threads = threads
+		} else {
+			config.Threads = 4
+		}
+	}
+
+	// Enforce debug flag if passed from CLI
+	if debug {
+		config.Debug = true
+	}
+
+	return config, input, nil
+}
--- a/api/openai.go
+++ b/api/openai.go
@@ -0,0 +1,672 @@
+package api
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
+	llama "github.com/go-skynet/go-llama.cpp"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
+	"github.com/valyala/fasthttp"
+)
+
+// APIError provides error information returned by the OpenAI API.
+type APIError struct {
+	Code    any     `json:"code,omitempty"`
+	Message string  `json:"message"`
+	Param   *string `json:"param,omitempty"`
+	Type    string  `json:"type"`
+}
+
+type ErrorResponse struct {
+	Error *APIError `json:"error,omitempty"`
+}
+
+type OpenAIUsage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
+type Item struct {
+	Embedding []float32 `json:"embedding"`
+	Index     int       `json:"index"`
+	Object    string    `json:"object,omitempty"`
+
+	// Images
+	URL     string `json:"url,omitempty"`
+	B64JSON string `json:"b64_json,omitempty"`
+}
+
+type OpenAIResponse struct {
+	Created int      `json:"created,omitempty"`
+	Object  string   `json:"object,omitempty"`
+	ID      string   `json:"id,omitempty"`
+	Model   string   `json:"model,omitempty"`
+	Choices []Choice `json:"choices,omitempty"`
+	Data    []Item   `json:"data,omitempty"`
+
+	Usage OpenAIUsage `json:"usage"`
+}
+
+type Choice struct {
+	Index        int      `json:"index,omitempty"`
+	FinishReason string   `json:"finish_reason,omitempty"`
+	Message      *Message `json:"message,omitempty"`
+	Delta        *Message `json:"delta,omitempty"`
+	Text         string   `json:"text,omitempty"`
+}
+
+type Message struct {
+	Role    string `json:"role,omitempty" yaml:"role"`
+	Content string `json:"content,omitempty" yaml:"content"`
+}
+
+type OpenAIModel struct {
+	ID     string `json:"id"`
+	Object string `json:"object"`
+}
+
+type OpenAIRequest struct {
+	Model string `json:"model" yaml:"model"`
+
+	// whisper
+	File     string `json:"file" validate:"required"`
+	Language string `json:"language"`
+	//whisper/image
+	ResponseFormat string `json:"response_format"`
+	// image
+	Size string `json:"size"`
+	// Prompt is read only by completion/image API calls
+	Prompt interface{} `json:"prompt" yaml:"prompt"`
+
+	// Edit endpoint
+	Instruction string      `json:"instruction" yaml:"instruction"`
+	Input       interface{} `json:"input" yaml:"input"`
+
+	Stop interface{} `json:"stop" yaml:"stop"`
+
+	// Messages is read only by chat/completion API calls
+	Messages []Message `json:"messages" yaml:"messages"`
+
+	Stream bool `json:"stream"`
+	Echo   bool `json:"echo"`
+	// Common options between all the API calls
+	TopP        float64 `json:"top_p" yaml:"top_p"`
+	TopK        int     `json:"top_k" yaml:"top_k"`
+	Temperature float64 `json:"temperature" yaml:"temperature"`
+	Maxtokens   int     `json:"max_tokens" yaml:"max_tokens"`
+
+	N int `json:"n"`
+
+	// Custom parameters - not present in the OpenAI API
+	Batch         int     `json:"batch" yaml:"batch"`
+	F16           bool    `json:"f16" yaml:"f16"`
+	IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
+	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
+	Keep          int     `json:"n_keep" yaml:"n_keep"`
+
+	MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
+	MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
+	Mirostat    int     `json:"mirostat" yaml:"mirostat"`
+
+	Seed int `json:"seed" yaml:"seed"`
+
+	// Image (not supported by OpenAI)
+	Mode int `json:"mode"`
+	Step int `json:"step"`
+}
+
+func defaultRequest(modelFile string) OpenAIRequest {
+	return OpenAIRequest{
+		TopP:        0.7,
+		TopK:        80,
+		Maxtokens:   512,
+		Temperature: 0.9,
+		Model:       modelFile,
+	}
+}
+
+// https://platform.openai.com/docs/api-reference/completions
+func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		log.Debug().Msgf("Parameter Config: %+v", config)
+
+		templateFile := config.Model
+
+		if config.TemplateConfig.Completion != "" {
+			templateFile = config.TemplateConfig.Completion
+		}
+
+		var result []Choice
+		for _, i := range config.PromptStrings {
+			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+			templatedInput, err := loader.TemplatePrefix(templateFile, struct {
+				Input string
+			}{Input: i})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
+			}
+
+			r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
+				*c = append(*c, Choice{Text: s})
+			}, nil)
+			if err != nil {
+				return err
+			}
+
+			result = append(result, r...)
+		}
+
+		resp := &OpenAIResponse{
+			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+			Choices: result,
+			Object:  "text_completion",
+		}
+
+		jsonResult, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", jsonResult)
+
+		// Return the prediction in the response body
+		return c.JSON(resp)
+	}
+}
+
+// https://platform.openai.com/docs/api-reference/embeddings
+func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		log.Debug().Msgf("Parameter Config: %+v", config)
+		items := []Item{}
+
+		for i, s := range config.InputToken {
+			// get the model function to call for the result
+			embedFn, err := ModelEmbedding("", s, loader, *config)
+			if err != nil {
+				return err
+			}
+
+			embeddings, err := embedFn()
+			if err != nil {
+				return err
+			}
+			items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
+		}
+
+		for i, s := range config.InputStrings {
+			// get the model function to call for the result
+			embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
+			if err != nil {
+				return err
+			}
+
+			embeddings, err := embedFn()
+			if err != nil {
+				return err
+			}
+			items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
+		}
+
+		resp := &OpenAIResponse{
+			Model:  input.Model, // we have to return what the user sent here, due to OpenAI spec.
+			Data:   items,
+			Object: "list",
+		}
+
+		jsonResult, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", jsonResult)
+
+		// Return the prediction in the response body
+		return c.JSON(resp)
+	}
+}
+
+func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+
+	process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
+		ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
+			resp := OpenAIResponse{
+				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
+				Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
+				Object:  "chat.completion.chunk",
+			}
+			log.Debug().Msgf("Sending goroutine: %s", s)
+
+			responses <- resp
+			return true
+		})
+		close(responses)
+	}
+	return func(c *fiber.Ctx) error {
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		log.Debug().Msgf("Parameter Config: %+v", config)
+
+		var predInput string
+
+		mess := []string{}
+		for _, i := range input.Messages {
+			var content string
+			r := config.Roles[i.Role]
+			if r != "" {
+				content = fmt.Sprint(r, " ", i.Content)
+			} else {
+				content = i.Content
+			}
+
+			mess = append(mess, content)
+		}
+
+		predInput = strings.Join(mess, "\n")
+
+		if input.Stream {
+			log.Debug().Msgf("Stream request received")
+			c.Context().SetContentType("text/event-stream")
+			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
+			//	c.Set("Content-Type", "text/event-stream")
+			c.Set("Cache-Control", "no-cache")
+			c.Set("Connection", "keep-alive")
+			c.Set("Transfer-Encoding", "chunked")
+		}
+
+		templateFile := config.Model
+
+		if config.TemplateConfig.Chat != "" {
+			templateFile = config.TemplateConfig.Chat
+		}
+
+		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+		templatedInput, err := loader.TemplatePrefix(templateFile, struct {
+			Input string
+		}{Input: predInput})
+		if err == nil {
+			predInput = templatedInput
+			log.Debug().Msgf("Template found, input modified to: %s", predInput)
+		}
+
+		if input.Stream {
+			responses := make(chan OpenAIResponse)
+
+			go process(predInput, input, config, loader, responses)
+
+			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
+
+				for ev := range responses {
+					var buf bytes.Buffer
+					enc := json.NewEncoder(&buf)
+					enc.Encode(ev)
+
+					fmt.Fprintf(w, "event: data\n\n")
+					fmt.Fprintf(w, "data: %v\n\n", buf.String())
+					log.Debug().Msgf("Sending chunk: %s", buf.String())
+					w.Flush()
+				}
+
+				w.WriteString("event: data\n\n")
+				resp := &OpenAIResponse{
+					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+					Choices: []Choice{{FinishReason: "stop"}},
+				}
+				respData, _ := json.Marshal(resp)
+
+				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
+				w.Flush()
+			}))
+			return nil
+		}
+
+		result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
+			*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
+		}, nil)
+		if err != nil {
+			return err
+		}
+
+		resp := &OpenAIResponse{
+			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+			Choices: result,
+			Object:  "chat.completion",
+		}
+		respData, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", respData)
+
+		// Return the prediction in the response body
+		return c.JSON(resp)
+	}
+}
+
+func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		model, input, err := readInput(c, loader, true)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		log.Debug().Msgf("Parameter Config: %+v", config)
+
+		templateFile := config.Model
+
+		if config.TemplateConfig.Edit != "" {
+			templateFile = config.TemplateConfig.Edit
+		}
+
+		var result []Choice
+		for _, i := range config.InputStrings {
+			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
+			templatedInput, err := loader.TemplatePrefix(templateFile, struct {
+				Input       string
+				Instruction string
+			}{Input: i})
+			if err == nil {
+				i = templatedInput
+				log.Debug().Msgf("Template found, input modified to: %s", i)
+			}
+
+			r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
+				*c = append(*c, Choice{Text: s})
+			}, nil)
+			if err != nil {
+				return err
+			}
+
+			result = append(result, r...)
+		}
+
+		resp := &OpenAIResponse{
+			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
+			Choices: result,
+			Object:  "edit",
+		}
+
+		jsonResult, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", jsonResult)
+
+		// Return the prediction in the response body
+		return c.JSON(resp)
+	}
+}
+
+// https://platform.openai.com/docs/api-reference/images/create
+
+/*
+*
+
+	curl http://localhost:8080/v1/images/generations \
+	  -H "Content-Type: application/json" \
+	  -d '{
+	    "prompt": "A cute baby sea otter",
+	    "n": 1,
+	    "size": "512x512"
+	  }'
+
+*
+*/
+func imageEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, imageDir string) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		m, input, err := readInput(c, loader, false)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		if m == "" {
+			m = model.StableDiffusionBackend
+		}
+		log.Debug().Msgf("Loading model: %+v", m)
+
+		config, input, err := readConfig(m, input, cm, loader, debug, 0, 0, false)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		log.Debug().Msgf("Parameter Config: %+v", config)
+
+		// XXX: Only stablediffusion is supported for now
+		if config.Backend == "" {
+			config.Backend = model.StableDiffusionBackend
+		}
+
+		sizeParts := strings.Split(input.Size, "x")
+		if len(sizeParts) != 2 {
+			return fmt.Errorf("Invalid value for 'size'")
+		}
+		width, err := strconv.Atoi(sizeParts[0])
+		if err != nil {
+			return fmt.Errorf("Invalid value for 'size'")
+		}
+		height, err := strconv.Atoi(sizeParts[1])
+		if err != nil {
+			return fmt.Errorf("Invalid value for 'size'")
+		}
+
+		b64JSON := false
+		if input.ResponseFormat == "b64_json" {
+			b64JSON = true
+		}
+
+		var result []Item
+		for _, i := range config.PromptStrings {
+			n := input.N
+			if input.N == 0 {
+				n = 1
+			}
+			for j := 0; j < n; j++ {
+				prompts := strings.Split(i, "|")
+				positive_prompt := prompts[0]
+				negative_prompt := ""
+				if len(prompts) > 1 {
+					negative_prompt = prompts[1]
+				}
+
+				mode := 0
+				step := 15
+
+				if input.Mode != 0 {
+					mode = input.Mode
+				}
+
+				if input.Step != 0 {
+					step = input.Step
+				}
+
+				tempDir := ""
+				if !b64JSON {
+					tempDir = imageDir
+				}
+				// Create a temporary file
+				outputFile, err := ioutil.TempFile(tempDir, "b64")
+				if err != nil {
+					return err
+				}
+				outputFile.Close()
+				output := outputFile.Name() + ".png"
+				// Rename the temporary file
+				err = os.Rename(outputFile.Name(), output)
+				if err != nil {
+					return err
+				}
+
+				baseURL := c.BaseURL()
+
+				fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, loader, *config)
+				if err != nil {
+					return err
+				}
+				if err := fn(); err != nil {
+					return err
+				}
+
+				item := &Item{}
+
+				if b64JSON {
+					defer os.RemoveAll(output)
+					data, err := os.ReadFile(output)
+					if err != nil {
+						return err
+					}
+					item.B64JSON = base64.StdEncoding.EncodeToString(data)
+				} else {
+					base := filepath.Base(output)
+					item.URL = baseURL + "/generated-images/" + base
+				}
+
+				result = append(result, *item)
+			}
+		}
+
+		resp := &OpenAIResponse{
+			Data: result,
+		}
+
+		jsonResult, _ := json.Marshal(resp)
+		log.Debug().Msgf("Response: %s", jsonResult)
+
+		// Return the prediction in the response body
+		return c.JSON(resp)
+	}
+}
+
+// https://platform.openai.com/docs/api-reference/audio/create
+func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		m, input, err := readInput(c, loader, false)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+
+		config, input, err := readConfig(m, input, cm, loader, debug, threads, ctx, f16)
+		if err != nil {
+			return fmt.Errorf("failed reading parameters from request:%w", err)
+		}
+		// retrieve the file data from the request
+		file, err := c.FormFile("file")
+		if err != nil {
+			return err
+		}
+		f, err := file.Open()
+		if err != nil {
+			return err
+		}
+		defer f.Close()
+
+		dir, err := os.MkdirTemp("", "whisper")
+
+		if err != nil {
+			return err
+		}
+		defer os.RemoveAll(dir)
+
+		dst := filepath.Join(dir, path.Base(file.Filename))
+		dstFile, err := os.Create(dst)
+		if err != nil {
+			return err
+		}
+
+		if _, err := io.Copy(dstFile, f); err != nil {
+			log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
+			return err
+		}
+
+		log.Debug().Msgf("Audio file copied to: %+v", dst)
+
+		whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
+		if err != nil {
+			return err
+		}
+
+		if whisperModel == nil {
+			return fmt.Errorf("could not load whisper model")
+		}
+
+		w, ok := whisperModel.(whisper.Model)
+		if !ok {
+			return fmt.Errorf("loader returned non-whisper object")
+		}
+
+		tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
+		if err != nil {
+			return err
+		}
+
+		log.Debug().Msgf("Trascribed: %+v", tr)
+		// TODO: handle different outputs here
+		return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr})
+	}
+}
+
+func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
+	return func(c *fiber.Ctx) error {
+		models, err := loader.ListModels()
+		if err != nil {
+			return err
+		}
+		var mm map[string]interface{} = map[string]interface{}{}
+
+		dataModels := []OpenAIModel{}
+		for _, m := range models {
+			mm[m] = nil
+			dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
+		}
+
+		for k := range cm {
+			if _, exists := mm[k]; !exists {
+				dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
+			}
+		}
+
+		return c.JSON(struct {
+			Object string        `json:"object"`
+			Data   []OpenAIModel `json:"data"`
+		}{
+			Object: "list",
+			Data:   dataModels,
+		})
+	}
+}
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -0,0 +1,561 @@
+package api
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"sync"
+
+	"github.com/donomii/go-rwkv.cpp"
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
+	"github.com/go-skynet/bloomz.cpp"
+	bert "github.com/go-skynet/go-bert.cpp"
+	gpt2 "github.com/go-skynet/go-gpt2.cpp"
+	llama "github.com/go-skynet/go-llama.cpp"
+	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
+)
+
+// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+var mutexMap sync.Mutex
+var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
+
+func defaultLLamaOpts(c Config) []llama.ModelOption {
+	llamaOpts := []llama.ModelOption{}
+	if c.ContextSize != 0 {
+		llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
+	}
+	if c.F16 {
+		llamaOpts = append(llamaOpts, llama.EnableF16Memory)
+	}
+	if c.Embeddings {
+		llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
+	}
+
+	if c.NGPULayers != 0 {
+		llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
+	}
+
+	return llamaOpts
+}
+
+func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config) (func() error, error) {
+	if c.Backend != model.StableDiffusionBackend {
+		return nil, fmt.Errorf("endpoint only working with stablediffusion models")
+	}
+	inferenceModel, err := loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads))
+	if err != nil {
+		return nil, err
+	}
+
+	var fn func() error
+	switch model := inferenceModel.(type) {
+	case *stablediffusion.StableDiffusion:
+		fn = func() error {
+			return model.GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst)
+		}
+
+	default:
+		fn = func() error {
+			return fmt.Errorf("creation of images not supported by the backend")
+		}
+	}
+
+	return func() error {
+		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+		mutexMap.Lock()
+		l, ok := mutexes[c.Backend]
+		if !ok {
+			m := &sync.Mutex{}
+			mutexes[c.Backend] = m
+			l = m
+		}
+		mutexMap.Unlock()
+		l.Lock()
+		defer l.Unlock()
+
+		return fn()
+	}, nil
+}
+
+func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
+	if !c.Embeddings {
+		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
+	}
+
+	modelFile := c.Model
+
+	llamaOpts := defaultLLamaOpts(c)
+
+	var inferenceModel interface{}
+	var err error
+	if c.Backend == "" {
+		inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
+	} else {
+		inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	var fn func() ([]float32, error)
+	switch model := inferenceModel.(type) {
+	case *llama.LLama:
+		fn = func() ([]float32, error) {
+			predictOptions := buildLLamaPredictOptions(c)
+			if len(tokens) > 0 {
+				return model.TokenEmbeddings(tokens, predictOptions...)
+			}
+			return model.Embeddings(s, predictOptions...)
+		}
+	// bert embeddings
+	case *bert.Bert:
+		fn = func() ([]float32, error) {
+			if len(tokens) > 0 {
+				return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
+			}
+			return model.Embeddings(s, bert.SetThreads(c.Threads))
+		}
+	default:
+		fn = func() ([]float32, error) {
+			return nil, fmt.Errorf("embeddings not supported by the backend")
+		}
+	}
+
+	return func() ([]float32, error) {
+		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+		mutexMap.Lock()
+		l, ok := mutexes[modelFile]
+		if !ok {
+			m := &sync.Mutex{}
+			mutexes[modelFile] = m
+			l = m
+		}
+		mutexMap.Unlock()
+		l.Lock()
+		defer l.Unlock()
+
+		embeds, err := fn()
+		if err != nil {
+			return embeds, err
+		}
+		// Remove trailing 0s
+		for i := len(embeds) - 1; i >= 0; i-- {
+			if embeds[i] == 0.0 {
+				embeds = embeds[:i]
+			} else {
+				break
+			}
+		}
+		return embeds, nil
+	}, nil
+}
+
+func buildLLamaPredictOptions(c Config) []llama.PredictOption {
+	// Generate the prediction using the language model
+	predictOptions := []llama.PredictOption{
+		llama.SetTemperature(c.Temperature),
+		llama.SetTopP(c.TopP),
+		llama.SetTopK(c.TopK),
+		llama.SetTokens(c.Maxtokens),
+		llama.SetThreads(c.Threads),
+	}
+
+	if c.Mirostat != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
+	}
+
+	if c.MirostatETA != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
+	}
+
+	if c.MirostatTAU != 0 {
+		predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
+	}
+
+	if c.Debug {
+		predictOptions = append(predictOptions, llama.Debug)
+	}
+
+	predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
+
+	if c.RepeatPenalty != 0 {
+		predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
+	}
+
+	if c.Keep != 0 {
+		predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
+	}
+
+	if c.Batch != 0 {
+		predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
+	}
+
+	if c.F16 {
+		predictOptions = append(predictOptions, llama.EnableF16KV)
+	}
+
+	if c.IgnoreEOS {
+		predictOptions = append(predictOptions, llama.IgnoreEOS)
+	}
+
+	if c.Seed != 0 {
+		predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
+	}
+
+	return predictOptions
+}
+
+func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
+	supportStreams := false
+	modelFile := c.Model
+
+	llamaOpts := defaultLLamaOpts(c)
+
+	var inferenceModel interface{}
+	var err error
+	if c.Backend == "" {
+		inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
+	} else {
+		inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	var fn func() (string, error)
+
+	switch model := inferenceModel.(type) {
+	case *rwkv.RwkvState:
+		supportStreams = true
+
+		fn = func() (string, error) {
+			stopWord := "\n"
+			if len(c.StopWords) > 0 {
+				stopWord = c.StopWords[0]
+			}
+
+			if err := model.ProcessInput(s); err != nil {
+				return "", err
+			}
+
+			response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
+
+			return response, nil
+		}
+	case *gpt2.GPTNeoX:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.Replit:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.Starcoder:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.RedPajama:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *bloomz.Bloomz:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []bloomz.PredictOption{
+				bloomz.SetTemperature(c.Temperature),
+				bloomz.SetTopP(c.TopP),
+				bloomz.SetTopK(c.TopK),
+				bloomz.SetTokens(c.Maxtokens),
+				bloomz.SetThreads(c.Threads),
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.StableLM:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.Dolly:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt2.GPT2:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
+	case *gpt4all.Model:
+		supportStreams = true
+
+		fn = func() (string, error) {
+			if tokenCallback != nil {
+				model.SetTokenCallback(tokenCallback)
+			}
+
+			// Generate the prediction using the language model
+			predictOptions := []gpt4all.PredictOption{
+				gpt4all.SetTemperature(c.Temperature),
+				gpt4all.SetTopP(c.TopP),
+				gpt4all.SetTopK(c.TopK),
+				gpt4all.SetTokens(c.Maxtokens),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
+			}
+
+			str, er := model.Predict(
+				s,
+				predictOptions...,
+			)
+			// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
+			// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
+			// after a stream event has occurred
+			model.SetTokenCallback(nil)
+			return str, er
+		}
+	case *llama.LLama:
+		supportStreams = true
+		fn = func() (string, error) {
+
+			if tokenCallback != nil {
+				model.SetTokenCallback(tokenCallback)
+			}
+
+			predictOptions := buildLLamaPredictOptions(c)
+
+			str, er := model.Predict(
+				s,
+				predictOptions...,
+			)
+			// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
+			// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
+			// after a stream event has occurred
+			model.SetTokenCallback(nil)
+			return str, er
+		}
+	}
+
+	return func() (string, error) {
+		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
+		mutexMap.Lock()
+		l, ok := mutexes[modelFile]
+		if !ok {
+			m := &sync.Mutex{}
+			mutexes[modelFile] = m
+			l = m
+		}
+		mutexMap.Unlock()
+		l.Lock()
+		defer l.Unlock()
+
+		res, err := fn()
+		if tokenCallback != nil && !supportStreams {
+			tokenCallback(res)
+		}
+		return res, err
+	}, nil
+}
+
+func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
+	result := []Choice{}
+
+	n := input.N
+
+	if input.N == 0 {
+		n = 1
+	}
+
+	// get the model function to call for the result
+	predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
+	if err != nil {
+		return result, err
+	}
+
+	for i := 0; i < n; i++ {
+		prediction, err := predFunc()
+		if err != nil {
+			return result, err
+		}
+
+		prediction = Finetune(*config, predInput, prediction)
+		cb(prediction, &result)
+
+		//result = append(result, Choice{Text: prediction})
+
+	}
+	return result, err
+}
+
+var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
+var mu sync.Mutex = sync.Mutex{}
+
+func Finetune(config Config, input, prediction string) string {
+	if config.Echo {
+		prediction = input + prediction
+	}
+
+	for _, c := range config.Cutstrings {
+		mu.Lock()
+		reg, ok := cutstrings[c]
+		if !ok {
+			cutstrings[c] = regexp.MustCompile(c)
+			reg = cutstrings[c]
+		}
+		mu.Unlock()
+		prediction = reg.ReplaceAllString(prediction, "")
+	}
+
+	for _, c := range config.TrimSpace {
+		prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
+	}
+	return prediction
+
+}
--- a/client/client.go
+++ b/client/client.go
@@ -1,75 +0,0 @@
-package client
-
-import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"net/http"
-)
-
-type Prediction struct {
-	Prediction string `json:"prediction"`
-}
-
-type Client struct {
-	baseURL  string
-	client   *http.Client
-	endpoint string
-}
-
-func NewClient(baseURL string) *Client {
-	return &Client{
-		baseURL:  baseURL,
-		client:   &http.Client{},
-		endpoint: "/predict",
-	}
-}
-
-type InputData struct {
-	Text        string  `json:"text"`
-	TopP        float64 `json:"topP,omitempty"`
-	TopK        int     `json:"topK,omitempty"`
-	Temperature float64 `json:"temperature,omitempty"`
-	Tokens      int     `json:"tokens,omitempty"`
-}
-
-func (c *Client) Predict(text string, opts ...InputOption) (string, error) {
-	input := NewInputData(opts...)
-	input.Text = text
-
-	// encode input data to JSON format
-	inputBytes, err := json.Marshal(input)
-	if err != nil {
-		return "", err
-	}
-
-	// create HTTP request
-	url := c.baseURL + c.endpoint
-	req, err := http.NewRequest("POST", url, bytes.NewBuffer(inputBytes))
-	if err != nil {
-		return "", err
-	}
-
-	// set request headers
-	req.Header.Set("Content-Type", "application/json")
-
-	// send request and get response
-	resp, err := c.client.Do(req)
-	if err != nil {
-		return "", err
-	}
-	defer resp.Body.Close()
-
-	if resp.StatusCode != http.StatusOK {
-		return "", fmt.Errorf("request failed with status %d", resp.StatusCode)
-	}
-
-	// decode response body to Prediction struct
-	var prediction Prediction
-	err = json.NewDecoder(resp.Body).Decode(&prediction)
-	if err != nil {
-		return "", err
-	}
-
-	return prediction.Prediction, nil
-}
--- a/client/options.go
+++ b/client/options.go
@@ -1,51 +0,0 @@
-package client
-
-import "net/http"
-
-type ClientOption func(c *Client)
-
-func WithHTTPClient(httpClient *http.Client) ClientOption {
-	return func(c *Client) {
-		c.client = httpClient
-	}
-}
-
-func WithEndpoint(endpoint string) ClientOption {
-	return func(c *Client) {
-		c.endpoint = endpoint
-	}
-}
-
-type InputOption func(d *InputData)
-
-func NewInputData(opts ...InputOption) *InputData {
-	data := &InputData{}
-	for _, opt := range opts {
-		opt(data)
-	}
-	return data
-}
-
-func WithTopP(topP float64) InputOption {
-	return func(d *InputData) {
-		d.TopP = topP
-	}
-}
-
-func WithTopK(topK int) InputOption {
-	return func(d *InputData) {
-		d.TopK = topK
-	}
-}
-
-func WithTemperature(temperature float64) InputOption {
-	return func(d *InputData) {
-		d.Temperature = temperature
-	}
-}
-
-func WithTokens(tokens int) InputOption {
-	return func(d *InputData) {
-		d.Tokens = tokens
-	}
-}
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: .
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    env_file:
+      - .env
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+cd /build
+
+make build
+
+./local-ai "$@"
--- a/examples/README.md
+++ b/examples/README.md
@@ -0,0 +1,94 @@
+# Examples
+
+Here is a list of projects that can easily be integrated with the LocalAI backend. 
+
+### Projects
+
+
+### Chatbot-UI
+
+_by [@mkellerman](https://github.com/mkellerman)_
+
+![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
+
+This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/)
+
+### Discord bot
+
+_by [@mudler](https://github.com/mudler)_
+
+Run a discord bot which lets you talk directly with a model
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our discord server.
+
+### Langchain
+
+_by [@dave-gray101](https://github.com/dave-gray101)_
+
+A ready to use example to show e2e how to integrate LocalAI with langchain
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/)
+
+### Langchain Python
+
+_by [@mudler](https://github.com/mudler)_
+
+A ready to use example to show e2e how to integrate LocalAI with langchain
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/)
+
+### LocalAI WebUI
+
+_by [@dhruvgera](https://github.com/dhruvgera)_
+
+![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
+
+A light, community-maintained web interface for LocalAI
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/)
+
+### How to run rwkv models
+
+_by [@mudler](https://github.com/mudler)_
+
+A full example on how to run RWKV models with LocalAI
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/)
+
+### Slack bot
+
+_by [@mudler](https://github.com/mudler)_
+
+Run a slack bot which lets you talk directly with a model
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)
+
+### Question answering on documents with llama-index
+
+_by [@mudler](https://github.com/mudler)_
+
+Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
+
+### Question answering on documents with langchain and chroma
+
+_by [@mudler](https://github.com/mudler)_
+
+Shows how to integrate with `Langchain` and `Chroma` to enable question answering on a set of documents.
+
+[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma/)
+
+### Template for Runpod.io
+
+_by [@fHachenberg](https://github.com/fHachenberg)_
+
+Allows to run any LocalAI-compatible model as a backend on the servers of https://runpod.io
+
+[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)
+
+## Want to contribute?
+
+Create an issue, and put `Example: <description>` in the title! We will post your examples here.
--- a/examples/chatbot-ui/README.md
+++ b/examples/chatbot-ui/README.md
@@ -0,0 +1,48 @@
+# chatbot-ui
+
+Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
+
+![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
+
+## Setup
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/chatbot-ui
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# start with docker-compose
+docker-compose up -d --pull always
+# or you can build the images with:
+# docker-compose up -d --build
+```
+
+## Pointing chatbot-ui to a separately managed LocalAI service
+
+If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
+```
+version: '3.6'
+
+services:
+  chatgpt:
+    image: ghcr.io/mckaywrigley/chatbot-ui:main
+    ports:
+      - 3000:3000
+    environment:
+      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+      - 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
+```
+
+Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
+
+## Accessing chatbot-ui
+
+Open http://localhost:3000 for the Web UI.
+
--- a/examples/chatbot-ui/docker-compose.yaml
+++ b/examples/chatbot-ui/docker-compose.yaml
@@ -0,0 +1,24 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
+
+  chatgpt:
+    image: ghcr.io/mckaywrigley/chatbot-ui:main
+    ports:
+      - 3000:3000
+    environment:
+      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+      - 'OPENAI_API_HOST=http://api:8080'
--- a/examples/chatbot-ui/models/completion.tmpl
+++ b/examples/chatbot-ui/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
--- a/examples/chatbot-ui/models/gpt-3.5-turbo.yaml
+++ b/examples/chatbot-ui/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,17 @@
+name: gpt-3.5-turbo
+parameters:
+  model: ggml-gpt4all-j
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 14
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+template:
+  completion: completion
+  chat: gpt4all
--- a/examples/chatbot-ui/models/gpt4all.tmpl
+++ b/examples/chatbot-ui/models/gpt4all.tmpl
@@ -0,0 +1,4 @@
+The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+### Prompt:
+{{.Input}}
+### Response:
--- a/examples/discord-bot/.env.example
+++ b/examples/discord-bot/.env.example
@@ -0,0 +1,6 @@
+OPENAI_API_KEY=x
+DISCORD_BOT_TOKEN=x
+DISCORD_CLIENT_ID=x
+OPENAI_API_BASE=http://api:8080
+ALLOWED_SERVER_IDS=x
+SERVER_TO_MODERATION_CHANNEL=1:1
--- a/examples/discord-bot/README.md
+++ b/examples/discord-bot/README.md
@@ -0,0 +1,76 @@
+# discord-bot
+
+![Screenshot from 2023-05-01 07-58-19](https://user-images.githubusercontent.com/2420543/235413924-0cb2e75b-f2d6-4119-8610-44386e44afb8.png)
+
+## Setup
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/discord-bot
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup)
+cp -rfv .env.example .env
+vim .env
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup
+
+Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..`
+
+## Kubernetes
+
+- install the local-ai chart first
+- change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: discord-bot
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: localai
+  namespace: discord-bot
+  labels:
+    app: localai
+spec:
+  selector:
+    matchLabels:
+      app: localai
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: localai
+      name: localai
+    spec:
+      containers:
+        - name: localai-discord
+          env:
+          - name: OPENAI_API_KEY
+            value: "x"
+          - name: DISCORD_BOT_TOKEN
+            value: ""
+          - name: DISCORD_CLIENT_ID
+            value: ""
+          - name: OPENAI_API_BASE
+            value: "http://local-ai.default.svc.cluster.local:8080"
+          - name: ALLOWED_SERVER_IDS
+            value: "xx"
+          - name: SERVER_TO_MODERATION_CHANNEL
+            value: "1:1"
+          image: quay.io/go-skynet/gpt-discord-bot:main
+```
--- a/examples/discord-bot/docker-compose.yaml
+++ b/examples/discord-bot/docker-compose.yaml
@@ -0,0 +1,21 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
+
+  bot:
+    image: quay.io/go-skynet/gpt-discord-bot:main
+    env_file:
+    - .env
--- a/examples/discord-bot/models
+++ b/examples/discord-bot/models
@@ -0,0 +1 @@
+../chatbot-ui/models/
--- a/examples/langchain-chroma/README.md
+++ b/examples/langchain-chroma/README.md
@@ -0,0 +1,54 @@
+# Data query example
+
+This example makes use of [langchain and chroma](https://blog.langchain.dev/langchain-chroma/) to enable question answering on a set of documents.
+
+## Setup
+
+Download the models and start the API:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/query_data
+
+wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+### Python requirements
+
+```
+pip install -r requirements.txt
+```
+
+### Create a storage
+
+In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt
+python store.py
+```
+
+After it finishes, a directory "storage" will be created with the vector index database.
+
+## Query
+
+We can now query the dataset. 
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python query.py
+# President Trump recently stated during a press conference regarding tax reform legislation that "we're getting rid of all these loopholes." He also mentioned that he wants to simplify the system further through changes such as increasing the standard deduction amount and making other adjustments aimed at reducing taxpayers' overall burden.    
+```
+
+Keep in mind now things are hit or miss!
--- a/examples/langchain-chroma/models/completion.tmpl
+++ b/examples/langchain-chroma/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
--- a/examples/langchain-chroma/models/embeddings.yaml
+++ b/examples/langchain-chroma/models/embeddings.yaml
@@ -0,0 +1,5 @@
+name: text-embedding-ada-002
+parameters:
+  model: bert
+backend: bert-embeddings
+embeddings: true
--- a/examples/langchain-chroma/models/gpt-3.5-turbo.yaml
+++ b/examples/langchain-chroma/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,16 @@
+name: gpt-3.5-turbo
+parameters:
+  model: ggml-gpt4all-j
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+template:
+  completion: completion
+  chat: gpt4all
--- a/examples/langchain-chroma/models/gpt4all.tmpl
+++ b/examples/langchain-chroma/models/gpt4all.tmpl
@@ -0,0 +1,4 @@
+The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+### Prompt:
+{{.Input}}
+### Response:
--- a/examples/langchain-chroma/query.py
+++ b/examples/langchain-chroma/query.py
@@ -0,0 +1,20 @@
+
+import os
+from langchain.vectorstores import Chroma
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.llms import OpenAI
+from langchain.chains import VectorDBQA
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# Load and process the text
+embedding = OpenAIEmbeddings()
+persist_directory = 'db'
+
+# Now we can load the persisted database from disk, and use it as normal. 
+vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
+qa = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path), chain_type="stuff", vectorstore=vectordb)
+
+query = "What the president said about taxes ?"
+print(qa.run(query))
+
--- a/examples/langchain-chroma/requirements.txt
+++ b/examples/langchain-chroma/requirements.txt
@@ -0,0 +1,4 @@
+langchain==0.0.160
+openai==0.27.6
+chromadb==0.3.21
+llama-index==0.6.2
--- a/examples/langchain-chroma/store.py
+++ b/examples/langchain-chroma/store.py
@@ -0,0 +1,28 @@
+
+import os
+from langchain.vectorstores import Chroma
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter
+from langchain.llms import OpenAI
+from langchain.chains import VectorDBQA
+from langchain.document_loaders import TextLoader
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# Load and process the text
+loader = TextLoader('state_of_the_union.txt')
+documents = loader.load()
+
+text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
+#text_splitter = TokenTextSplitter()
+texts = text_splitter.split_documents(documents)
+
+# Embed and store the texts
+# Supplying a persist_directory will store the embeddings on disk
+persist_directory = 'db'
+
+embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
+vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
+
+vectordb.persist()
+vectordb = None
--- a/examples/langchain-python/README.md
+++ b/examples/langchain-python/README.md
@@ -0,0 +1,35 @@
+## Langchain-python
+
+Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
+
+To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string.
+
+See the example below:
+
+```
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/langchain-python
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# start with docker-compose
+docker-compose up -d --build
+
+
+pip install langchain
+pip install openai
+
+export OPENAI_API_BASE=http://localhost:8080
+export OPENAI_API_KEY=sk-
+
+python test.py
+# A good company name for a company that makes colorful socks would be "Colorsocks".
+
+python agent.py
+```
--- a/examples/langchain-python/agent.py
+++ b/examples/langchain-python/agent.py
@@ -0,0 +1,44 @@
+## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
+
+from io import StringIO
+import sys
+import os
+from typing import Dict, Optional
+
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+from langchain.agents.tools import Tool
+from langchain.llms import OpenAI
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
+
+class PythonREPL:
+    """Simulates a standalone Python REPL."""
+
+    def __init__(self):
+        pass        
+
+    def run(self, command: str) -> str:
+        """Run command and returns anything printed."""
+        old_stdout = sys.stdout
+        sys.stdout = mystdout = StringIO()
+        try:
+            exec(command, globals())
+            sys.stdout = old_stdout
+            output = mystdout.getvalue()
+        except Exception as e:
+            sys.stdout = old_stdout
+            output = str(e)
+        return output
+
+llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
+python_repl = Tool(
+        "Python REPL",
+        PythonREPL().run,
+        """A Python shell. Use this to execute python commands. Input should be a valid python command.
+        If you expect output it should be printed out.""",
+    )
+tools = [python_repl]
+agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
+agent.run("What is the 10th fibonacci number?")
--- a/examples/langchain-python/docker-compose.yaml
+++ b/examples/langchain-python/docker-compose.yaml
@@ -0,0 +1,16 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
--- a/examples/langchain-python/models
+++ b/examples/langchain-python/models
@@ -0,0 +1 @@
+../chatbot-ui/models
--- a/examples/langchain-python/test.py
+++ b/examples/langchain-python/test.py
@@ -0,0 +1,6 @@
+
+from langchain.llms import OpenAI
+
+llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
+text = "What would be a good company name for a company that makes colorful socks?"
+print(llm(text))
--- a/examples/langchain/.gitignore
+++ b/examples/langchain/.gitignore
@@ -0,0 +1,2 @@
+models/ggml-koala-13B-4bit-128g
+models/ggml-gpt4all-j
--- a/examples/langchain/JS.Dockerfile
+++ b/examples/langchain/JS.Dockerfile
@@ -0,0 +1,6 @@
+FROM node:latest
+COPY ./langchainjs-localai-example /app
+WORKDIR /app
+RUN npm install
+RUN npm run build
+ENTRYPOINT [ "npm", "run", "start" ]
--- a/examples/langchain/PY.Dockerfile
+++ b/examples/langchain/PY.Dockerfile
@@ -0,0 +1,5 @@
+FROM python:3.10-bullseye
+COPY ./langchainpy-localai-example /app
+WORKDIR /app
+RUN pip install --no-cache-dir -r requirements.txt
+ENTRYPOINT [ "python", "./full_demo.py" ];
--- a/examples/langchain/README.md
+++ b/examples/langchain/README.md
@@ -0,0 +1,30 @@
+# langchain
+
+Example of using langchain, with the standard OpenAI llm module, and LocalAI. Has docker compose profiles for both the Typescript and Python versions.
+
+**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
+
+## Setup
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/langchain
+
+# (optional) - Edit the example code in typescript.
+# vi ./langchainjs-localai-example/index.ts
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# start with docker-compose for typescript!
+docker-compose --profile ts up --build
+
+# or start with docker-compose for python!
+docker-compose --profile py up --build
+```
+
+## Copyright
+
+Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.
--- a/examples/langchain/docker-compose.yaml
+++ b/examples/langchain/docker-compose.yaml
@@ -0,0 +1,43 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
+
+  js:
+    build:
+      context: .
+      dockerfile: JS.Dockerfile
+    profiles:
+      - js
+      - ts
+    depends_on:
+    - "api"
+    environment:
+      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+      - 'OPENAI_API_BASE=http://api:8080/v1'
+      - 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
+
+  py:
+    build:
+      context: .
+      dockerfile: PY.Dockerfile
+    profiles:
+      - py
+    depends_on:
+    - "api"
+    environment:
+      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+      - 'OPENAI_API_BASE=http://api:8080/v1'
+      - 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
--- a/examples/langchain/langchainjs-localai-example/.gitignore
+++ b/examples/langchain/langchainjs-localai-example/.gitignore
@@ -0,0 +1,2 @@
+node_modules/
+dist/
--- a/examples/langchain/langchainjs-localai-example/.vscode/launch.json
+++ b/examples/langchain/langchainjs-localai-example/.vscode/launch.json
@@ -0,0 +1,20 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "node",
+            "request": "launch",
+            "name": "Launch Program",
+            // "skipFiles": [
+            //     "<node_internals>/**"
+            // ],
+            "program": "${workspaceFolder}\\dist\\index.mjs",
+            "outFiles": [
+                "${workspaceFolder}/**/*.js"
+            ]
+        }
+    ]
+}
--- a/examples/langchain/langchainjs-localai-example/package-lock.json
+++ b/examples/langchain/langchainjs-localai-example/package-lock.json
--- a/examples/langchain/langchainjs-localai-example/package.json
+++ b/examples/langchain/langchainjs-localai-example/package.json
@@ -0,0 +1,21 @@
+{
+  "name": "langchainjs-localai-example",
+  "version": "0.1.0",
+  "description": "Trivial Example of using langchain + the OpenAI API + LocalAI together",
+  "main": "index.mjs",
+  "scripts": {
+    "build": "tsc --build",
+    "clean": "tsc --build --clean",
+    "start": "node --trace-warnings dist/index.mjs"
+  },
+  "author": "dave@gray101.com",
+  "license": "MIT",
+  "devDependencies": {
+    "@types/node": "^18.16.4",
+    "typescript": "^5.0.4"
+  },
+  "dependencies": {
+    "langchain": "^0.0.67",
+    "typeorm": "^0.3.15"
+  }
+}
--- a/examples/langchain/langchainjs-localai-example/src/index.mts
+++ b/examples/langchain/langchainjs-localai-example/src/index.mts
@@ -0,0 +1,79 @@
+import { OpenAIChat } from "langchain/llms/openai";
+import { loadQAStuffChain } from "langchain/chains";
+import { Document } from "langchain/document";
+import { initializeAgentExecutorWithOptions } from "langchain/agents";
+import {Calculator} from "langchain/tools/calculator";
+
+const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
+const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
+const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
+
+function getModel(): OpenAIChat {
+  return new OpenAIChat({
+    prefixMessages: [
+      {
+        role: "system",
+        content: "You are a helpful assistant that answers in pirate language",
+      },
+    ],
+    modelName: modelName,
+    maxTokens: 50,
+    openAIApiKey: fakeApiKey,
+    maxRetries: 2
+  }, {
+    basePath: pathToLocalAi,
+    apiKey: fakeApiKey,
+  });
+}
+
+// Minimal example.
+export const run = async () => {
+  const model = getModel();
+  console.log(`about to model.call at ${new Date().toUTCString()}`);
+  const res = await model.call(
+    "What would be a good company name a company that makes colorful socks?"
+  );
+  console.log(`${new Date().toUTCString()}`);
+  console.log({ res });
+};
+
+await run();
+
+// This example uses the `StuffDocumentsChain`
+export const run2 = async () => {
+  const model = getModel();
+  const chainA = loadQAStuffChain(model);
+  const docs = [
+    new Document({ pageContent: "Harrison went to Harvard." }),
+    new Document({ pageContent: "Ankush went to Princeton." }),
+  ];
+  const resA = await chainA.call({
+    input_documents: docs,
+    question: "Where did Harrison go to college?",
+  });
+  console.log({ resA });
+};
+
+await run2();
+
+// Quickly thrown together example of using tools + agents.
+// This seems like it should work, but it doesn't yet.
+export const temporarilyBrokenToolTest = async () => {
+  const model = getModel();
+
+  const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
+    agentType: "zero-shot-react-description",
+  });
+
+  console.log("Loaded agent.");
+
+  const input = `What is the value of (500 *2) + 350 - 13?`;
+
+  console.log(`Executing with input "${input}"...`);
+
+  const result = await executor.call({ input });
+
+  console.log(`Got output ${result.output}`);
+}
+
+await temporarilyBrokenToolTest();
--- a/examples/langchain/langchainjs-localai-example/tsconfig.json
+++ b/examples/langchain/langchainjs-localai-example/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "es2022",
+    "lib": ["ES2022", "DOM"],
+    "module": "ES2022",
+    "moduleResolution": "node",
+    "strict": true,
+    "esModuleInterop": true,
+    "allowSyntheticDefaultImports": true,
+    "isolatedModules": true,
+    "outDir": "./dist"
+  },
+  "include": ["src", "test"],
+  "exclude": ["node_modules", "dist"]
+}
--- a/examples/langchain/langchainpy-localai-example/.vscode/launch.json
+++ b/examples/langchain/langchainpy-localai-example/.vscode/launch.json
@@ -0,0 +1,24 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "redirectOutput": true,
+            "justMyCode": false
+        },
+        {
+            "name": "Python: Attach to Port 5678",
+            "type": "python",
+            "request": "attach",
+            "connect": {
+                "host": "localhost",
+                "port": 5678
+              },
+            "justMyCode": false
+        }
+    ]
+}
--- a/examples/langchain/langchainpy-localai-example/.vscode/settings.json
+++ b/examples/langchain/langchainpy-localai-example/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
+}
--- a/examples/langchain/langchainpy-localai-example/full_demo.py
+++ b/examples/langchain/langchainpy-localai-example/full_demo.py
@@ -0,0 +1,46 @@
+import os
+import logging
+
+from langchain.chat_models import ChatOpenAI
+from langchain import PromptTemplate, LLMChain
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    AIMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+from langchain.schema import (
+    AIMessage,
+    HumanMessage,
+    SystemMessage
+)
+
+# This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI.
+logging.basicConfig(level=logging.DEBUG)
+
+print('Langchain + LocalAI PYTHON Tests')
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
+key = os.environ.get('OPENAI_API_KEY', '-')
+model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
+
+
+chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
+
+print("Created ChatOpenAI for ", chat.model_name)
+
+template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!"
+system_message_prompt = SystemMessagePromptTemplate.from_template(template)
+human_template = "{text}"
+human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
+
+chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
+
+print("ABOUT to execute")
+
+# get a chat completion from the formatted messages
+response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
+
+print(response)
+
+print(".");
--- a/examples/langchain/langchainpy-localai-example/requirements.txt
+++ b/examples/langchain/langchainpy-localai-example/requirements.txt
@@ -0,0 +1,32 @@
+aiohttp==3.8.4
+aiosignal==1.3.1
+async-timeout==4.0.2
+attrs==23.1.0
+certifi==2022.12.7
+charset-normalizer==3.1.0
+colorama==0.4.6
+dataclasses-json==0.5.7
+debugpy==1.6.7
+frozenlist==1.3.3
+greenlet==2.0.2
+idna==3.4
+langchain==0.0.159
+marshmallow==3.19.0
+marshmallow-enum==1.5.1
+multidict==6.0.4
+mypy-extensions==1.0.0
+numexpr==2.8.4
+numpy==1.24.3
+openai==0.27.6
+openapi-schema-pydantic==1.2.4
+packaging==23.1
+pydantic==1.10.7
+PyYAML==6.0
+requests==2.29.0
+SQLAlchemy==2.0.12
+tenacity==8.2.2
+tqdm==4.65.0
+typing-inspect==0.8.0
+typing_extensions==4.5.0
+urllib3==1.26.15
+yarl==1.9.2
--- a/examples/langchain/langchainpy-localai-example/simple_demo.py
+++ b/examples/langchain/langchainpy-localai-example/simple_demo.py
@@ -0,0 +1,6 @@
+
+from langchain.llms import OpenAI
+
+llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
+text = "What would be a good company name for a company that makes colorful socks?"
+print(llm(text))
--- a/examples/langchain/models/completion.tmpl
+++ b/examples/langchain/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
--- a/examples/langchain/models/gpt-3.5-turbo.yaml
+++ b/examples/langchain/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,18 @@
+name: gpt-3.5-turbo
+parameters:
+  model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 4
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+backend: "gptj"
+template:
+  completion: completion
+  chat: gpt4all
--- a/examples/langchain/models/gpt4all.tmpl
+++ b/examples/langchain/models/gpt4all.tmpl
@@ -0,0 +1,4 @@
+The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+### Prompt:
+{{.Input}}
+### Response:
--- a/examples/localai-webui/README.md
+++ b/examples/localai-webui/README.md
@@ -0,0 +1,26 @@
+# localai-webui
+
+Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).
+
+![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
+
+## Setup
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/localai-webui
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# Download any desired models to models/ in the parent LocalAI project dir
+# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+Open http://localhost:3000 for the Web UI.
+
--- a/examples/localai-webui/docker-compose.yml
+++ b/examples/localai-webui/docker-compose.yml
@@ -0,0 +1,20 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - 8080:8080
+    env_file:
+      - .env
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai"]
+
+  frontend:
+    image: quay.io/go-skynet/localai-frontend:master
+    ports:
+      - 3000:3000
--- a/examples/query_data/.gitignore
+++ b/examples/query_data/.gitignore
@@ -0,0 +1 @@
+storage/
--- a/examples/query_data/README.md
+++ b/examples/query_data/README.md
@@ -0,0 +1,67 @@
+# Data query example
+
+This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
+
+It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
+
+Summary of the steps:
+
+- prepare the dataset (and store it into `data`)
+- prepare a vector index database to run queries on
+- run queries
+
+## Requirements
+
+You will need a training data set. Copy that over `data`.
+
+## Setup
+
+Start the API:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/query_data
+
+wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+### Create a storage
+
+In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python store.py
+```
+
+After it finishes, a directory "storage" will be created with the vector index database.
+
+## Query
+
+We can now query the dataset. 
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python query.py
+```
+
+## Update
+
+To update our vector database, run `update.py`
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python update.py
+```
--- a/examples/query_data/data/.keep
+++ b/examples/query_data/data/.keep
--- a/examples/query_data/docker-compose.yml
+++ b/examples/query_data/docker-compose.yml
@@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: .
+      dockerfile: Dockerfile
+    ports:
+      - 8080:8080
+    env_file:
+      - .env
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai"]
--- a/examples/query_data/models/completion.tmpl
+++ b/examples/query_data/models/completion.tmpl
@@ -0,0 +1 @@
+{{.Input}}
--- a/examples/query_data/models/embeddings.yaml
+++ b/examples/query_data/models/embeddings.yaml
@@ -0,0 +1,6 @@
+name: text-embedding-ada-002
+parameters:
+  model: bert
+threads: 14
+backend: bert-embeddings
+embeddings: true
--- a/examples/query_data/models/gpt-3.5-turbo.yaml
+++ b/examples/query_data/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,17 @@
+name: gpt-3.5-turbo
+parameters:
+  model: ggml-gpt4all-j
+  top_k: 80
+  temperature: 0.2
+  top_p: 0.7
+context_size: 1024
+threads: 14
+stopwords:
+- "HUMAN:"
+- "GPT:"
+roles:
+  user: " "
+  system: " "
+template:
+  completion: completion
+  chat: gpt4all
--- a/examples/query_data/query.py
+++ b/examples/query_data/query.py
@@ -0,0 +1,35 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import   LLMPredictor, PromptHelper, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# This example uses text-davinci-003 by default; feel free to change if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 500
+num_output = 256
+max_chunk_overlap = 20
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Load documents from the 'data' directory
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+
+# rebuild storage context
+storage_context = StorageContext.from_defaults(persist_dir='./storage')
+
+# load index
+index = load_index_from_storage(storage_context,     service_context=service_context,    )
+
+query_engine = index.as_query_engine()
+
+data = input("Question: ")
+response = query_engine.query(data)
+print(response)
--- a/examples/query_data/store.py
+++ b/examples/query_data/store.py
@@ -0,0 +1,27 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# This example uses text-davinci-003 by default; feel free to change if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 400
+num_output = 400
+max_chunk_overlap = 30
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Load documents from the 'data' directory
+documents = SimpleDirectoryReader('data').load_data()
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
+index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
+index.storage_context.persist(persist_dir="./storage")
+
--- a/examples/query_data/update.py
+++ b/examples/query_data/update.py
@@ -0,0 +1,32 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import   LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# This example uses text-davinci-003 by default; feel free to change if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 512
+num_output = 256
+max_chunk_overlap = 20
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Load documents from the 'data' directory
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+
+# rebuild storage context
+storage_context = StorageContext.from_defaults(persist_dir='./storage')
+
+# load index
+index = load_index_from_storage(storage_context,     service_context=service_context,    )
+documents = SimpleDirectoryReader('data').load_data()
+index.refresh(documents)
+index.storage_context.persist(persist_dir="./storage")
--- a/examples/rwkv/Dockerfile.build
+++ b/examples/rwkv/Dockerfile.build
@@ -0,0 +1,10 @@
+FROM python
+
+# convert the model (one-off)
+RUN pip3 install torch numpy
+
+WORKDIR /build
+COPY ./scripts/ .
+
+RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
+ENTRYPOINT [ "/build/build.sh" ]
--- a/examples/rwkv/README.md
+++ b/examples/rwkv/README.md
@@ -0,0 +1,59 @@
+# rwkv
+
+Example of how to run rwkv models.
+
+## Run models
+
+Setup:
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/rwkv
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# build the tooling image to convert an rwkv model locally:
+docker build -t rwkv-converter -f Dockerfile.build .
+
+# download and convert a model (one-off) - it's going to be fast on CPU too!
+docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
+
+# Get the tokenizer
+wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
+
+# start with docker-compose
+docker-compose up -d --build
+```
+
+Test it out:
+
+```bash
+curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
+    "model": "gpt-3.5-turbo",
+    "prompt": "A long time ago, in a galaxy far away",
+    "max_tokens": 100,
+    "temperature": 0.9, "top_p": 0.8, "top_k": 80
+  }'
+
+# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+     "model": "gpt-3.5-turbo",            
+     "messages": [{"role": "user", "content": "How are you?"}],
+     "temperature": 0.9, "top_p": 0.8, "top_k": 80
+   }'
+
+# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+```
+
+### Fine tuning
+
+See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).
+
+## See also
+
+- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
+- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
--- a/examples/rwkv/docker-compose.yaml
+++ b/examples/rwkv/docker-compose.yaml
@@ -0,0 +1,16 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
--- a/examples/rwkv/models/gpt-3.5-turbo.yaml
+++ b/examples/rwkv/models/gpt-3.5-turbo.yaml
@@ -0,0 +1,19 @@
+name: gpt-3.5-turbo
+parameters:
+  model: rwkv
+  top_k: 80
+  temperature: 0.9
+  max_tokens: 100
+  top_p: 0.8
+context_size: 1024
+threads: 14
+backend: "rwkv"
+cutwords:
+- "Bob:.*"
+roles:
+  user: "Bob:"
+  system: "Alice:"
+  assistant: "Alice:"
+template:
+  completion: rwkv_completion
+  chat: rwkv_chat
--- a/examples/rwkv/models/rwkv_chat.tmpl
+++ b/examples/rwkv/models/rwkv_chat.tmpl
@@ -0,0 +1,13 @@
+The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
+
+Bob: Hello Alice, how are you doing?
+
+Alice: Hi Bob! Thanks, I'm fine. What about you?
+
+Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
+
+Alice: Not at all! I'm listening.
+
+{{.Input}}
+
+Alice: 
--- a/examples/rwkv/models/rwkv_completion.tmpl
+++ b/examples/rwkv/models/rwkv_completion.tmpl
@@ -0,0 +1 @@
+Complete the following sentence: {{.Input}} 
--- a/examples/rwkv/scripts/build.sh
+++ b/examples/rwkv/scripts/build.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+set -ex
+
+URL=$1
+OUT=$2
+FILENAME=$(basename $URL)
+
+wget -nc $URL -O /build/$FILENAME
+
+python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16
+python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2
--- a/examples/slack-bot/.env.example
+++ b/examples/slack-bot/.env.example
@@ -0,0 +1,11 @@
+SLACK_APP_TOKEN=xapp-1-...
+SLACK_BOT_TOKEN=xoxb-...
+OPENAI_API_KEY=sk-...
+OPENAI_API_BASE=http://api:8080
+OPENAI_MODEL=gpt-3.5-turbo
+OPENAI_TIMEOUT_SECONDS=60
+#OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check
+#for mistakes and make suggestion to improve the language of the given text"
+USE_SLACK_LANGUAGE=true
+SLACK_APP_LOG_LEVEL=INFO
+TRANSLATE_MARKDOWN=true
--- a/examples/slack-bot/README.md
+++ b/examples/slack-bot/README.md
@@ -0,0 +1,27 @@
+# Slack bot
+
+Slackbot using: https://github.com/seratch/ChatGPT-in-Slack
+
+## Setup
+
+```bash
+# Clone LocalAI
+git clone https://github.com/go-skynet/LocalAI
+
+cd LocalAI/examples/slack-bot
+
+git clone https://github.com/seratch/ChatGPT-in-Slack
+
+# (optional) Checkout a specific LocalAI tag
+# git checkout -b build <TAG>
+
+# Download gpt4all-j to models/
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
+
+# Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
+cp -rfv .env.example .env
+vim .env
+
+# start with docker-compose
+docker-compose up -d --build
+```
--- a/examples/slack-bot/docker-compose.yaml
+++ b/examples/slack-bot/docker-compose.yaml
@@ -0,0 +1,23 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/local-ai:latest
+    build:
+      context: ../../
+      dockerfile: Dockerfile.dev
+    ports:
+      - 8080:8080
+    environment:
+      - DEBUG=true
+      - MODELS_PATH=/models
+    volumes:
+      - ./models:/models:cached
+    command: ["/usr/bin/local-ai" ]
+
+  bot:
+    build:
+     context: ./ChatGPT-in-Slack
+     dockerfile: Dockerfile
+    env_file:
+    - .env
--- a/examples/slack-bot/models
+++ b/examples/slack-bot/models
@@ -0,0 +1 @@
+../chatbot-ui/models
--- a/go.mod
+++ b/go.mod
@@ -1,45 +1,68 @@
-module github.com/go-skynet/llama-cli
+module github.com/go-skynet/LocalAI

 go 1.19

 require (
-	github.com/charmbracelet/bubbles v0.15.0
-	github.com/charmbracelet/bubbletea v0.23.2
-	github.com/charmbracelet/lipgloss v0.7.1
-	github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc
-	github.com/gofiber/fiber/v2 v2.42.0
-	github.com/urfave/cli/v2 v2.25.0
+	github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56
+	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d
+	github.com/go-audio/wav v1.1.0
+	github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
+	github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
+	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
+	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84
+	github.com/gofiber/fiber/v2 v2.45.0
+	github.com/hashicorp/go-multierror v1.1.1
+	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
+	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc
+	github.com/onsi/ginkgo/v2 v2.9.5
+	github.com/onsi/gomega v1.27.6
+	github.com/otiai10/copy v1.11.0
+	github.com/otiai10/openaigo v1.1.0
+	github.com/rs/zerolog v1.29.1
+	github.com/sashabaranov/go-openai v1.9.4
+	github.com/swaggo/swag v1.16.1
+	github.com/urfave/cli/v2 v2.25.3
+	github.com/valyala/fasthttp v1.47.0
+	gopkg.in/yaml.v3 v3.0.1
 )

 require (
-	github.com/andybalholm/brotli v1.0.4 // indirect
-	github.com/atotto/clipboard v0.1.4 // indirect
-	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
-	github.com/containerd/console v1.0.3 // indirect
+	github.com/KyleBanks/depth v1.2.1 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
+	github.com/andybalholm/brotli v1.0.5 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+	github.com/go-audio/audio v1.0.0 // indirect
+	github.com/go-audio/riff v1.0.0 // indirect
+	github.com/go-logr/logr v1.2.4 // indirect
+	github.com/go-openapi/jsonpointer v0.19.5 // indirect
+	github.com/go-openapi/jsonreference v0.19.6 // indirect
+	github.com/go-openapi/spec v0.20.4 // indirect
+	github.com/go-openapi/swag v0.19.15 // indirect
+	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
+	github.com/google/go-cmp v0.5.9 // indirect
+	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
 	github.com/google/uuid v1.3.0 // indirect
-	github.com/klauspost/compress v1.15.9 // indirect
-	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
+	github.com/hashicorp/errwrap v1.0.0 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/klauspost/compress v1.16.3 // indirect
+	github.com/mailru/easyjson v0.7.6 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
-	github.com/mattn/go-isatty v0.0.17 // indirect
-	github.com/mattn/go-localereader v0.0.1 // indirect
+	github.com/mattn/go-isatty v0.0.18 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
-	github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b // indirect
-	github.com/muesli/cancelreader v0.2.2 // indirect
-	github.com/muesli/reflow v0.3.0 // indirect
-	github.com/muesli/termenv v0.15.1 // indirect
-	github.com/philhofer/fwd v1.1.1 // indirect
+	github.com/philhofer/fwd v1.1.2 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
-	github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d // indirect
-	github.com/tinylib/msgp v1.1.6 // indirect
+	github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
+	github.com/tinylib/msgp v1.1.8 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
-	github.com/valyala/fasthttp v1.44.0 // indirect
 	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
-	golang.org/x/sync v0.1.0 // indirect
-	golang.org/x/sys v0.6.0 // indirect
-	golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
-	golang.org/x/text v0.3.7 // indirect
+	golang.org/x/net v0.10.0 // indirect
+	golang.org/x/sys v0.8.0 // indirect
+	golang.org/x/text v0.9.0 // indirect
+	golang.org/x/tools v0.9.1 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -1,123 +1,242 @@
-github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
-github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
-github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
-github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
-github.com/aymanbagabas/go-osc52 v1.0.3/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
-github.com/aymanbagabas/go-osc52 v1.2.1/go.mod h1:zT8H+Rk4VSabYN90pWyugflM3ZhpTZNC7cASDfUCdT4=
-github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
-github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
-github.com/charmbracelet/bubbles v0.15.0 h1:c5vZ3woHV5W2b8YZI1q7v4ZNQaPetfHuoHzx+56Z6TI=
-github.com/charmbracelet/bubbles v0.15.0/go.mod h1:Y7gSFbBzlMpUDR/XM9MhZI374Q+1p1kluf1uLl8iK74=
-github.com/charmbracelet/bubbletea v0.23.1/go.mod h1:JAfGK/3/pPKHTnAS8JIE2u9f61BjWTQY57RbT25aMXU=
-github.com/charmbracelet/bubbletea v0.23.2 h1:vuUJ9HJ7b/COy4I30e8xDVQ+VRDUEFykIjryPfgsdps=
-github.com/charmbracelet/bubbletea v0.23.2/go.mod h1:FaP3WUivcTM0xOKNmhciz60M6I+weYLF76mr1JyI7sM=
-github.com/charmbracelet/harmonica v0.2.0/go.mod h1:KSri/1RMQOZLbw7AHqgcBycp8pgJnQMYYT8QZRqZ1Ao=
-github.com/charmbracelet/lipgloss v0.6.0/go.mod h1:tHh2wr34xcHjC2HCXIlGSG1jaDF0S0atAUvBMP6Ppuk=
-github.com/charmbracelet/lipgloss v0.7.1 h1:17WMwi7N1b1rVWOjMT+rCh7sQkvDU75B2hbZpc5Kc1E=
-github.com/charmbracelet/lipgloss v0.7.1/go.mod h1:yG0k3giv8Qj8edTCbbg6AlQ5e8KNWpFujkNawKNhE2c=
-github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
-github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
+github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
+github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
+github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
+github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
-github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc h1:NcmO8mA7iRZIX0Qy2SjcsSaV14+g87MiTey1neUJaFQ=
-github.com/go-skynet/llama v0.0.0-20230321172246-7be5326e18cc/go.mod h1:ZtYsAIud4cvP9VTTI9uhdgR1uCwaO/gGKnZZ95h9i7w=
-github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
-github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d h1:uxKTbiRnplE2SubchneSf4NChtxLJtOy9VdHnQMT0d0=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
+github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
+github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
+github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
+github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
+github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
+github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
+github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
+github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
+github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
+github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
+github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
+github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
+github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
+github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
+github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
+github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVeDF1dOuAP/y6xWVC+BRtf9tJOuEza6Asbg=
+github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
+github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw=
+github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 h1:IcfYY5uH0DdDXEJKJ8bq0WZCd9guPPd3xllaWNy8LOk=
+github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
+github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
+github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b h1:qqxrjY8fYDXQahmCMTCACahm1tbiqHLPUHALkFLyBfo=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84 h1:f5iYF75bAr73Tl8AdtFD5Urs/2bsHKPh52K++jLbsfk=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230516230554-b7bbefbe0b84/go.mod h1:jxyQ26t1aKC5Gn782w9WWh5n1133PxCOfkuc01xM4RQ=
+github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
+github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
+github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
+github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
+github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
-github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
-github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
-github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
-github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
+github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
+github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
+github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
+github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
 github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
 github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
 github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
-github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
-github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
-github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
-github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
-github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
+github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
+github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b h1:1XF24mVaiu7u+CFywTdcDo2ie1pzzhwjt6RHqzpMU34=
-github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho=
-github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
-github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
-github.com/muesli/reflow v0.2.1-0.20210115123740-9e1d0d53df68/go.mod h1:Xk+z4oIWdQqJzsxyjgl3P22oYZnHdZ8FFTHAQQt5BMQ=
-github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
-github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
-github.com/muesli/termenv v0.11.1-0.20220204035834-5ac8409525e0/go.mod h1:Bd5NYQ7pd+SrtBSrSNoBBmXlcY8+Xj4BMJgh8qcZrvs=
-github.com/muesli/termenv v0.13.0/go.mod h1:sP1+uffeLaEYpyOTb8pLCUctGcGLnoFjSn4YJK5e2bc=
-github.com/muesli/termenv v0.14.0/go.mod h1:kG/pF1E7fh949Xhe156crRUrHNyK221IuGO7Ez60Uc8=
-github.com/muesli/termenv v0.15.1 h1:UzuTb/+hhlBugQz28rpzey4ZuKcZ03MeKsoG7IJZIxs=
-github.com/muesli/termenv v0.15.1/go.mod h1:HeAQPTzpfs016yGtA4g00CsdYnVLJvxsS4ANqrZs2sQ=
-github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
+github.com/mudler/go-stable-diffusion v0.0.0-20230516104333-2f32a16b5b24 h1:XfRD/bZom6u4zji7aB0urIVOsPe43KlkzSRrVhlzaOM=
+github.com/mudler/go-stable-diffusion v0.0.0-20230516104333-2f32a16b5b24/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
+github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs=
+github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc h1:OPavP/SUsVWVYPhSUZKZeX8yDSQzf4G+BmUmwzrLTyI=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230516143155-79d6243fe1bc/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
+github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
+github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
+github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
+github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
+github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
+github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
+github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc=
+github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww=
+github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=
+github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
+github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
 github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
-github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
+github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
+github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
+github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
+github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/sahilm/fuzzy v0.1.0/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y=
+github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
+github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
+github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
-github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
-github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
+github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
+github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg=
+github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto=
 github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
-github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
-github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
+github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
+github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
+github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY=
+github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
-github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
-github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
+github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c=
+github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
 github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
 github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
-golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
+golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
+golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
+golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
+golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
-golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
+golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
+golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
+golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
+golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/interactive.go
+++ b/interactive.go
@@ -1,142 +0,0 @@
-package main
-
-// A simple program demonstrating the text area component from the Bubbles
-// component library.
-
-import (
-	"fmt"
-	"strings"
-
-	"github.com/charmbracelet/bubbles/textarea"
-	"github.com/charmbracelet/bubbles/viewport"
-	tea "github.com/charmbracelet/bubbletea"
-	"github.com/charmbracelet/lipgloss"
-	llama "github.com/go-skynet/llama/go"
-)
-
-func startInteractive(l *llama.LLama, opts ...llama.PredictOption) error {
-	p := tea.NewProgram(initialModel(l, opts...))
-
-	_, err := p.Run()
-	return err
-}
-
-type (
-	errMsg error
-)
-
-type model struct {
-	viewport    viewport.Model
-	messages    *[]string
-	textarea    textarea.Model
-	senderStyle lipgloss.Style
-	err         error
-	l           *llama.LLama
-	opts        []llama.PredictOption
-
-	predictC chan string
-}
-
-func initialModel(l *llama.LLama, opts ...llama.PredictOption) model {
-	ta := textarea.New()
-	ta.Placeholder = "Send a message..."
-	ta.Focus()
-
-	ta.Prompt = "┃ "
-	ta.CharLimit = 280
-
-	ta.SetWidth(200)
-	ta.SetHeight(3)
-
-	// Remove cursor line styling
-	ta.FocusedStyle.CursorLine = lipgloss.NewStyle()
-
-	ta.ShowLineNumbers = false
-
-	vp := viewport.New(200, 5)
-	vp.SetContent(`Welcome to llama-cli. Type a message and press Enter to send. Alpaca doesn't keep context of the whole chat (yet).`)
-
-	ta.KeyMap.InsertNewline.SetEnabled(false)
-
-	predictChannel := make(chan string)
-	messages := []string{}
-	m := model{
-		textarea:    ta,
-		messages:    &messages,
-		viewport:    vp,
-		senderStyle: lipgloss.NewStyle().Foreground(lipgloss.Color("5")),
-		err:         nil,
-		l:           l,
-		opts:        opts,
-		predictC:    predictChannel,
-	}
-	go func() {
-		for p := range predictChannel {
-			str, _ := templateString(emptyInput, struct {
-				Instruction string
-				Input       string
-			}{Instruction: p})
-			res, _ := l.Predict(
-				str,
-				opts...,
-			)
-
-			mm := *m.messages
-			*m.messages = mm[:len(mm)-1]
-			*m.messages = append(*m.messages, m.senderStyle.Render("llama: ")+res)
-			m.viewport.SetContent(strings.Join(*m.messages, "\n"))
-			ta.Reset()
-			m.viewport.GotoBottom()
-		}
-	}()
-
-	return m
-}
-
-func (m model) Init() tea.Cmd {
-	return textarea.Blink
-}
-
-func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
-	var (
-		tiCmd tea.Cmd
-		vpCmd tea.Cmd
-	)
-
-	m.textarea, tiCmd = m.textarea.Update(msg)
-	m.viewport, vpCmd = m.viewport.Update(msg)
-
-	switch msg := msg.(type) {
-	case tea.WindowSizeMsg:
-
-	//	m.viewport.Width = msg.Width
-	//	m.viewport.Height = msg.Height
-	case tea.KeyMsg:
-		switch msg.Type {
-		case tea.KeyCtrlC, tea.KeyEsc:
-			fmt.Println(m.textarea.Value())
-			return m, tea.Quit
-		case tea.KeyEnter:
-			*m.messages = append(*m.messages, m.senderStyle.Render("You: ")+m.textarea.Value(), m.senderStyle.Render("Loading response..."))
-			m.predictC <- m.textarea.Value()
-			m.viewport.SetContent(strings.Join(*m.messages, "\n"))
-			m.textarea.Reset()
-			m.viewport.GotoBottom()
-		}
-
-	// We handle errors just like any other message
-	case errMsg:
-		m.err = msg
-		return m, nil
-	}
-
-	return m, tea.Batch(tiCmd, vpCmd)
-}
-
-func (m model) View() string {
-	return fmt.Sprintf(
-		"%s\n\n%s",
-		m.viewport.View(),
-		m.textarea.View(),
-	) + "\n\n"
-}
--- a/kubernetes/deployment.yaml
+++ b/kubernetes/deployment.yaml
@@ -1,42 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: llama
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: llama
-  namespace: llama
-  labels:
-    app: llama
-spec:
-  selector:
-    matchLabels:
-      app: llama
-  replicas: 1
-  template:
-    metadata:
-      labels:
-        app: llama
-      name: llama
-    spec:
-      containers:
-        - name: llama
-          args:
-          - api
-          image: quay.io/go-skynet/llama-cli:v0.1
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: llama
-  namespace: llama
-spec:
-  selector:
-    app: llama
-  type: LoadBalancer
-  ports:
-    - protocol: TCP
-      port: 8080
-      targetPort: 8080
--- a/main.go
+++ b/main.go
@@ -1,267 +1,105 @@
 package main

 import (
-	"bytes"
 	"fmt"
-	"io/ioutil"
 	"os"
-	"runtime"
-	"text/template"
+	"path/filepath"

-	llama "github.com/go-skynet/llama/go"
+	api "github.com/go-skynet/LocalAI/api"
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
 	"github.com/urfave/cli/v2"
 )

-// Define the template string
-var emptyInput string = `Below is an instruction that describes a task. Write a response that appropriately completes the request.
-
-### Instruction:
-{{.Instruction}}
-
-### Response:`
-
-var nonEmptyInput string = `Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
-
-### Instruction:
-{{.Instruction}}
-
-### Input:
-{{.Input}}
-
-### Response:
-`
-
-func llamaFromOptions(ctx *cli.Context) (*llama.LLama, error) {
-	opts := []llama.ModelOption{llama.SetContext(ctx.Int("context-size"))}
-	if ctx.Bool("alpaca") {
-		opts = append(opts, llama.EnableAlpaca)
-	}
-
-	return llama.New(ctx.String("model"), opts...)
-}
-
-func templateString(t string, in interface{}) (string, error) {
-	// Parse the template
-	tmpl, err := template.New("prompt").Parse(t)
-	if err != nil {
-		return "", err
-	}
-
-	var buf bytes.Buffer
-	err = tmpl.Execute(&buf, in)
-	if err != nil {
-		return "", err
-	}
-	return buf.String(), nil
-}
-
-var modelFlags = []cli.Flag{
-	&cli.StringFlag{
-		Name:    "model",
-		EnvVars: []string{"MODEL_PATH"},
-	},
-	&cli.IntFlag{
-		Name:    "tokens",
-		EnvVars: []string{"TOKENS"},
-		Value:   128,
-	},
-	&cli.IntFlag{
-		Name:    "context-size",
-		EnvVars: []string{"CONTEXT_SIZE"},
-		Value:   512,
-	},
-	&cli.IntFlag{
-		Name:    "threads",
-		EnvVars: []string{"THREADS"},
-		Value:   runtime.NumCPU(),
-	},
-	&cli.Float64Flag{
-		Name:    "temperature",
-		EnvVars: []string{"TEMPERATURE"},
-		Value:   0.95,
-	},
-	&cli.Float64Flag{
-		Name:    "topp",
-		EnvVars: []string{"TOP_P"},
-		Value:   0.85,
-	},
-	&cli.IntFlag{
-		Name:    "topk",
-		EnvVars: []string{"TOP_K"},
-		Value:   20,
-	},
-	&cli.BoolFlag{
-		Name:    "alpaca",
-		EnvVars: []string{"ALPACA"},
-		Value:   true,
-	},
-}
-
 func main() {
+	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
+
+	path, err := os.Getwd()
+	if err != nil {
+		log.Error().Msgf("error: %s", err.Error())
+		os.Exit(1)
+	}
+
 	app := &cli.App{
-		Name:    "llama-cli",
-		Version: "0.1",
-		Usage:   "llama-cli --model ... --instruction 'What is an alpaca?'",
-		Flags: append(modelFlags,
-			&cli.StringFlag{
-				Name:    "template",
-				EnvVars: []string{"TEMPLATE"},
+		Name:  "LocalAI",
+		Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
+		Flags: []cli.Flag{
+			&cli.BoolFlag{
+				Name:    "f16",
+				EnvVars: []string{"F16"},
+			},
+			&cli.BoolFlag{
+				Name:    "debug",
+				EnvVars: []string{"DEBUG"},
+			},
+			&cli.IntFlag{
+				Name:        "threads",
+				DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
+				EnvVars:     []string{"THREADS"},
+				Value:       4,
 			},
 			&cli.StringFlag{
-				Name:    "instruction",
-				EnvVars: []string{"INSTRUCTION"},
+				Name:        "models-path",
+				DefaultText: "Path containing models used for inferencing",
+				EnvVars:     []string{"MODELS_PATH"},
+				Value:       filepath.Join(path, "models"),
 			},
 			&cli.StringFlag{
-				Name:    "input",
-				EnvVars: []string{"INPUT"},
-			}),
-		Description: `Run llama.cpp inference`,
-		UsageText: `
-llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "What's an alpaca?"
-
-	An Alpaca (Vicugna pacos) is a domesticated species of South American camelid, related to llamas and originally from Peru but now found throughout much of Andean region. They are bred for their fleeces which can be spun into wool or knitted items such as hats, sweaters, blankets etc
-		
-echo "An Alpaca (Vicugna pacos) is a domesticated species of South American camelid, related to llamas and originally from Peru but now found throughout much of Andean region. They are bred for their fleeces which can be spun into wool or knitted items such as hats, sweaters, blankets etc" | llama-cli --model ~/ggml-alpaca-7b-q4.bin --instruction "Proofread, improving clarity and flow" --input "-"
-
-	An Alpaca (Vicugna pacos) is a domesticated species from South America that's related to llamas. Originating in Peru but now found throughout the Andean region, they are bred for their fleeces which can be spun into wool or knitted items such as hats and sweaters—blankets too!
-`,
-		Copyright: "go-skynet authors",
-		Commands: []*cli.Command{
-			{
-				Flags: modelFlags,
-				Name:  "interactive",
-				Action: func(ctx *cli.Context) error {
-
-					l, err := llamaFromOptions(ctx)
-					if err != nil {
-						fmt.Println("Loading the model failed:", err.Error())
-						os.Exit(1)
-					}
-
-					return startInteractive(l, llama.SetTemperature(ctx.Float64("temperature")),
-						llama.SetTopP(ctx.Float64("topp")),
-						llama.SetTopK(ctx.Int("topk")),
-						llama.SetTokens(ctx.Int("tokens")),
-						llama.SetThreads(ctx.Int("threads")))
-				},
+				Name:        "config-file",
+				DefaultText: "Config file",
+				EnvVars:     []string{"CONFIG_FILE"},
 			},
-			{
-
-				Name: "api",
-				Flags: []cli.Flag{
-					&cli.IntFlag{
-						Name:    "threads",
-						EnvVars: []string{"THREADS"},
-						Value:   runtime.NumCPU(),
-					},
-					&cli.StringFlag{
-						Name:    "model",
-						EnvVars: []string{"MODEL_PATH"},
-					},
-					&cli.StringFlag{
-						Name:    "address",
-						EnvVars: []string{"ADDRESS"},
-						Value:   ":8080",
-					},
-					&cli.BoolFlag{
-						Name:    "alpaca",
-						EnvVars: []string{"ALPACA"},
-						Value:   true,
-					},
-					&cli.IntFlag{
-						Name:    "context-size",
-						EnvVars: []string{"CONTEXT_SIZE"},
-						Value:   512,
-					},
-				},
-				Action: func(ctx *cli.Context) error {
-					l, err := llamaFromOptions(ctx)
-					if err != nil {
-						fmt.Println("Loading the model failed:", err.Error())
-						os.Exit(1)
-					}
-
-					return api(l, ctx.String("address"), ctx.Int("threads"))
-				},
+			&cli.StringFlag{
+				Name:        "address",
+				DefaultText: "Bind address for the API server.",
+				EnvVars:     []string{"ADDRESS"},
+				Value:       ":8080",
+			},
+			&cli.StringFlag{
+				Name:        "image-dir",
+				DefaultText: "Image directory",
+				EnvVars:     []string{"IMAGE_DIR"},
+				Value:       "",
+			},
+			&cli.IntFlag{
+				Name:        "context-size",
+				DefaultText: "Default context size of the model",
+				EnvVars:     []string{"CONTEXT_SIZE"},
+				Value:       512,
+			},
+			&cli.IntFlag{
+				Name:        "upload-limit",
+				DefaultText: "Default upload-limit. MB",
+				EnvVars:     []string{"UPLOAD_LIMIT"},
+				Value:       15,
 			},
 		},
+		Description: `
+LocalAI is a drop-in replacement OpenAI API which runs inference locally.
+
+Some of the models compatible are:
+- Vicuna
+- Koala
+- GPT4ALL
+- GPT4ALL-J
+- Cerebras
+- Alpaca
+- StableLM (ggml quantized)
+
+It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
+`,
+		UsageText: `local-ai [options]`,
+		Copyright: "go-skynet authors",
 		Action: func(ctx *cli.Context) error {
-
-			instruction := ctx.String("instruction")
-			input := ctx.String("input")
-			templ := ctx.String("template")
-
-			promptTemplate := ""
-
-			if input != "" {
-				promptTemplate = nonEmptyInput
-			} else {
-				promptTemplate = emptyInput
-			}
-
-			if templ != "" {
-				dat, err := os.ReadFile(templ)
-				if err != nil {
-					fmt.Printf("Failed reading file: %s", err.Error())
-					os.Exit(1)
-				}
-				promptTemplate = string(dat)
-			}
-
-			if instruction == "-" {
-				dat, err := ioutil.ReadAll(os.Stdin)
-				if err != nil {
-					fmt.Printf("reading stdin failed: %s", err)
-					os.Exit(1)
-				}
-				instruction = string(dat)
-			}
-
-			if input == "-" {
-				dat, err := ioutil.ReadAll(os.Stdin)
-				if err != nil {
-					fmt.Printf("reading stdin failed: %s", err)
-					os.Exit(1)
-				}
-				input = string(dat)
-			}
-
-			str, err := templateString(promptTemplate, struct {
-				Instruction string
-				Input       string
-			}{Instruction: instruction, Input: input})
-
-			if err != nil {
-				fmt.Println("Templating the input failed:", err.Error())
-				os.Exit(1)
-			}
-
-			l, err := llamaFromOptions(ctx)
-			if err != nil {
-				fmt.Println("Loading the model failed:", err.Error())
-				os.Exit(1)
-			}
-
-			res, err := l.Predict(
-				str,
-				llama.SetTemperature(ctx.Float64("temperature")),
-				llama.SetTopP(ctx.Float64("topp")),
-				llama.SetTopK(ctx.Int("topk")),
-				llama.SetTokens(ctx.Int("tokens")),
-				llama.SetThreads(ctx.Int("threads")),
-			)
-			if err != nil {
-				fmt.Printf("predicting failed: %s", err)
-				os.Exit(1)
-			}
-			fmt.Println(res)
-			return nil
+			fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
+			return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false, ctx.String("image-dir")).Listen(ctx.String("address"))
 		},
 	}

-	err := app.Run(os.Args)
+	err = app.Run(os.Args)
 	if err != nil {
-		fmt.Println(err)
+		log.Error().Msgf("error: %s", err.Error())
 		os.Exit(1)
 	}
 }
--- a/models/.keep
+++ b/models/.keep
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -0,0 +1,194 @@
+package model
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+
+	rwkv "github.com/donomii/go-rwkv.cpp"
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
+	bloomz "github.com/go-skynet/bloomz.cpp"
+	bert "github.com/go-skynet/go-bert.cpp"
+	gpt2 "github.com/go-skynet/go-gpt2.cpp"
+	llama "github.com/go-skynet/go-llama.cpp"
+	"github.com/hashicorp/go-multierror"
+	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
+	"github.com/rs/zerolog/log"
+)
+
+const tokenizerSuffix = ".tokenizer.json"
+
+const (
+	LlamaBackend           = "llama"
+	BloomzBackend          = "bloomz"
+	StarcoderBackend       = "starcoder"
+	StableLMBackend        = "stablelm"
+	DollyBackend           = "dolly"
+	RedPajamaBackend       = "redpajama"
+	GPTNeoXBackend         = "gptneox"
+	ReplitBackend          = "replit"
+	Gpt2Backend            = "gpt2"
+	Gpt4AllLlamaBackend    = "gpt4all-llama"
+	Gpt4AllMptBackend      = "gpt4all-mpt"
+	Gpt4AllJBackend        = "gpt4all-j"
+	BertEmbeddingsBackend  = "bert-embeddings"
+	RwkvBackend            = "rwkv"
+	WhisperBackend         = "whisper"
+	StableDiffusionBackend = "stablediffusion"
+)
+
+var backends []string = []string{
+	LlamaBackend,
+	Gpt4AllLlamaBackend,
+	Gpt4AllMptBackend,
+	Gpt4AllJBackend,
+	Gpt2Backend,
+	WhisperBackend,
+	RwkvBackend,
+	BloomzBackend,
+	StableLMBackend,
+	DollyBackend,
+	RedPajamaBackend,
+	ReplitBackend,
+	GPTNeoXBackend,
+	BertEmbeddingsBackend,
+	StarcoderBackend,
+}
+
+var starCoder = func(modelFile string) (interface{}, error) {
+	return gpt2.NewStarcoder(modelFile)
+}
+
+var redPajama = func(modelFile string) (interface{}, error) {
+	return gpt2.NewRedPajama(modelFile)
+}
+
+var dolly = func(modelFile string) (interface{}, error) {
+	return gpt2.NewDolly(modelFile)
+}
+
+var gptNeoX = func(modelFile string) (interface{}, error) {
+	return gpt2.NewGPTNeoX(modelFile)
+}
+
+var replit = func(modelFile string) (interface{}, error) {
+	return gpt2.NewReplit(modelFile)
+}
+
+var stableLM = func(modelFile string) (interface{}, error) {
+	return gpt2.NewStableLM(modelFile)
+}
+
+var bertEmbeddings = func(modelFile string) (interface{}, error) {
+	return bert.New(modelFile)
+}
+
+var bloomzLM = func(modelFile string) (interface{}, error) {
+	return bloomz.New(modelFile)
+}
+var gpt2LM = func(modelFile string) (interface{}, error) {
+	return gpt2.New(modelFile)
+}
+
+var stableDiffusion = func(assetDir string) (interface{}, error) {
+	return stablediffusion.New(assetDir)
+}
+
+var whisperModel = func(modelFile string) (interface{}, error) {
+	return whisper.New(modelFile)
+}
+
+func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		return llama.New(s, opts...)
+	}
+}
+
+func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		return gpt4all.New(s, opts...)
+	}
+}
+
+func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		log.Debug().Msgf("Loading RWKV", s, tokenFile)
+
+		model := rwkv.LoadFiles(s, tokenFile, threads)
+		if model == nil {
+			return nil, fmt.Errorf("could not load model")
+		}
+		return model, nil
+	}
+}
+
+func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
+	log.Debug().Msgf("Loading model %s from %s", backendString, modelFile)
+	switch strings.ToLower(backendString) {
+	case LlamaBackend:
+		return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
+	case BloomzBackend:
+		return ml.LoadModel(modelFile, bloomzLM)
+	case StableLMBackend:
+		return ml.LoadModel(modelFile, stableLM)
+	case DollyBackend:
+		return ml.LoadModel(modelFile, dolly)
+	case RedPajamaBackend:
+		return ml.LoadModel(modelFile, redPajama)
+	case Gpt2Backend:
+		return ml.LoadModel(modelFile, gpt2LM)
+	case GPTNeoXBackend:
+		return ml.LoadModel(modelFile, gptNeoX)
+	case ReplitBackend:
+		return ml.LoadModel(modelFile, replit)
+	case StableDiffusionBackend:
+		return ml.LoadModel(modelFile, stableDiffusion)
+	case StarcoderBackend:
+		return ml.LoadModel(modelFile, starCoder)
+	case Gpt4AllLlamaBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
+	case Gpt4AllMptBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
+	case Gpt4AllJBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
+	case BertEmbeddingsBackend:
+		return ml.LoadModel(modelFile, bertEmbeddings)
+	case RwkvBackend:
+		return ml.LoadModel(modelFile, rwkvLM(filepath.Join(ml.ModelPath, modelFile+tokenizerSuffix), threads))
+	case WhisperBackend:
+		return ml.LoadModel(modelFile, whisperModel)
+	default:
+		return nil, fmt.Errorf("backend unsupported: %s", backendString)
+	}
+}
+
+func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
+	log.Debug().Msgf("Loading models greedly")
+
+	ml.mu.Lock()
+	m, exists := ml.models[modelFile]
+	if exists {
+		ml.mu.Unlock()
+		return m, nil
+	}
+	ml.mu.Unlock()
+	var err error
+
+	for _, b := range backends {
+		if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
+			continue
+		}
+		log.Debug().Msgf("[%s] Attempting to load", b)
+		model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
+		if modelerr == nil && model != nil {
+			log.Debug().Msgf("[%s] Loads OK", b)
+			return model, nil
+		} else if modelerr != nil {
+			err = multierror.Append(err, modelerr)
+			log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
+		}
+	}
+
+	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
+}
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`Complete the following sentence: {{.Input}}`