Cleanup gh-pages branch

Signed-off-by: Dimitris Karakasilis <dimitris@spectrocloud.com>
chore(deps): update actions/checkout action to v3 (#82 )
2026-05-24 16:51:44 -04:00 · 2023-04-26 11:55:04 +03:00 · 2023-04-25 07:46:29 +02:00 · 2023-04-25 07:46:14 +02:00 · 2023-04-24 23:42:03 +02:00 · 2023-04-24 18:16:23 +02:00
25 changed files with 2 additions and 1682 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -15,7 +15,7 @@ jobs:
    steps:
      - name: Clone
-        uses: actions/checkout@v1
+        uses: actions/checkout@v3
        with: 
          submodules: true
      - name: Dependencies
@@ -31,7 +31,7 @@ jobs:
    steps:
      - name: Clone
-        uses: actions/checkout@v1
+        uses: actions/checkout@v3
        with: 
          submodules: true
--- a/13
+++ b/13
@@ -1,13 +0,0 @@
 # Global build arguments. ARGs declared before the first FROM are only
 # usable in FROM lines unless a stage re-declares them.
 ARG GO_VERSION=1.20
 ARG DEBIAN_VERSION=11
 ARG BUILD_TYPE=
 # Builder stage: compiles the local-ai binary with the Go toolchain.
 FROM golang:$GO_VERSION as builder
 # Re-declare BUILD_TYPE so it is in scope for this stage; ARG values are
 # exposed to RUN as environment variables, which is how `make build` sees it.
 # An empty value is treated by the Makefile's `ifndef BUILD_TYPE` as unset.
 ARG BUILD_TYPE
 WORKDIR /build
 RUN apt-get update && apt-get install -y cmake
 COPY . .
 RUN make build
 # Runtime stage: only the compiled binary is copied over.
 FROM debian:$DEBIAN_VERSION
 COPY --from=builder /build/local-ai /usr/bin/local-ai
 ENTRYPOINT [ "/usr/bin/local-ai" ]
--- a/5
+++ b/5
@@ -1,5 +0,0 @@
 VERSION 0.7
 # build: builds the image from the repository Dockerfile and saves the
 # /usr/bin/local-ai binary from it to the host as ./local-ai.
 build:
     FROM DOCKERFILE -f Dockerfile .
     SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
--- a/21
+++ b/21
@@ -1,21 +0,0 @@
 MIT License
 Copyright (c) 2023 go-skynet authors
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/116
+++ b/116
@@ -1,116 +0,0 @@
 # Go toolchain entry points.
 GOCMD=go
 GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 # Name of the binary produced by `make build` and removed by `make clean`.
 BINARY_NAME=local-ai
 # Pinned revisions of the backend bindings; `?=` lets the caller override them.
 GOLLAMA_VERSION?=llama.cpp-8687c1f
 GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
 GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
 # Terminal colour escape sequences used by the build info and help output.
 GREEN  := $(shell tput -Txterm setaf 2)
 YELLOW := $(shell tput -Txterm setaf 3)
 WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)
 # Header and library search paths for the cloned backend checkouts.
 # Simply-expanded (:=) so `pwd` runs once at parse time instead of forking a
 # shell on every expansion of these variables.
 C_INCLUDE_PATH:=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
 LIBRARY_PATH:=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
 # Use this if you want to set the default behavior
 ifndef BUILD_TYPE
   BUILD_TYPE:=default
 endif
 # GENERIC_PREFIX is prepended to the library targets built in the backend
 # checkouts (e.g. generic-libgptj.a instead of libgptj.a).
 # Make compares ifeq arguments textually and does NOT strip quotes, while
 # `generic-build` passes BUILD_TYPE=generic through the shell (which does),
 # so the comparison must be against the bare word.
 ifeq ($(strip $(BUILD_TYPE)),generic)
   GENERIC_PREFIX:=generic-
 else
   GENERIC_PREFIX:=
 endif
 # Targets that are commands rather than files; declaring them phony keeps a
 # stray file named e.g. `clean` or `help` from silently disabling the target.
 .PHONY: all test build vendor generic-build replace prepare clean run help
 all: help
 ## Build:
 # build: after `prepare`, prints the build info and compiles the Go module in
 # the current directory into $(BINARY_NAME), passing the backend include and
 # library search paths to the Go build through the environment.
 build: prepare ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
 # generic-build: re-invokes make with BUILD_TYPE=generic in the environment.
 generic-build: ## Build the project using generic
 	BUILD_TYPE="generic" $(MAKE) build
 ## GPT4ALL-J
 # go-gpt4all-j: clones the binding, checks out the pinned revision on a local
 # `build` branch, then rewrites symbol prefixes in place (ggml_ -> ggml_gptj_,
 # gpt_ -> gptj_, json_ -> json_gptj_, plus the `replace` helpers) so they do
 # not clash with the other backends when linked into one binary.
 go-gpt4all-j:
 	git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
 	cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
 	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
 	@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
 # Builds the static library inside the checkout; GENERIC_PREFIX selects the
 # generic- variant of the target.
 go-gpt4all-j/libgptj.a: go-gpt4all-j
 	$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
 # CEREBRAS GPT
 # go-gpt2: clones the binding, checks out the pinned revision on a local
 # `build` branch, then rewrites symbol prefixes in place (ggml_ -> ggml_gpt2_,
 # gpt_ -> gpt2_, json_ -> json_gpt2_) so they do not clash with the other
 # backends when linked into one binary.
 go-gpt2:
 	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
 	cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
 	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
 	@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
 	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
 	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
 # Builds the static library inside the checkout; GENERIC_PREFIX selects the
 # generic- variant of the target.
 go-gpt2/libgpt2.a: go-gpt2
 	$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
 # go-llama: clones the llama.cpp binding at the branch/tag named by
 # GOLLAMA_VERSION (no symbol rewriting is applied to this checkout).
 go-llama:
 	git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
 # Builds the static library inside the checkout; GENERIC_PREFIX selects the
 # generic- variant of the target.
 go-llama/libbinding.a: go-llama
 	$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
 # replace: points the three binding modules in go.mod at the local checkouts.
 replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
 # prepare: aggregate target; builds all three backend libraries and applies
 # the go.mod replacements. It has no recipe of its own.
 prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
 # clean: deletes the three backend checkouts and the compiled binary.
 clean: ## Remove build related file
 	rm -fr ./go-llama
 	rm -rf ./go-gpt4all-j
 	rm -rf ./go-gpt2
 	rm -rf $(BINARY_NAME)
 ## Run:
 # run: after `prepare`, runs ./main.go with `go run`, using the same
 # include/library search paths as the build.
 run: prepare
 	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
 # Downloads the model file used by the test suite.
 # `mkdir -p` keeps a retry from failing when the directory already exists, and
 # a failed download is removed so a truncated file is never seen as up to date.
 test-models/testmodel:
 	mkdir -p test-models
 	wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel || { rm -f test-models/testmodel; exit 1; }
 # test: runs the Go test suite with MODELS_PATH pointing at the absolute path
 # of ./test-models.
 test: prepare test-models/testmodel
 	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v ./...
 ## Help:
 # help: self-documenting target. The awk script scans the parsed makefiles and
 # prints every `target: ... ## description` line as a target entry and every
 # line starting with `## ` as a section heading. `$$` is a literal `$` for awk.
 help: ## Show this help.
 	@echo ''
 	@echo 'Usage:'
 	@echo '  ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
 	@echo ''
 	@echo 'Targets:'
 	@awk 'BEGIN {FS = ":.*?## "} { \
 		if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf "    ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
 		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
 		}' $(MAKEFILE_LIST)
--- a/README.md
+++ b/README.md
@@ -1,244 +0,0 @@
 <h1 align="center">
  <br>
  <img height="300" src="https://user-images.githubusercontent.com/2420543/233147843-88697415-6dbf-4368-a862-ab217f9f7342.jpeg"> <br>
    LocalAI
 <br>
 </h1>
 > :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather than on the CLI interface. We think that there are already many projects that can be used as a CLI interface, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
 LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is Apache 2.0 licensed and can be used for commercial purposes.
 - OpenAI compatible API
 - Supports multiple-models
 - Once loaded the first time, it keeps models loaded in memory for faster inference
 - Support for prompt templates
 - Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
 Discord channel: [Discord](https://discord.gg/uJAeKSAGDy)
 ## Model compatibility
 It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp), and also supports [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
 Tested with:
 - Vicuna
 - Alpaca
 - [GPT4ALL](https://github.com/nomic-ai/gpt4all)
 - [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
 - Koala
 - [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
 It should also be compatible with StableLM and GPTNeoX ggml models (untested)
 Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
 ## Usage
 > `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
 The easiest way to run LocalAI is by using `docker-compose`:
 ```bash
 git clone https://github.com/go-skynet/LocalAI
 cd LocalAI
 # copy your models to models/
 cp your-model.bin models/
 # (optional) Edit the .env file to set things like context size and threads
 # vim .env
 # start with docker-compose
 docker compose up -d --build
 # Now API is accessible at localhost:8080
 curl http://localhost:8080/v1/models
 # {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}
 curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
     "model": "your-model.bin",            
     "prompt": "A long time ago in a galaxy far, far away",
     "temperature": 0.7
   }'
 ```
 ## Helm Chart Installation (run LocalAI in Kubernetes)
 The local-ai Helm chart supports two options for the LocalAI server's models directory:
 1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
    Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
 2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
    First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
    ```bash
    helm install local-ai charts/local-ai -n local-ai --create-namespace
    ```
    Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
    Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
    ```bash
    helm upgrade local-ai -n local-ai charts/local-ai
    ```
    This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
 ## Prompt templates 
 The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
 <details>
 You can use a default template for every model present in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For example, this can be used with alpaca:
 ```
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
 {{.Input}}
 ### Response:
 ```
 See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for most popular models.
 </details>
 ## API
 `LocalAI` provides an API for running text generation as a service, that follows the OpenAI reference and can be used as a drop-in. The models once loaded the first time will be kept in memory.
 <details>
 Example of starting the API with `docker`:
 ```bash
 docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
 ```
 And you'll see:
 ```
 ┌───────────────────────────────────────────────────┐ 
 │                   Fiber v2.42.0                   │ 
 │               http://127.0.0.1:8080               │ 
 │       (bound on host 0.0.0.0 and port 8080)       │ 
 │                                                   │ 
 │ Handlers ............. 1  Processes ........... 1 │ 
 │ Prefork ....... Disabled  PID ................. 1 │ 
 └───────────────────────────────────────────────────┘ 
 ```
 You can control the API server options with command line arguments:
 ```
 local-ai --models-path <model_path> [--address <address>] [--threads <num_threads>]
 ```
 The API takes the following parameters:
 | Parameter    | Environment Variable | Default Value | Description                            |
 | ------------ | -------------------- | ------------- | -------------------------------------- |
 | models-path        | MODELS_PATH           |               | The path where you have models (ending with `.bin`).      |
 | threads      | THREADS              | Number of Physical cores     | The number of threads to use for text generation. |
 | address      | ADDRESS              | :8080         | The address and port to listen on. |
 | context-size | CONTEXT_SIZE         | 512           | Default token context size. |
 Once the server is running, you can start making requests to it using HTTP, using the OpenAI API. 
 </details>
 ### Supported OpenAI API endpoints
 You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create). 
 The following is the list of supported endpoints/parameters.
 #### Chat completions
 For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
 ```
 curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-koala-7b-model-q4_0-r2.bin",
     "messages": [{"role": "user", "content": "Say this is a test!"}],
     "temperature": 0.7
   }'
 ```
 Available additional parameters: `top_p`, `top_k`, `max_tokens`
 #### Completions
 For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
 ```
 curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
     "model": "ggml-koala-7b-model-q4_0-r2.bin",
     "prompt": "A long time ago in a galaxy far, far away",
     "temperature": 0.7
   }'
 ```
 Available additional parameters: `top_p`, `top_k`, `max_tokens`
 #### List models
 You can list all the models available with:
 ```
 curl http://localhost:8080/v1/models
 ```
 ## Using other models
 gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
 ```bash
 wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
 mkdir models
 cp gpt4all.. models/
 git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
 pip install sentencepiece
 python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
 # There will be a new model with the ".tmp" extension, you have to use that one!
 ```
 ### Windows compatibility
 It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
 ### Build locally
 Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
 In order to build the `LocalAI` container image locally you can use `docker`:
 ```
 # build the image (Docker image names must be lowercase)
 docker build -t local-ai .
 docker run local-ai
 ```
 Or build the binary with `make`:
 ```
 make build
 ```
 ## Short-term roadmap
 - [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
 - Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
 - Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
 - [x] Multi-model support
 - Have a webUI!
 ## License
 MIT
 ## Acknowledgements
 - [llama.cpp](https://github.com/ggerganov/llama.cpp)
 - https://github.com/tatsu-lab/stanford_alpaca
 - https://github.com/cornelk/llama-go for the initial ideas
 - https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)
--- a/api/api.go
+++ b/api/api.go
@@ -1,423 +0,0 @@
 package api
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"strings"
 	"sync"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
 	"github.com/gofiber/fiber/v2/middleware/recover"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 )
 type OpenAIResponse struct {
 	Created int      `json:"created,omitempty"`
 	Object  string   `json:"chat.completion,omitempty"`
 	ID      string   `json:"id,omitempty"`
 	Model   string   `json:"model,omitempty"`
 	Choices []Choice `json:"choices,omitempty"`
 }
 type Choice struct {
 	Index        int      `json:"index,omitempty"`
 	FinishReason string   `json:"finish_reason,omitempty"`
 	Message      *Message `json:"message,omitempty"`
 	Text         string   `json:"text,omitempty"`
 }
 type Message struct {
 	Role    string `json:"role,omitempty"`
 	Content string `json:"content,omitempty"`
 }
 // OpenAIModel is one entry of the model list returned by the models endpoints.
 type OpenAIModel struct {
 	ID     string `json:"id"`
 	Object string `json:"object"`
 }
 // OpenAIRequest is the request body accepted by the completion and chat
 // completion endpoints. Numeric fields left at zero are replaced by the
 // handler's defaults or simply not passed on to the backend.
 type OpenAIRequest struct {
 	// Model is the model file name to use
 	Model string `json:"model"`
 	// Prompt is read only by completion API calls
 	Prompt string `json:"prompt"`
 	// Stop is forwarded as a stop word (only the llama backend reads it)
 	Stop string `json:"stop"`
 	// Messages is read only by chat/completion API calls
 	Messages []Message `json:"messages"`
 	// Echo prepends the (templated) prompt to each returned prediction
 	Echo bool `json:"echo"`
 	// Common options between all the API calls
 	TopP        float64 `json:"top_p"`
 	TopK        int     `json:"top_k"`
 	Temperature float64 `json:"temperature"`
 	Maxtokens   int     `json:"max_tokens"`
 	// N is the number of predictions to generate (0 is treated as 1)
 	N int `json:"n"`
 	// Custom parameters - not present in the OpenAI API
 	Batch         int     `json:"batch"`
 	F16           bool    `json:"f16kv"`
 	IgnoreEOS     bool    `json:"ignore_eos"`
 	RepeatPenalty float64 `json:"repeat_penalty"`
 	Keep          int     `json:"n_keep"`
 	// Seed is passed to the backend only when non-zero
 	Seed int `json:"seed"`
 }
 // https://platform.openai.com/docs/api-reference/completions
 //
 // openAIEndpoint returns the fiber handler behind the completion routes.
 //
 // chat selects chat mode: the prompt is built by joining the contents of
 // input.Messages and results are returned as assistant messages; otherwise
 // input.Prompt is used and results are returned as plain text. debug enables
 // the llama debug option. threads, ctx and f16 are forwarded to the backends
 // (ctx and f16 only as llama load options). mutexMap guards mutexes, which
 // holds one lock per model file so predictions on a model are serialized.
 //
 // The handler returns an error when the body cannot be parsed, when no model
 // can be determined, when no backend can load the model, or when a
 // prediction fails.
 func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		var model *llama.LLama
 		var gptModel *gptj.GPTJ
 		var gpt2Model *gpt2.GPT2
 		var stableLMModel *gpt2.StableLM
 		input := new(OpenAIRequest)
 		// Get input data from the request body
 		if err := c.BodyParser(input); err != nil {
 			return err
 		}
 		modelFile := input.Model
 		received, _ := json.Marshal(input)
 		log.Debug().Msgf("Request received: %s", string(received))
 		// Set model from bearer token, if available.
 		// TrimPrefix strips the literal "Bearer " scheme. TrimLeft must not be
 		// used here: it treats its argument as a character set and would also
 		// eat leading 'B', 'e', 'a', 'r' or space characters of the token itself.
 		bearer := strings.TrimPrefix(c.Get("authorization"), "Bearer ")
 		bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
 		// If no model was specified, take the first available
 		if modelFile == "" {
 			models, _ := loader.ListModels()
 			if len(models) > 0 {
 				modelFile = models[0]
 				log.Debug().Msgf("No model specified, using: %s", modelFile)
 			}
 		}
 		// If no model is found or specified, we bail out
 		if modelFile == "" && !bearerExists {
 			return fmt.Errorf("no model specified")
 		}
 		// If a model is found in bearer token takes precedence
 		if bearerExists {
 			log.Debug().Msgf("Using model from bearer token: %s", bearer)
 			modelFile = bearer
 		}
 		// Try to load the model
 		var llamaerr, gpt2err, gptjerr, stableerr error
 		llamaOpts := []llama.ModelOption{}
 		if ctx != 0 {
 			llamaOpts = append(llamaOpts, llama.SetContext(ctx))
 		}
 		if f16 {
 			llamaOpts = append(llamaOpts, llama.EnableF16Memory)
 		}
 		// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
 		// Backends are tried in order: llama, gpt4all-j, gpt2, stableLM.
 		model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
 		if llamaerr != nil {
 			gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
 			if gptjerr != nil {
 				gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
 				if gpt2err != nil {
 					stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
 					if stableerr != nil {
 						// Every backend failed: report all the errors together
 						return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error())
 					}
 				}
 			}
 		}
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 		// Look up (or lazily create) the per-model lock, then hold it for the
 		// rest of the request.
 		mutexMap.Lock()
 		l, ok := mutexes[modelFile]
 		if !ok {
 			m := &sync.Mutex{}
 			mutexes[modelFile] = m
 			l = m
 		}
 		mutexMap.Unlock()
 		l.Lock()
 		defer l.Unlock()
 		// Set the parameters for the language model prediction; zero values
 		// fall back to the defaults below.
 		topP := input.TopP
 		if topP == 0 {
 			topP = 0.7
 		}
 		topK := input.TopK
 		if topK == 0 {
 			topK = 80
 		}
 		temperature := input.Temperature
 		if temperature == 0 {
 			temperature = 0.9
 		}
 		tokens := input.Maxtokens
 		if tokens == 0 {
 			tokens = 512
 		}
 		predInput := input.Prompt
 		if chat {
 			mess := []string{}
 			// TODO: encode roles
 			for _, i := range input.Messages {
 				mess = append(mess, i.Content)
 			}
 			predInput = strings.Join(mess, "\n")
 		}
 		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix.
 		// A template error leaves the input untouched.
 		templatedInput, err := loader.TemplatePrefix(modelFile, struct {
 			Input string
 		}{Input: predInput})
 		if err == nil {
 			predInput = templatedInput
 			log.Debug().Msgf("Template found, input modified to: %s", predInput)
 		}
 		result := []Choice{}
 		n := input.N
 		if input.N == 0 {
 			n = 1
 		}
 		// GPT-2 and StableLM share the go-gpt2.cpp option type, so their
 		// option list is built by a single helper.
 		gpt2Options := func() []gpt2.PredictOption {
 			predictOptions := []gpt2.PredictOption{
 				gpt2.SetTemperature(temperature),
 				gpt2.SetTopP(topP),
 				gpt2.SetTopK(topK),
 				gpt2.SetTokens(tokens),
 				gpt2.SetThreads(threads),
 			}
 			if input.Batch != 0 {
 				predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
 			}
 			if input.Seed != 0 {
 				predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
 			}
 			return predictOptions
 		}
 		// Pick the prediction function matching whichever backend loaded.
 		var predFunc func() (string, error)
 		switch {
 		case stableLMModel != nil:
 			predFunc = func() (string, error) {
 				// Generate the prediction using the language model
 				return stableLMModel.Predict(
 					predInput,
 					gpt2Options()...,
 				)
 			}
 		case gpt2Model != nil:
 			predFunc = func() (string, error) {
 				// Generate the prediction using the language model
 				return gpt2Model.Predict(
 					predInput,
 					gpt2Options()...,
 				)
 			}
 		case gptModel != nil:
 			predFunc = func() (string, error) {
 				// Generate the prediction using the language model
 				predictOptions := []gptj.PredictOption{
 					gptj.SetTemperature(temperature),
 					gptj.SetTopP(topP),
 					gptj.SetTopK(topK),
 					gptj.SetTokens(tokens),
 					gptj.SetThreads(threads),
 				}
 				if input.Batch != 0 {
 					predictOptions = append(predictOptions, gptj.SetBatch(input.Batch))
 				}
 				if input.Seed != 0 {
 					predictOptions = append(predictOptions, gptj.SetSeed(input.Seed))
 				}
 				return gptModel.Predict(
 					predInput,
 					predictOptions...,
 				)
 			}
 		case model != nil:
 			predFunc = func() (string, error) {
 				// Generate the prediction using the language model
 				predictOptions := []llama.PredictOption{
 					llama.SetTemperature(temperature),
 					llama.SetTopP(topP),
 					llama.SetTopK(topK),
 					llama.SetTokens(tokens),
 					llama.SetThreads(threads),
 				}
 				if debug {
 					predictOptions = append(predictOptions, llama.Debug)
 				}
 				if input.Stop != "" {
 					predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
 				}
 				if input.RepeatPenalty != 0 {
 					predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
 				}
 				if input.Keep != 0 {
 					predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
 				}
 				if input.Batch != 0 {
 					predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
 				}
 				if input.F16 {
 					predictOptions = append(predictOptions, llama.EnableF16KV)
 				}
 				if input.IgnoreEOS {
 					predictOptions = append(predictOptions, llama.IgnoreEOS)
 				}
 				if input.Seed != 0 {
 					predictOptions = append(predictOptions, llama.SetSeed(input.Seed))
 				}
 				return model.Predict(
 					predInput,
 					predictOptions...,
 				)
 			}
 		}
 		// Run the prediction n times, collecting one choice per run.
 		for i := 0; i < n; i++ {
 			prediction, err := predFunc()
 			if err != nil {
 				return err
 			}
 			if input.Echo {
 				prediction = predInput + prediction
 			}
 			if chat {
 				result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
 			} else {
 				result = append(result, Choice{Text: prediction})
 			}
 		}
 		jsonResult, _ := json.Marshal(result)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		// Return the prediction in the response body
 		return c.JSON(OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
 		})
 	}
 }
 // listModels returns the handler for the model listing routes. It asks the
 // loader for the available models and replies with an object of type "list"
 // whose data holds one "model" entry per name. A loader error is returned
 // unchanged.
 func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
 	// Shape of the JSON reply
 	type modelList struct {
 		Object string        `json:"object"`
 		Data   []OpenAIModel `json:"data"`
 	}
 	return func(c *fiber.Ctx) error {
 		names, err := loader.ListModels()
 		if err != nil {
 			return err
 		}
 		entries := make([]OpenAIModel, 0, len(names))
 		for idx := range names {
 			entries = append(entries, OpenAIModel{ID: names[idx], Object: "model"})
 		}
 		return c.JSON(modelList{Object: "list", Data: entries})
 	}
 }
 // App builds the fiber application: it sets the global log level (debug or
 // info), installs a JSON error handler plus the recover and CORS middleware,
 // and registers the completion, chat completion and model listing routes,
 // each with and without the /v1 prefix.
 func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
 	logLevel := zerolog.InfoLevel
 	if debug {
 		logLevel = zerolog.DebugLevel
 	}
 	zerolog.SetGlobalLevel(logLevel)
 	// Body used to report errors as JSON
 	type errorBody struct {
 		Error string `json:"error"`
 	}
 	// Replaces fiber's default error handler
 	jsonErrors := func(ctx *fiber.Ctx, err error) error {
 		// 500 unless the error is a *fiber.Error carrying its own status code
 		status := fiber.StatusInternalServerError
 		var fiberErr *fiber.Error
 		if errors.As(err, &fiberErr) {
 			status = fiberErr.Code
 		}
 		return ctx.Status(status).JSON(errorBody{Error: err.Error()})
 	}
 	app := fiber.New(fiber.Config{
 		DisableStartupMessage: disableMessage,
 		ErrorHandler:          jsonErrors,
 	})
 	// Default middleware config
 	app.Use(recover.New())
 	app.Use(cors.New())
 	// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 	// One lock per model, shared by every route; guard protects the map itself.
 	locks := map[string]*sync.Mutex{}
 	guard := &sync.Mutex{}
 	chatHandler := openAIEndpoint(true, debug, loader, threads, ctxSize, f16, guard, locks)
 	completionHandler := openAIEndpoint(false, debug, loader, threads, ctxSize, f16, guard, locks)
 	modelsHandler := listModels(loader)
 	// openAI compatible API endpoint
 	app.Post("/v1/chat/completions", chatHandler)
 	app.Post("/chat/completions", chatHandler)
 	app.Post("/v1/completions", completionHandler)
 	app.Post("/completions", completionHandler)
 	app.Get("/v1/models", modelsHandler)
 	app.Get("/models", modelsHandler)
 	return app
 }
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -1,53 +0,0 @@
 package api_test
 import (
 	"context"
 	"os"
 	. "github.com/go-skynet/LocalAI/api"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	"github.com/sashabaranov/go-openai"
 )
 // End-to-end spec: starts the API on 127.0.0.1:9090 with the models found in
 // MODELS_PATH and exercises it through the go-openai client.
 var _ = Describe("API test", func() {
 	var app *fiber.App
 	var modelLoader *model.ModelLoader
 	var client *openai.Client
 	Context("API query", func() {
 		BeforeEach(func() {
 			modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
 			app = App(modelLoader, 1, 512, false, false, true)
 			// Listen blocks, so the server runs in its own goroutine
 			go app.Listen("127.0.0.1:9090")
 			defaultConfig := openai.DefaultConfig("")
 			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
 			// Wait for API to be ready
 			client = openai.NewClientWithConfig(defaultConfig)
 			// Poll the models endpoint until it answers (up to two minutes)
 			Eventually(func() error {
 				_, err := client.ListModels(context.TODO())
 				return err
 			}, "2m").ShouldNot(HaveOccurred())
 		})
 		AfterEach(func() {
 			app.Shutdown()
 		})
 		// Expects exactly one model, named "testmodel", in MODELS_PATH
 		It("returns the models list", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(models.Models)).To(Equal(1))
 			Expect(models.Models[0].ID).To(Equal("testmodel"))
 		})
 		// Only checks that one non-empty choice comes back, not its content
 		It("can generate completions", func() {
 			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
 			Expect(err).ToNot(HaveOccurred())
 			Expect(len(resp.Choices)).To(Equal(1))
 			Expect(resp.Choices[0].Text).ToNot(BeEmpty())
 		})
 	})
 })
--- a/api/apt_suite_test.go
+++ b/api/apt_suite_test.go
@@ -1,13 +0,0 @@
 package api_test
 import (
 	"testing"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
 // TestLocalAI is the `go test` entry point: it wires gomega failures into
 // ginkgo and runs every spec registered in this package.
 func TestLocalAI(t *testing.T) {
 	RegisterFailHandler(Fail)
 	RunSpecs(t, "LocalAI test suite")
 }
--- a/charts/local-ai/Chart.yaml
+++ b/charts/local-ai/Chart.yaml
@@ -1,6 +0,0 @@
 apiVersion: v2
 appVersion: 0.1.0
 description: A Helm chart for LocalAI
 name: local-ai
 type: application
 version: 1.0.0
--- a/charts/local-ai/templates/_helpers.tpl
+++ b/charts/local-ai/templates/_helpers.tpl
@@ -1,44 +0,0 @@
 {{/*
 Expand the name of the chart.
 */}}
 {{- define "local-ai.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
 If release name contains chart name it will be used as a full name.
 */}}
 {{- define "local-ai.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- $name := default .Chart.Name .Values.nameOverride }}
 {{- if contains $name .Release.Name }}
 {{- .Release.Name | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{- end }}
 {{- end }}
 {{/*
 Create chart name and version as used by the chart label.
 */}}
 {{- define "local-ai.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Common labels
 */}}
 {{- define "local-ai.labels" -}}
 helm.sh/chart: {{ include "local-ai.chart" . }}
 app.kubernetes.io/name: {{ include "local-ai.name" . }}
 app.kubernetes.io/instance: "{{ .Release.Name }}"
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
 {{- end }}
--- a/charts/local-ai/templates/data-volume.yaml
+++ b/charts/local-ai/templates/data-volume.yaml
@@ -1,39 +0,0 @@
 {{/*
 Optional CDI DataVolume that imports a model archive into a PVC, plus an
 optional Secret holding the credentials for the source.
 Both resources are named after "local-ai.fullname".
 - List values (accessModes, secretExtraHeaders) are rendered with toYaml so
   that lists with more than one entry stay valid YAML lists.
 - secretRef is only emitted when this chart also creates the Secret
   (dataVolume.secret.enabled); otherwise the DataVolume would point at a
   Secret that does not exist.
 */}}
 {{- if .Values.dataVolume.enabled }}
 apiVersion: cdi.kubevirt.io/v1beta1
 kind: DataVolume
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
 spec:
  contentType: archive
  source:
    {{ .Values.dataVolume.source.type }}:
      url: {{ .Values.dataVolume.source.url | quote }}
      {{- if .Values.dataVolume.secret.enabled }}
      secretRef: {{ template "local-ai.fullname" . }}
      {{- end }}
      {{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
      secretExtraHeaders:
        {{- toYaml .Values.dataVolume.source.secretExtraHeaders | nindent 8 }}
      {{- end }}
      {{- if .Values.dataVolume.source.caCertConfigMap }}
      caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
      {{- end }}
  pvc:
    accessModes:
      {{- toYaml .Values.dataVolume.pvc.accessModes | nindent 6 }}
    resources:
      requests:
        storage: {{ .Values.dataVolume.pvc.size }}
 ---
 {{- if .Values.dataVolume.secret.enabled }}
 apiVersion: v1
 kind: Secret
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
 data:
  accessKeyId: {{ .Values.dataVolume.secret.username }}
  secretKey: {{ .Values.dataVolume.secret.password }}
 {{- end }}
 {{- end }}
--- a/charts/local-ai/templates/deployment.yaml
+++ b/charts/local-ai/templates/deployment.yaml
@@ -1,39 +0,0 @@
 {{/*
 Deployment running a single LocalAI replica.
 THREADS, CONTEXT_SIZE and MODELS_PATH are taken from .Values.deployment.env.
 When .Values.deployment.volume.enabled is set, the PVC named after
 "local-ai.fullname" is mounted at the models path.
 String-valued fields (instance label, image, env values, mount path) are
 quoted so that scalar-looking values are always rendered as YAML strings.
 */}}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
 spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "local-ai.name" . }}
      app.kubernetes.io/instance: {{ .Release.Name | quote }}
  replicas: 1
  template:
    metadata:
      name: {{ template "local-ai.fullname" . }}
      labels:
        app.kubernetes.io/name: {{ include "local-ai.name" . }}
        app.kubernetes.io/instance: {{ .Release.Name | quote }}
    spec:
      containers:
        - name: {{ template "local-ai.fullname" . }}
          image: {{ .Values.deployment.image | quote }}
          env:
          - name: THREADS
            value: {{ .Values.deployment.env.threads | quote }}
          - name: CONTEXT_SIZE
            value: {{ .Values.deployment.env.contextSize | quote }}
          - name: MODELS_PATH
            value: {{ .Values.deployment.env.modelsPath | quote }}
 {{- if .Values.deployment.volume.enabled }}
          volumeMounts:
          - mountPath: {{ .Values.deployment.env.modelsPath | quote }}
            name: models
      volumes:
      - name: models
        persistentVolumeClaim:
          claimName: {{ template "local-ai.fullname" . }}
 {{- end }}
--- a/charts/local-ai/templates/service.yaml
+++ b/charts/local-ai/templates/service.yaml
@@ -1,19 +0,0 @@
 {{/*
 Service exposing the LocalAI API on TCP port 8080.
 - Annotations are rendered with nindent so that every entry of a
   multi-entry map gets the same indentation.
 - The selector uses both the name and the instance label (the same pair the
   Deployment puts on its pods) so that it only matches pods of this release.
 */}}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
 {{- if .Values.service.annotations }}
  annotations:
    {{- toYaml .Values.service.annotations | nindent 4 }}
 {{- end }}
 spec:
  selector:
    app.kubernetes.io/name: {{ include "local-ai.name" . }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
  type: "{{ .Values.service.type }}"
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080
--- a/charts/local-ai/values.yaml
+++ b/charts/local-ai/values.yaml
@@ -1,38 +0,0 @@
 deployment:
  image: quay.io/go-skynet/local-ai:latest
  env:
    threads: 14
    contextSize: 512
    modelsPath: "/models"
  volume:
    enabled: false
 service:
  type: ClusterIP
  annotations: {}
  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
 # Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
 # (requires https://github.com/kubevirt/containerized-data-importer)
 dataVolume:
  enabled: false
  source:
    type: "http" # Source type. One of: [ http | s3 ]
    url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
    # caCertConfigMap is the optional name of a ConfigMap containing a Certificate Authority (CA) public key
    # and a base64 encoded pem certificate
    caCertConfigMap: ""
    # SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
    # that may include sensitive information. Only applicable for the http source type.
    secretExtraHeaders: []
  pvc:
    accessModes:
    - ReadWriteOnce
    size: 5Gi
  secret:
    enabled: false
    username: "" # base64 encoded
    password: "" # base64 encoded
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,15 +0,0 @@
 version: '3.6'
 services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - 8080:8080
    env_file:
      - .env
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
--- a/go.mod
+++ b/go.mod
@@ -1,54 +0,0 @@
 module github.com/go-skynet/LocalAI
 go 1.19
 require (
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead
 	github.com/gofiber/fiber/v2 v2.42.0
 	github.com/jaypipes/ghw v0.10.0
 	github.com/onsi/ginkgo/v2 v2.9.2
 	github.com/onsi/gomega v1.27.6
 	github.com/rs/zerolog v1.29.1
 	github.com/sashabaranov/go-openai v1.9.0
 	github.com/urfave/cli/v2 v2.25.0
 )
 require (
 	github.com/StackExchange/wmi v1.2.1 // indirect
 	github.com/andybalholm/brotli v1.0.4 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
 	github.com/ghodss/yaml v1.0.0 // indirect
 	github.com/go-logr/logr v1.2.3 // indirect
 	github.com/go-ole/go-ole v1.2.6 // indirect
 	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/google/go-cmp v0.5.9 // indirect
 	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
 	github.com/google/uuid v1.3.0 // indirect
 	github.com/jaypipes/pcidb v1.0.0 // indirect
 	github.com/klauspost/compress v1.15.9 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-isatty v0.0.17 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
 	github.com/mitchellh/go-homedir v1.1.0 // indirect
 	github.com/philhofer/fwd v1.1.1 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
 	github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d // indirect
 	github.com/tinylib/msgp v1.1.6 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/valyala/fasthttp v1.44.0 // indirect
 	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
 	golang.org/x/net v0.8.0 // indirect
 	golang.org/x/sys v0.6.0 // indirect
 	golang.org/x/text v0.8.0 // indirect
 	golang.org/x/tools v0.7.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	howett.net/plist v1.0.0 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -1,155 +0,0 @@
 github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
 github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
 github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
 github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
 github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
 github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
 github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
 github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
 github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
 github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
 github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
 github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
 github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
 github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
 github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
 github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
 github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
 github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
 github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
 github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
 github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
 github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
 github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
 github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
 github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
 github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
 github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
 github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
 github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
 github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
 github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
 github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
 github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
 github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
 github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
 github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
 github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
 github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
 github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
 github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
 golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
 golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
 golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
 golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
 golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
 gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
 howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
--- a/main.go
+++ b/main.go
@@ -1,92 +0,0 @@
 package main
 import (
 	"os"
 	api "github.com/go-skynet/LocalAI/api"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/jaypipes/ghw"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
 	"github.com/urfave/cli/v2"
 )
 // main configures console logging, builds the LocalAI command line and starts
 // the API server on the configured address. It exits with status 1 when the
 // working directory cannot be determined or when the application returns an
 // error.
 func main() {
 	log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
 	// The current working directory is the default value of --models-path.
 	path, err := os.Getwd()
 	if err != nil {
 		log.Error().Msgf("error: %s", err.Error())
 		os.Exit(1)
 	}
 	// Default to 4 threads; use the core count reported by ghw only when the
 	// probe succeeds and reports a non-zero value, so the default is never 0.
 	threads := 4
 	cpu, err := ghw.CPU()
 	if err == nil && cpu.TotalCores > 0 {
 		threads = int(cpu.TotalCores)
 	}
 	app := &cli.App{
 		Name:  "LocalAI",
 		Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
 		// Flag descriptions belong in Usage: DefaultText replaces the text
 		// shown for the default value in the help output, which would hide
 		// the real defaults set through Value.
 		Flags: []cli.Flag{
 			&cli.BoolFlag{
 				Name:    "f16",
 				EnvVars: []string{"F16"},
 			},
 			&cli.BoolFlag{
 				Name:    "debug",
 				EnvVars: []string{"DEBUG"},
 			},
 			&cli.IntFlag{
 				Name:    "threads",
 				Usage:   "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
 				EnvVars: []string{"THREADS"},
 				Value:   threads,
 			},
 			&cli.StringFlag{
 				Name:    "models-path",
 				Usage:   "Path containing models used for inferencing",
 				EnvVars: []string{"MODELS_PATH"},
 				Value:   path,
 			},
 			&cli.StringFlag{
 				Name:    "address",
 				Usage:   "Bind address for the API server.",
 				EnvVars: []string{"ADDRESS"},
 				Value:   ":8080",
 			},
 			&cli.IntFlag{
 				Name:    "context-size",
 				Usage:   "Default context size of the model",
 				EnvVars: []string{"CONTEXT_SIZE"},
 				Value:   512,
 			},
 		},
 		Description: `
 LocalAI is a drop-in replacement OpenAI API which runs inference locally.
 Some of the models compatible are:
 - Vicuna
 - Koala
 - GPT4ALL
 - GPT4ALL-J
 - Cerebras
 - Alpaca
 - StableLM (ggml quantized)
 It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
 `,
 		UsageText: `local-ai [options]`,
 		Copyright: "go-skynet authors",
 		// Build the API application from the parsed flags and serve it.
 		Action: func(ctx *cli.Context) error {
 			return api.App(model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
 		},
 	}
 	err = app.Run(os.Args)
 	if err != nil {
 		log.Error().Msgf("error: %s", err.Error())
 		os.Exit(1)
 	}
 }
--- a/models/.keep
+++ b/models/.keep
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -1,274 +0,0 @@
 package model
 import (
 	"bytes"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 	"text/template"
 	"github.com/rs/zerolog/log"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
 )
 // ModelLoader loads model files found under modelPath and keeps the loaded
 // instances, together with any parsed prompt templates, in maps keyed by
 // model name.
 type ModelLoader struct {
 	// modelPath is the directory that model names and template files are
 	// resolved against.
 	modelPath string
 	// mu is held by the Load* methods and by TemplatePrefix while they read
 	// or write the maps below.
 	mu        sync.Mutex
 	// One cache per backend type.
 	models            map[string]*llama.LLama
 	gptmodels         map[string]*gptj.GPTJ
 	gpt2models        map[string]*gpt2.GPT2
 	gptstablelmmodels map[string]*gpt2.StableLM
 	// promptsTemplates holds the parsed "<model name>.tmpl" templates.
 	promptsTemplates map[string]*template.Template
 }
 // NewModelLoader returns a ModelLoader rooted at modelPath whose model caches
 // and prompt-template cache are all initialised to empty maps.
 func NewModelLoader(modelPath string) *ModelLoader {
 	loader := new(ModelLoader)
 	loader.modelPath = modelPath
 	loader.models = map[string]*llama.LLama{}
 	loader.gptmodels = map[string]*gptj.GPTJ{}
 	loader.gpt2models = map[string]*gpt2.GPT2{}
 	loader.gptstablelmmodels = map[string]*gpt2.StableLM{}
 	loader.promptsTemplates = map[string]*template.Template{}
 	return loader
 }
 // ExistsInModelPath reports whether s names an existing entry inside the
 // model directory.
 //
 // s is joined onto the model directory; names that resolve to a location
 // outside of it (for example through ".." elements) are rejected and reported
 // as not existing, so that a caller-supplied model name cannot be used to
 // probe or load arbitrary files. Any error from os.Stat is also reported as
 // "does not exist".
 func (ml *ModelLoader) ExistsInModelPath(s string) bool {
 	base := filepath.Clean(ml.modelPath)
 	full := filepath.Join(base, s)
 	// After cleaning, a path that escapes base is relative to it as ".." or
 	// as something starting with "../".
 	rel, err := filepath.Rel(base, full)
 	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
 		return false
 	}
 	_, err = os.Stat(full)
 	return err == nil
 }
 // ListModels returns the names of the files in the model directory that can be
 // offered as models.
 //
 // Sub-directories are skipped, as are prompt templates (.tmpl), YAML files
 // (.yaml, .yml) and .keep placeholders. When the directory cannot be read, an
 // empty (non-nil) slice is returned together with the error.
 func (ml *ModelLoader) ListModels() ([]string, error) {
 	files, err := ioutil.ReadDir(ml.modelPath)
 	if err != nil {
 		return []string{}, err
 	}
 	models := []string{}
 	for _, file := range files {
 		// The loaders open a single file, so a directory is never a model.
 		if file.IsDir() {
 			continue
 		}
 		// Skip templates, YAML and .keep files
 		switch filepath.Ext(file.Name()) {
 		case ".tmpl", ".keep", ".yaml", ".yml":
 			continue
 		}
 		models = append(models, file.Name())
 	}
 	return models, nil
 }
 // TemplatePrefix renders the prompt template registered for modelName with in
 // as its data and returns the resulting text. It fails when no template has
 // been loaded for that model or when executing the template fails.
 func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 	tmpl, found := ml.promptsTemplates[modelName]
 	if !found {
 		return "", fmt.Errorf("no prompt template available")
 	}
 	rendered := new(bytes.Buffer)
 	err := tmpl.Execute(rendered, in)
 	if err != nil {
 		return "", err
 	}
 	return rendered.String(), nil
 }
 // loadTemplateIfExists parses "<modelName>.tmpl" from the model directory and
 // stores it in the prompt-template cache. A template that is already cached or
 // that is not present in the model directory is not an error; only read and
 // parse failures are returned. modelFile is currently unused.
 //
 // The method does not take ml.mu itself; the Load* methods call it while
 // holding the lock.
 func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
 	// Nothing to do when the template has been parsed before.
 	if _, cached := ml.promptsTemplates[modelName]; cached {
 		return nil
 	}
 	// A model without a template file is fine: it simply runs without one.
 	templateName := modelName + ".tmpl"
 	if !ml.ExistsInModelPath(templateName) {
 		return nil
 	}
 	raw, err := os.ReadFile(filepath.Join(ml.modelPath, templateName))
 	if err != nil {
 		return err
 	}
 	parsed, err := template.New("prompt").Parse(string(raw))
 	if err != nil {
 		return err
 	}
 	ml.promptsTemplates[modelName] = parsed
 	return nil
 }
 // LoadStableLMModel returns the StableLM model stored as modelName in the
 // model directory. The model is loaded on first use and served from the
 // in-memory cache afterwards; a prompt template found for the model is loaded
 // alongside it.
 func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 	// The file has to be present on disk, even when the model is cached.
 	if !ml.ExistsInModelPath(modelName) {
 		return nil, fmt.Errorf("model does not exist")
 	}
 	// Serve the cached instance when there is one.
 	if cached, found := ml.gptstablelmmodels[modelName]; found {
 		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return cached, nil
 	}
 	// First request for this model: load it and keep it for later use.
 	fullPath := filepath.Join(ml.modelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", fullPath)
 	loaded, err := gpt2.NewStableLM(fullPath)
 	if err != nil {
 		return nil, err
 	}
 	// A template that fails to load aborts the call; the model is not cached.
 	if tmplErr := ml.loadTemplateIfExists(modelName, fullPath); tmplErr != nil {
 		return nil, tmplErr
 	}
 	ml.gptstablelmmodels[modelName] = loaded
 	return loaded, nil
 }
 // LoadGPT2Model returns the GPT2 model stored as modelName in the model
 // directory. The model is loaded on first use and served from the in-memory
 // cache afterwards; a prompt template found for the model is loaded alongside
 // it. An error is returned when the same name is already loaded as a StableLM
 // model.
 func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 	// The file has to be present on disk, even when the model is cached.
 	if !ml.ExistsInModelPath(modelName) {
 		return nil, fmt.Errorf("model does not exist")
 	}
 	// Serve the cached instance when there is one.
 	if cached, found := ml.gpt2models[modelName]; found {
 		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return cached, nil
 	}
 	// TODO: This needs refactoring, it's really bad to have it in here
 	// A name already loaded as StableLM is reported as an error so the API tries with StableLM.
 	if _, isStableLM := ml.gptstablelmmodels[modelName]; isStableLM {
 		log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPTStableLM one")
 	}
 	// First request for this model: load it and keep it for later use.
 	fullPath := filepath.Join(ml.modelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", fullPath)
 	loaded, err := gpt2.New(fullPath)
 	if err != nil {
 		return nil, err
 	}
 	// A template that fails to load aborts the call; the model is not cached.
 	if tmplErr := ml.loadTemplateIfExists(modelName, fullPath); tmplErr != nil {
 		return nil, tmplErr
 	}
 	ml.gpt2models[modelName] = loaded
 	return loaded, nil
 }
 // LoadGPTJModel returns the GPTJ model stored as modelName in the model
 // directory. The model is loaded on first use and served from the in-memory
 // cache afterwards; a prompt template found for the model is loaded alongside
 // it. An error is returned when the same name is already loaded as a GPT2 or
 // StableLM model.
 func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 	// The file has to be present on disk, even when the model is cached.
 	if !ml.ExistsInModelPath(modelName) {
 		return nil, fmt.Errorf("model does not exist")
 	}
 	// Serve the cached instance when there is one.
 	if cached, found := ml.gptmodels[modelName]; found {
 		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return cached, nil
 	}
 	// TODO: This needs refactoring, it's really bad to have it in here
 	// A name already loaded by another backend is reported as an error so the API tries with that backend.
 	if _, isGPT2 := ml.gpt2models[modelName]; isGPT2 {
 		log.Debug().Msgf("Model is GPT2: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPT2 one")
 	}
 	if _, isStableLM := ml.gptstablelmmodels[modelName]; isStableLM {
 		log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPTStableLM one")
 	}
 	// First request for this model: load it and keep it for later use.
 	fullPath := filepath.Join(ml.modelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", fullPath)
 	loaded, err := gptj.New(fullPath)
 	if err != nil {
 		return nil, err
 	}
 	// A template that fails to load aborts the call; the model is not cached.
 	if tmplErr := ml.loadTemplateIfExists(modelName, fullPath); tmplErr != nil {
 		return nil, tmplErr
 	}
 	ml.gptmodels[modelName] = loaded
 	return loaded, nil
 }
 // LoadLLaMAModel returns the LLaMA model stored as modelName in the model
 // directory, passing opts to the backend when the model has to be loaded. The
 // model is loaded on first use and served from the in-memory cache afterwards;
 // a prompt template found for the model is loaded alongside it. An error is
 // returned when the same name is already loaded as a GPTJ, GPT2 or StableLM
 // model.
 func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 	log.Debug().Msgf("Loading model name: %s", modelName)
 	// The file has to be present on disk, even when the model is cached.
 	if !ml.ExistsInModelPath(modelName) {
 		return nil, fmt.Errorf("model does not exist")
 	}
 	// Serve the cached instance when there is one.
 	if cached, found := ml.models[modelName]; found {
 		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return cached, nil
 	}
 	// TODO: This needs refactoring, it's really bad to have it in here
 	// A name already loaded by another backend is reported as an error so the API tries with that backend.
 	if _, isGPTJ := ml.gptmodels[modelName]; isGPTJ {
 		log.Debug().Msgf("Model is GPTJ: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPTJ one")
 	}
 	if _, isGPT2 := ml.gpt2models[modelName]; isGPT2 {
 		log.Debug().Msgf("Model is GPT2: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPT2 one")
 	}
 	if _, isStableLM := ml.gptstablelmmodels[modelName]; isStableLM {
 		log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
 		return nil, fmt.Errorf("this model is a GPTStableLM one")
 	}
 	// First request for this model: load it and keep it for later use.
 	fullPath := filepath.Join(ml.modelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", fullPath)
 	loaded, err := llama.New(fullPath, opts...)
 	if err != nil {
 		return nil, err
 	}
 	// A template that fails to load aborts the call; the model is not cached.
 	if tmplErr := ml.loadTemplateIfExists(modelName, fullPath); tmplErr != nil {
 		return nil, tmplErr
 	}
 	ml.models[modelName] = loaded
 	return loaded, nil
 }
--- a/prompt-templates/alpaca.tmpl
+++ b/prompt-templates/alpaca.tmpl
@@ -1,6 +0,0 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
 {{.Input}}
 ### Response:
--- a/prompt-templates/ggml-gpt4all-j.tmpl
+++ b/prompt-templates/ggml-gpt4all-j.tmpl
@@ -1,4 +0,0 @@
 The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
 ### Prompt:
 {{.Input}}
 ### Response:
--- a/prompt-templates/koala.tmpl
+++ b/prompt-templates/koala.tmpl
@@ -1 +0,0 @@
 BEGINNING OF CONVERSATION: USER: {{.Input}} GPT:
--- a/prompt-templates/vicuna.tmpl
+++ b/prompt-templates/vicuna.tmpl
@@ -1,6 +0,0 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
 {{.Input}}
 ### Response:
Author	SHA1	Message	Date
Mauro Morales	eb137c8a84	Cleanup gh-pages branch Signed-off-by: Dimitris Karakasilis <dimitris@spectrocloud.com>	2023-04-26 11:55:04 +03:00
renovate[bot]	4e2061636e	chore(deps): update actions/checkout action to v3 (#82 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-25 07:46:29 +02:00
renovate[bot]	e3ef171968	fix(deps): update module github.com/gofiber/fiber/v2 to v2.44.0 (#81 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-25 07:46:14 +02:00
Ettore Di Giacinto	12d83a4184	feat: Return OpenAI errors and update docs (#80 ) Signed-off-by: mudler <mudler@mocaccino.org>	2023-04-24 23:42:03 +02:00
renovate[bot]	045412e8dd	fix(deps): update module github.com/urfave/cli/v2 to v2.25.1 (#78 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 18:16:23 +02:00
renovate[bot]	9896a9a58b	fix(deps): update github.com/go-skynet/go-llama.cpp digest to e45cebe (#77 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 18:16:10 +02:00
Ettore Di Giacinto	b9011bda59	feat: automatic updates with renovate, docs updates (#76 )	2023-04-24 18:10:58 +02:00
Ettore Di Giacinto	2b2f5fa36a	feat: update llama.cpp (#72 )	2023-04-24 14:15:49 +02:00
renovate[bot]	43c557dc5c	fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 1f7bff5 (#74 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 14:14:21 +02:00
renovate[bot]	7abb2c9bd7	fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 245a5bf (#73 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 14:13:04 +02:00
renovate[bot]	7a9ea4480a	Configure Renovate (#71 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-04-24 14:11:39 +02:00
Vladimir Malyutin	31bcc558de	Update README.md (#62 )	2023-04-22 14:42:30 +02:00
		`@@ -1 +0,0 @@`
			`BEGINNING OF CONVERSATION: USER: {{.Input}} GPT:`