Compare commits

9 Commits

Author SHA1 Message Date
Ettore Di Giacinto
676e15f785 fix: make MacOS builds work (#61) 2023-04-22 11:05:23 +02:00
Marc R Kellerman
3e71c90949 feature: add devcontainer for live debugging (#60) 2023-04-22 01:20:03 +02:00
Ettore Di Giacinto
550ae9c968 docs: add Discord channel link (#59) 2023-04-22 00:46:17 +02:00
Ettore Di Giacinto
1c872ec326 feat: add CI/tests (#58) 2023-04-22 00:44:52 +02:00
Signed-off-by: mudler <mudler@mocaccino.org>
Marc R Kellerman
05f35b182c fix(makefile): fix go-gpt2 folder and add verification before git clone (#51) 2023-04-22 00:29:32 +02:00
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Ettore Di Giacinto
79791438fe Use the first available model if not specified (#55) 2023-04-21 22:54:43 +02:00
Signed-off-by: mudler <mudler@mocaccino.org>
Tyler Gillson
bf20cc34f6 feat: Add helm chart (#56) 2023-04-21 13:22:03 -07:00
Ettore Di Giacinto
5cba71de70 Add stopwords, debug mode, and other API enhancements (#54) 2023-04-21 19:46:59 +02:00
Signed-off-by: mudler <mudler@mocaccino.org>
Ettore Di Giacinto
4b7e83056d Update .env 2023-04-21 01:47:35 +02:00
25 changed files with 569 additions and 176 deletions

.devcontainer/Dockerfile Normal file

@@ -0,0 +1,3 @@
ARG GO_VERSION=1.20
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
RUN apt-get update && apt-get install -y cmake

.devcontainer/devcontainer.json Normal file

@@ -0,0 +1,46 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
"name": "Existing Docker Compose (Extend)",
// Update the 'dockerComposeFile' list if you have more compose files or use different names.
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
"dockerComposeFile": [
"../docker-compose.yaml",
"docker-compose.yml"
],
// The 'service' property is the name of the service for the container that VS Code should
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
"service": "api",
// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/workspace",
"features": {
"ghcr.io/devcontainers/features/go:1": {},
"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
},
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Uncomment the next line if you want start specific services in your Docker Compose config.
// "runServices": [],
// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
// "shutdownAction": "none",
// Uncomment the next line to run commands after the container is created.
"postCreateCommand": "make prepare"
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "devcontainer"
}
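A devcontainer like this can also be driven without the VS Code UI. A minimal sketch using the Dev Containers CLI (assumes `@devcontainers/cli` is installed and Docker is running; not part of this PR):

```bash
# Build and start the compose-based devcontainer defined above
devcontainer up --workspace-folder .

# Run a command inside it, e.g. the project's test target
devcontainer exec --workspace-folder . make test
```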

.devcontainer/docker-compose.yml Normal file

@@ -0,0 +1,26 @@
version: '3.6'
services:
# Update this to the name of the service you want to work with in your docker-compose.yml file
api:
# Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
# folder. Note that the path of the Dockerfile and context is relative to the *primary*
# docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
# array). The sample below assumes your primary file is in the root of your project.
#
build:
context: .
dockerfile: .devcontainer/Dockerfile
volumes:
# Update this to wherever you want VS Code to mount the folder of your project
- .:/workspace:cached
# Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
# cap_add:
# - SYS_PTRACE
# security_opt:
# - seccomp:unconfined
# Overrides default command so things don't shut down after the process ends.
command: /bin/sh -c "while sleep 1000; do :; done"

.env

@@ -1,4 +1,5 @@
THREADS=14
CONTEXT_SIZE=512
# THREADS=14
# CONTEXT_SIZE=512
MODELS_PATH=/models
# DEBUG=true
# BUILD_TYPE=generic
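Since the compose file now sources these values through `env_file` (see the docker-compose.yaml change below), enabling debug output is a matter of uncommenting the line rather than exporting a shell variable. One way to do it, as a sketch assuming GNU sed:

```bash
# Uncomment DEBUG in .env so the api container logs at debug level
sed -i 's/^# DEBUG=true/DEBUG=true/' .env
docker compose up -d
```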

.github/workflows/test.yml vendored Normal file

@@ -0,0 +1,44 @@
---
name: 'tests'
on:
pull_request:
push:
branches:
- master
tags:
- '*'
jobs:
ubuntu-latest:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v1
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential
- name: Test
run: |
make test
macOS-latest:
runs-on: macOS-latest
steps:
- name: Clone
uses: actions/checkout@v1
with:
submodules: true
- name: Dependencies
run: |
brew update
brew install sdl2
- name: Test
run: |
make test
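The Ubuntu job can be reproduced locally before pushing. One option is nektos/act, sketched below (assumes `act` and Docker are installed; the macOS job has no local runner image, so it still needs CI):

```bash
# Run the ubuntu-latest job of the 'tests' workflow in a local container
act pull_request -j ubuntu-latest
```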

.gitignore vendored

@@ -1,11 +1,15 @@
# go-llama build artifacts
go-llama
go-gpt4all-j
go-gpt2
# LocalAI build binary
LocalAI
local-ai
# prevent above rules from omitting the helm chart
!charts/*
# Ignore models
models/*.bin
models/ggml-*
test-models/

.vscode/launch.json vendored

@@ -1,16 +1,20 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
]
}
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
],
"env": {
"C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"DEBUG": "true"
}
}
]
}
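The env block added to the launch configuration can be reproduced from a plain shell, which is handy outside VS Code. A sketch, assuming the bindings have already been built under /workspace:

```bash
# Shell equivalent of the "Launch Go" configuration above
C_INCLUDE_PATH=/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2 \
LIBRARY_PATH=/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2 \
DEBUG=true go run ./main.go api
```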

Dockerfile

@@ -1,11 +1,12 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
ARG BUILD_TYPE=
FROM golang:$GO_VERSION as builder
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
ARG BUILD_TYPE=
RUN make build${BUILD_TYPE}
RUN make build
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
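With BUILD_TYPE kept as a build argument and the Makefile now reading it from the environment, an image build might look like the following sketch (tags are illustrative, and it assumes the ARG is picked up by the Makefile's `ifndef BUILD_TYPE` default):

```bash
# Default, hardware-native build
docker build -t local-ai .

# Generic CPU build for hardware without the default instruction set
docker build --build-arg BUILD_TYPE=generic -t local-ai:generic .
```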

Makefile

@@ -2,9 +2,9 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=llama.cpp-5ecff35
GOGPT4ALLJ_VERSION?=1f548782d80d48b9a0fac33aae6f129358787bc0
GOGPT2_VERSION?=1c24f5b86ac428cd5e81dae1f1427b1463bd2b06
GOLLAMA_VERSION?=llama.cpp-8687c1f
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
@@ -12,6 +12,20 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
# Use this if you want to set the default behavior
ifndef BUILD_TYPE
BUILD_TYPE:=default
endif
ifeq ($(BUILD_TYPE), "generic")
GENERIC_PREFIX:=generic-
else
GENERIC_PREFIX:=
endif
.PHONY: all test build vendor
all: help
@@ -19,15 +33,18 @@ all: help
## Build:
build: prepare ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
buildgeneric: prepare-generic ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
## GPT4ALL-J
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j && cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@@ -38,58 +55,53 @@ go-gpt4all-j:
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j libgptj.a
go-gpt4all-j/libgptj.a-generic: go-gpt4all-j
$(MAKE) -C go-gpt4all-j generic-libgptj.a
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
# CEREBRAS GPT
go-gpt2.cpp:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2.cpp && cd go-gpt2.cpp && git checkout -b build $(GOGPT2_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2.cpp/libgpt2.a: go-gpt2.cpp
$(MAKE) -C go-gpt2.cpp libgpt2.a
go-gpt2.cpp/libgpt2.a-generic: go-gpt2.cpp
$(MAKE) -C go-gpt2.cpp generic-libgpt2.a
go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
go-llama:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama libbinding.a
go-llama-generic:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama generic-libbinding.a
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
prepare: go-llama go-gpt4all-j/libgptj.a go-gpt2.cpp/libgpt2.a replace
prepare-generic: go-llama-generic go-gpt4all-j/libgptj.a-generic go-gpt2.cpp/libgpt2.a-generic replace
prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./go-gpt2.cpp
rm -rf ./go-gpt2
rm -rf $(BINARY_NAME)
## Run:
run: prepare
$(GOCMD) run ./ api
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
## Test:
test: ## Run the tests of the project
$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
test-models/testmodel:
mkdir test-models
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
test: prepare test-models/testmodel
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v ./...
## Help:
help: ## Show this help.
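Taken together, the reworked targets hang off a single BUILD_TYPE switch and a GENERIC_PREFIX computed from it. Typical invocations, as a sketch:

```bash
make build         # native build; BUILD_TYPE defaults to 'default'
make generic-build # re-invokes 'make build' with BUILD_TYPE="generic"
make test          # fetches test-models/testmodel, then runs the Go test suite
```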

README.md

@@ -15,6 +15,8 @@ LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for
- Support for prompt templates
- Doesn't shell out, but uses C bindings for faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
Discord channel: [Discord](https://discord.gg/uJAeKSAGDy)
## Model compatibility
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp), and also supports [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
@@ -63,6 +65,26 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
}'
```
## Helm Chart Installation (run LocalAI in Kubernetes)
The local-ai Helm chart supports two options for the LocalAI server's models directory:
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
Install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == false`.
2. Advanced, two-phase deployment that provisions the models directory using a DataVolume. Requires [Containerized Data Importer (CDI)](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
First, install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == true`:
```bash
helm install local-ai charts/local-ai -n local-ai --create-namespace
```
Wait for CDI to create an importer Pod for the DataVolume and for the importer Pod to finish provisioning the model archive inside the PV.
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volume.enabled == true` and `.Values.dataVolume.enabled == false`, then upgrade the chart:
```bash
helm upgrade local-ai -n local-ai charts/local-ai
```
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
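The same two-phase flow can be driven with `--set` flags instead of editing values.yaml. A sketch using the values shipped in this chart (the model URL is a placeholder):

```bash
# Phase 1: provision the models PV via a DataVolume
helm install local-ai charts/local-ai -n local-ai --create-namespace \
  --set deployment.volume.enabled=false \
  --set dataVolume.enabled=true \
  --set dataVolume.source.url="http://<model_server>/<model_archive>"

# Phase 2: once the importer Pod is gone, mount the provisioned PV
helm upgrade local-ai charts/local-ai -n local-ai \
  --set deployment.volume.enabled=true \
  --set dataVolume.enabled=false
```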
## Prompt templates
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
@@ -184,10 +206,6 @@ python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
It should work; however, you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
### Kubernetes
You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
### Build locally
Pre-built images should fit most modern hardware; however, you can build the images manually, and for some hardware you may need to.

api/api.go

@@ -14,6 +14,7 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/recover"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
@@ -48,6 +49,8 @@ type OpenAIRequest struct {
// Prompt is read only by completion API calls
Prompt string `json:"prompt"`
Stop string `json:"stop"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages"`
@@ -61,15 +64,17 @@ type OpenAIRequest struct {
N int `json:"n"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch"`
F16 bool `json:"f16kv"`
IgnoreEOS bool `json:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty"`
Keep int `json:"n_keep"`
Seed int `json:"seed"`
}
// https://platform.openai.com/docs/api-reference/completions
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
var err error
var model *llama.LLama
@@ -90,16 +95,28 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelFile == "" {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
}
}
// If no model is found or specified, we bail out
if modelFile == "" && !bearerExists {
return fmt.Errorf("no model specified")
}
if bearerExists { // model specified in bearer token takes precedence
// If a model is specified in the bearer token, it takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
// Try to load the model with both
// Try to load the model
var llamaerr, gpt2err, gptjerr, stableerr error
llamaOpts := []llama.ModelOption{}
if ctx != 0 {
@@ -269,6 +286,22 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
llama.SetThreads(threads),
}
if debug {
predictOptions = append(predictOptions, llama.Debug)
}
if input.Stop != "" {
predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
}
if input.RepeatPenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
}
if input.Keep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
}
if input.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
}
@@ -341,9 +374,15 @@ func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
}
}
func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f16 bool) error {
func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
// Return errors as JSON responses
app := fiber.New(fiber.Config{
DisableStartupMessage: disableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -371,16 +410,14 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
var mumutex = &sync.Mutex{}
// openAI compatible API endpoint
app.Post("/v1/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Get("/v1/models", listModels(loader))
app.Get("/models", listModels(loader))
// Start the server
app.Listen(listenAddr)
return nil
return app
}
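The new request fields surfaced in this diff (`stop`, `repeat_penalty`, `n_keep`, `batch`, `seed`) map straight onto the predict options above. A sketch of exercising them against a local instance (the model name is a placeholder):

```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
  "model": "ggml-model.bin",
  "prompt": "Once upon a time",
  "stop": "\n",
  "repeat_penalty": 1.2,
  "n_keep": 10,
  "seed": 42
}'
```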

api/api_test.go Normal file

@@ -0,0 +1,53 @@
package api_test
import (
"context"
"os"
. "github.com/go-skynet/LocalAI/api"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/sashabaranov/go-openai"
)
var _ = Describe("API test", func() {
var app *fiber.App
var modelLoader *model.ModelLoader
var client *openai.Client
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App(modelLoader, 1, 512, false, false, true)
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
app.Shutdown()
})
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(1))
Expect(models.Models[0].ID).To(Equal("testmodel"))
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
})
})
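These specs expect exactly one model named `testmodel` under MODELS_PATH, which is what the new `make test` target arranges. Running them outside make might look like this sketch (assumes the C bindings were already built by `make prepare`):

```bash
MODELS_PATH=$(pwd)/test-models \
C_INCLUDE_PATH=$(pwd)/go-llama:$(pwd)/go-gpt4all-j:$(pwd)/go-gpt2 \
LIBRARY_PATH=$(pwd)/go-llama:$(pwd)/go-gpt4all-j:$(pwd)/go-gpt2 \
go test -v ./api/...
```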

api/apt_suite_test.go Normal file

@@ -0,0 +1,13 @@
package api_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI test suite")
}

charts/local-ai/Chart.yaml Normal file

@@ -0,0 +1,6 @@
apiVersion: v2
appVersion: 0.1.0
description: A Helm chart for LocalAI
name: local-ai
type: application
version: 1.0.0

charts/local-ai/templates/_helpers.tpl Normal file

@@ -0,0 +1,44 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "local-ai.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "local-ai.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "local-ai.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "local-ai.labels" -}}
helm.sh/chart: {{ include "local-ai.chart" . }}
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
{{- end }}

charts/local-ai/templates/data-volume.yaml Normal file

@@ -0,0 +1,39 @@
{{- if .Values.dataVolume.enabled }}
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
contentType: archive
source:
{{ .Values.dataVolume.source.type }}:
url: {{ .Values.dataVolume.source.url }}
secretRef: {{ template "local-ai.fullname" . }}
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
{{- end }}
{{- if .Values.dataVolume.source.caCertConfigMap }}
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
{{- end }}
pvc:
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
resources:
requests:
storage: {{ .Values.dataVolume.pvc.size }}
---
{{- if .Values.dataVolume.secret.enabled }}
apiVersion: v1
kind: Secret
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
data:
accessKeyId: {{ .Values.dataVolume.secret.username }}
secretKey: {{ .Values.dataVolume.secret.password }}
{{- end }}
{{- end }}

charts/local-ai/templates/deployment.yaml Normal file

@@ -0,0 +1,39 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
replicas: 1
template:
metadata:
name: {{ template "local-ai.fullname" . }}
labels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
spec:
containers:
- name: {{ template "local-ai.fullname" . }}
image: {{ .Values.deployment.image }}
env:
- name: THREADS
value: {{ .Values.deployment.env.threads | quote }}
- name: CONTEXT_SIZE
value: {{ .Values.deployment.env.contextSize | quote }}
- name: MODELS_PATH
value: {{ .Values.deployment.env.modelsPath }}
{{- if .Values.deployment.volume.enabled }}
volumeMounts:
- mountPath: {{ .Values.deployment.env.modelsPath }}
name: models
volumes:
- name: models
persistentVolumeClaim:
claimName: {{ template "local-ai.fullname" . }}
{{- end }}

charts/local-ai/templates/service.yaml Normal file

@@ -0,0 +1,19 @@
apiVersion: v1
kind: Service
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
{{- if .Values.service.annotations }}
annotations:
{{ toYaml .Values.service.annotations | indent 4 }}
{{- end }}
spec:
selector:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
type: "{{ .Values.service.type }}"
ports:
- protocol: TCP
port: 8080
targetPort: 8080

charts/local-ai/values.yaml Normal file

@@ -0,0 +1,38 @@
deployment:
image: quay.io/go-skynet/local-ai:latest
env:
threads: 14
contextSize: 512
modelsPath: "/models"
volume:
enabled: false
service:
type: ClusterIP
annotations: {}
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
# (requires https://github.com/kubevirt/containerized-data-importer)
dataVolume:
enabled: false
source:
type: "http" # Source type. One of: [ http | s3 ]
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
# CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
# and a base64 encoded pem certificate
caCertConfigMap: ""
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
# that may include sensitive information. Only applicable for the http source type.
secretExtraHeaders: []
pvc:
accessModes:
- ReadWriteOnce
size: 5Gi
secret:
enabled: false
username: "" # base64 encoded
password: "" # base64 encoded

docker-compose.yaml

@@ -6,14 +6,10 @@ services:
build:
context: .
dockerfile: Dockerfile
# args:
# BUILD_TYPE: generic # Uncomment to build CPU generic code that works on most HW
ports:
- 8080:8080
environment:
- MODELS_PATH=$MODELS_PATH
- CONTEXT_SIZE=$CONTEXT_SIZE
- THREADS=$THREADS
- DEBUG=$DEBUG
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

go.mod

@@ -5,10 +5,13 @@ go 1.19
require (
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead
github.com/gofiber/fiber/v2 v2.42.0
github.com/jaypipes/ghw v0.10.0
github.com/onsi/ginkgo/v2 v2.9.2
github.com/onsi/gomega v1.27.6
github.com/rs/zerolog v1.29.1
github.com/sashabaranov/go-openai v1.9.0
github.com/urfave/cli/v2 v2.25.0
)
@@ -17,7 +20,11 @@ require (
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/jaypipes/pcidb v1.0.0 // indirect
github.com/klauspost/compress v1.15.9 // indirect
@@ -37,7 +44,11 @@ require (
github.com/valyala/fasthttp v1.44.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/net v0.8.0 // indirect
golang.org/x/sys v0.6.0 // indirect
golang.org/x/text v0.8.0 // indirect
golang.org/x/tools v0.7.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
howett.net/plist v1.0.0 // indirect
)

go.sum

@@ -2,13 +2,20 @@ github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDO
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
@@ -16,16 +23,21 @@ github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDF
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
@@ -48,11 +60,15 @@ github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
@@ -60,10 +76,15 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
@@ -88,11 +109,13 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -110,18 +133,23 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=

kubernetes/data-volume.yaml (deleted)

@@ -1,28 +0,0 @@
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
# (requires https://github.com/kubevirt/containerized-data-importer)
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
name: models
namespace: local-ai
spec:
contentType: archive
source:
http:
url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
secretRef: model-secret
pvc:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Secret
metadata:
name: model-secret
namespace: local-ai
data:
accessKeyId: <model_server_username_base64_encoded>
secretKey: <model_server_password_base64_encoded>

kubernetes/deployment.yaml (deleted)

@@ -1,57 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: local-ai
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: local-ai
namespace: local-ai
labels:
app: local-ai
spec:
selector:
matchLabels:
app: local-ai
replicas: 1
template:
metadata:
labels:
app: local-ai
name: local-ai
spec:
containers:
- name: local-ai
image: quay.io/go-skynet/local-ai:latest
env:
- name: THREADS
value: "14"
- name: CONTEXT_SIZE
value: "512"
- name: MODELS_PATH
value: /models
volumeMounts:
- mountPath: /models
name: models
volumes:
- name: models
persistentVolumeClaim:
claimName: models
---
apiVersion: v1
kind: Service
metadata:
name: local-ai
namespace: local-ai
# If using AWS, you'll need to override the default 60s load balancer idle timeout
# annotations:
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
spec:
selector:
app: local-ai
type: LoadBalancer
ports:
- protocol: TCP
port: 8080
targetPort: 8080

main.go

@@ -80,11 +80,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
UsageText: `local-ai [options]`,
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if ctx.Bool("debug") {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
return api.Start(model.NewModelLoader(ctx.String("models-path")), ctx.String("address"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"))
return api.App(model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
},
}
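After this change the CLI action builds the fiber app via api.App and calls Listen directly. Based on the flags read in this diff (`models-path`, `address`, `threads`, `context-size`, `f16`, `debug`), an invocation might look like this sketch:

```bash
./local-ai --models-path ./models --address ":8080" \
  --threads 4 --context-size 512 --debug
```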