Compare commits

..

22 Commits

Author SHA1 Message Date
Ettore Di Giacinto
12d83a4184 feat: Return OpenAI errors and update docs (#80)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-24 23:42:03 +02:00
renovate[bot]
045412e8dd fix(deps): update module github.com/urfave/cli/v2 to v2.25.1 (#78)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:23 +02:00
renovate[bot]
9896a9a58b fix(deps): update github.com/go-skynet/go-llama.cpp digest to e45cebe (#77)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 18:16:10 +02:00
Ettore Di Giacinto
b9011bda59 feat: automatic updates with renovate, docs updates (#76) 2023-04-24 18:10:58 +02:00
Ettore Di Giacinto
2b2f5fa36a feat: update llama.cpp (#72) 2023-04-24 14:15:49 +02:00
renovate[bot]
43c557dc5c fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 1f7bff5 (#74)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:14:21 +02:00
renovate[bot]
7abb2c9bd7 fix(deps): update github.com/go-skynet/go-gpt2.cpp digest to 245a5bf (#73)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:13:04 +02:00
renovate[bot]
7a9ea4480a Configure Renovate (#71)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-04-24 14:11:39 +02:00
Vladimir Malyutin
31bcc558de Update README.md (#62) 2023-04-22 14:42:30 +02:00
Ettore Di Giacinto
676e15f785 fix: make MacOS builds work (#61) 2023-04-22 11:05:23 +02:00
Marc R Kellerman
3e71c90949 feature: add devcontainer for live debugging (#60) 2023-04-22 01:20:03 +02:00
Ettore Di Giacinto
550ae9c968 docs: add Discord channel link (#59) 2023-04-22 00:46:17 +02:00
Ettore Di Giacinto
1c872ec326 feat: add CI/tests (#58)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-22 00:44:52 +02:00
Marc R Kellerman
05f35b182c fix(makefile): fix go-gpt2 folder and add verification before git clone (#51)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-04-22 00:29:32 +02:00
Ettore Di Giacinto
79791438fe Use the first available model if not specified (#55)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 22:54:43 +02:00
Tyler Gillson
bf20cc34f6 feat: Add helm chart (#56) 2023-04-21 13:22:03 -07:00
Ettore Di Giacinto
5cba71de70 Add stopwords, debug mode, and other API enhancements (#54)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 19:46:59 +02:00
Ettore Di Giacinto
4b7e83056d Update .env 2023-04-21 01:47:35 +02:00
Ettore Di Giacinto
ed954d66c3 Do not take all CPU by default (#50) 2023-04-21 00:55:19 +02:00
Ettore Di Giacinto
f816dfae65 Add support for stablelm (#48)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-04-21 00:06:55 +02:00
Ettore Di Giacinto
142bcd66ca Cleanup makefile, fix dep versions (#46)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-20 19:49:06 +02:00
Ettore Di Giacinto
1c4fbaae20 Add support for cerebras (#45)
Signed-off-by: mudler <mudler@c3os.io>
2023-04-20 19:33:36 +02:00
27 changed files with 983 additions and 190 deletions

3
.devcontainer/Dockerfile Normal file
View File

@@ -0,0 +1,3 @@
ARG GO_VERSION=1.20
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
RUN apt-get update && apt-get install -y cmake

View File

@@ -0,0 +1,46 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
"name": "Existing Docker Compose (Extend)",
// Update the 'dockerComposeFile' list if you have more compose files or use different names.
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
"dockerComposeFile": [
"../docker-compose.yaml",
"docker-compose.yml"
],
// The 'service' property is the name of the service for the container that VS Code should
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
"service": "api",
// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/workspace",
"features": {
"ghcr.io/devcontainers/features/go:1": {},
"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
},
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Uncomment the next line if you want start specific services in your Docker Compose config.
// "runServices": [],
// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
// "shutdownAction": "none",
// Uncomment the next line to run commands after the container is created.
"postCreateCommand": "make prepare"
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "devcontainer"
}

View File

@@ -0,0 +1,26 @@
version: '3.6'
services:
# Update this to the name of the service you want to work with in your docker-compose.yml file
api:
# Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
# folder. Note that the path of the Dockerfile and context is relative to the *primary*
# docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
# array). The sample below assumes your primary file is in the root of your project.
#
build:
context: .
dockerfile: .devcontainer/Dockerfile
volumes:
# Update this to wherever you want VS Code to mount the folder of your project
- .:/workspace:cached
# Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
# cap_add:
# - SYS_PTRACE
# security_opt:
# - seccomp:unconfined
# Overrides default command so things don't shut down after the process ends.
command: /bin/sh -c "while sleep 1000; do :; done"

7
.env
View File

@@ -1,4 +1,5 @@
THREADS=14
CONTEXT_SIZE=512
# THREADS=14
# CONTEXT_SIZE=512
MODELS_PATH=/models
# DEBUG=true
# DEBUG=true
# BUILD_TYPE=generic

44
.github/workflows/test.yml vendored Normal file
View File

@@ -0,0 +1,44 @@
---
name: 'tests'
on:
pull_request:
push:
branches:
- master
tags:
- '*'
jobs:
ubuntu-latest:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v1
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential
- name: Test
run: |
make test
macOS-latest:
runs-on: macOS-latest
steps:
- name: Clone
uses: actions/checkout@v1
with:
submodules: true
- name: Dependencies
run: |
brew update
brew install sdl2
- name: Test
run: |
make test

6
.gitignore vendored
View File

@@ -1,11 +1,15 @@
# go-llama build artifacts
go-llama
go-gpt4all-j
go-gpt2
# LocalAI build binary
LocalAI
local-ai
# prevent above rules from omitting the helm chart
!charts/*
# Ignore models
models/*.bin
models/ggml-*
models/ggml-*
test-models/

28
.vscode/launch.json vendored
View File

@@ -1,16 +1,20 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
]
}
{
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/main.go",
"args": [
"api"
],
"env": {
"C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"DEBUG": "true"
}
}
]
}
}

View File

@@ -1,11 +1,12 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
ARG BUILD_TYPE=
FROM golang:$GO_VERSION as builder
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
ARG BUILD_TYPE=
RUN make build${BUILD_TYPE}
RUN make build
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai

View File

@@ -2,7 +2,12 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=llama.cpp-5ecff35
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
GOLLAMA_VERSION?=llama.cpp-25d7abb
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
@@ -10,6 +15,20 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
# Use this if you want to set the default behavior
ifndef BUILD_TYPE
BUILD_TYPE:=default
endif
ifeq ($(BUILD_TYPE), "generic")
GENERIC_PREFIX:=generic-
else
GENERIC_PREFIX:=
endif
.PHONY: all test build vendor
all: help
@@ -17,54 +36,75 @@ all: help
## Build:
build: prepare ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j $(GOCMD) build -o $(BINARY_NAME) ./
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
buildgeneric: prepare-generic ## Build the project
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j $(GOCMD) build -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
## GPT4ALL-J
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j libgptj.a
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
go-gpt4all-j/libgptj.a-generic: go-gpt4all-j
$(MAKE) -C go-gpt4all-j generic-libgptj.a
# CEREBRAS GPT
go-gpt2:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
go-llama:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama libbinding.a
go-llama-generic:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
$(MAKE) -C go-llama generic-libbinding.a
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
prepare: go-llama go-gpt4all-j/libgptj.a
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
prepare-generic: go-llama-generic go-gpt4all-j/libgptj.a-generic
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./go-gpt2
rm -rf $(BINARY_NAME)
## Run:
run: prepare
$(GOCMD) run ./ api
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
## Test:
test: ## Run the tests of the project
$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
test-models/testmodel:
mkdir test-models
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
test: prepare test-models/testmodel
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v ./...
## Help:
help: ## Show this help.

175
README.md
View File

@@ -7,18 +7,34 @@
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather on the CLI interface. We think that there are already many projects that can be used as a CLI interface already, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you are were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)
[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy)
**LocalAI** is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.
- OpenAI compatible API
- Supports multiple-models
- Once loaded the first time, it keep models loaded in memory for faster inference
- Provides a simple command line interface that allows text generation directly from the terminal
- Support for prompt templates
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
Reddit post: https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/
## Model compatibility
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) and also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all).
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) supports also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
Tested with:
- Vicuna
- Alpaca
- [GPT4ALL](https://github.com/nomic-ai/gpt4all)
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
- Koala
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
@@ -41,7 +57,7 @@ cp your-model.bin models/
# vim .env
# start with docker-compose
docker compose up -d --build
docker-compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
@@ -54,6 +70,42 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
}'
```
### Example: Use GPT4ALL-J model
<details>
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Use a template from the examples
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
# (optional) Edit the .env file to set things like context size and threads
# vim .env
# start with docker-compose
docker-compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-gpt4all-j",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9
}'
# {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
```
</details>
## Prompt templates
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
@@ -97,8 +149,6 @@ And you'll see:
└───────────────────────────────────────────────────┘
```
Note: Models have to end up with `.bin` so can be listed by the `/models` endpoint.
You can control the API server options with command line arguments:
```
@@ -110,9 +160,10 @@ The API takes takes the following parameters:
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
| threads | THREADS | CPU cores | The number of threads to use for text generation. |
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
| debug | DEBUG | false | Enable debug mode. |
Once the server is running, you can start making requests to it using HTTP, using the OpenAI API.
@@ -122,10 +173,16 @@ Once the server is running, you can start making requests to it using HTTP, usin
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
Following the list of endpoints/parameters supported.
Following the list of endpoints/parameters supported.
Note:
- You can also specify the model a part of the OpenAI token.
- If only one model is available, the API will use it for all the requests.
#### Chat completions
<details>
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```
@@ -137,10 +194,12 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
#### Completions
For example, to generate a comletion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
<details>
For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
@@ -151,14 +210,19 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
#### List models
<details>
You can list all the models available with:
```
curl http://localhost:8080/v1/models
```
</details>
## Using other models
gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
@@ -173,15 +237,32 @@ python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
# There will be a new model with the ".tmp" extension, you have to use that one!
```
### Windows compatibility
## Helm Chart Installation (run LocalAI in Kubernetes)
The local-ai Helm chart supports two options for the LocalAI server's models directory:
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
```bash
helm install local-ai charts/local-ai -n local-ai --create-namespace
```
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
```bash
helm upgrade local-ai -n local-ai charts/local-ai
```
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
## Windows compatibility
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
### Kubernetes
You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
### Build locally
## Build locally
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
@@ -199,13 +280,71 @@ Or build the binary with `make`:
make build
```
## Frequently asked questions
Here are answers to some of the most common questions.
### How do I get models?
<details>
Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in lama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml, or models from gpt4all should also work: https://github.com/nomic-ai/gpt4all.
</details>
### What's the difference with Serge, or XXX?
<details>
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, easy to set up locally and deploy to Kubernetes.
</details>
### Can I use it with a Discord bot, or XXX?
<details>
Yes! If the client uses OpenAI and supports setting a different base URL to send requests to, you can use the LocalAI endpoint. This allows to use this with every application that was supposed to work with OpenAI, but without changing the application!
</details>
### Can this leverage GPUs?
<details>
Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/llama.cpp/discussions/915.
</details>
### Where is the webUI?
<details>
We are working on to have a good out of the box experience - however as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
</details>
### Does it work with AutoGPT?
<details>
AutoGPT currently doesn't allow to set a different API URL, but there is a PR open for it, so this should be possible soon!
</details>
## Short-term roadmap
- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
- Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
- [x] Multi-model support
- Have a webUI!
- [ ] Have a webUI!
- [ ] Allow configuration of defaults for models.
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models.
## License

View File

@@ -8,14 +8,28 @@ import (
"sync"
model "github.com/go-skynet/LocalAI/pkg/model"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/recover"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
// APIError provides error information returned by the OpenAI API.
type APIError struct {
Code any `json:"code,omitempty"`
Message string `json:"message"`
Param *string `json:"param,omitempty"`
Type string `json:"type"`
}
type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"chat.completion,omitempty"`
@@ -47,6 +61,8 @@ type OpenAIRequest struct {
// Prompt is read only by completion API calls
Prompt string `json:"prompt"`
Stop string `json:"stop"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages"`
@@ -60,19 +76,23 @@ type OpenAIRequest struct {
N int `json:"n"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch"`
F16 bool `json:"f16kv"`
IgnoreEOS bool `json:"ignore_eos"`
Batch int `json:"batch"`
F16 bool `json:"f16kv"`
IgnoreEOS bool `json:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty"`
Keep int `json:"n_keep"`
Seed int `json:"seed"`
}
// https://platform.openai.com/docs/api-reference/completions
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
var err error
var model *llama.LLama
var gptModel *gptj.GPTJ
var gpt2Model *gpt2.GPT2
var stableLMModel *gpt2.StableLM
input := new(OpenAIRequest)
// Get input data from the request body
@@ -87,17 +107,29 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelFile == "" {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
}
}
// If no model is found or specified, we bail out
if modelFile == "" && !bearerExists {
return fmt.Errorf("no model specified")
}
if bearerExists { // model specified in bearer token takes precedence
// If a model is found in bearer token takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
// Try to load the model with both
var llamaerr error
// Try to load the model
var llamaerr, gpt2err, gptjerr, stableerr error
llamaOpts := []llama.ModelOption{}
if ctx != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(ctx))
@@ -106,11 +138,18 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
if llamaerr != nil {
gptModel, err = loader.LoadGPTJModel(modelFile)
if err != nil {
return fmt.Errorf("llama: %s gpt: %s", llamaerr.Error(), err.Error()) // llama failed first, so we want to catch both errors
gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
if gptjerr != nil {
gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
if gpt2err != nil {
stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
if stableerr != nil {
return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
}
}
}
}
@@ -176,6 +215,54 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
var predFunc func() (string, error)
switch {
case stableLMModel != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(temperature),
gpt2.SetTopP(topP),
gpt2.SetTopK(topK),
gpt2.SetTokens(tokens),
gpt2.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
}
return stableLMModel.Predict(
predInput,
predictOptions...,
)
}
case gpt2Model != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(temperature),
gpt2.SetTopP(topP),
gpt2.SetTopK(topK),
gpt2.SetTokens(tokens),
gpt2.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
}
return gpt2Model.Predict(
predInput,
predictOptions...,
)
}
case gptModel != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
@@ -211,6 +298,22 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
llama.SetThreads(threads),
}
if debug {
predictOptions = append(predictOptions, llama.Debug)
}
if input.Stop != "" {
predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
}
if input.RepeatPenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
}
if input.Keep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
}
if input.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
}
@@ -283,9 +386,15 @@ func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
}
}
func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f16 bool) error {
func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
// Return errors as JSON responses
app := fiber.New(fiber.Config{
DisableStartupMessage: disableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -298,9 +407,11 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
}
// Send custom error page
return ctx.Status(code).JSON(struct {
Error string `json:"error"`
}{Error: err.Error()})
return ctx.Status(code).JSON(
ErrorResponse{
Error: &APIError{Message: err.Error(), Code: code},
},
)
},
})
@@ -313,16 +424,14 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
var mumutex = &sync.Mutex{}
// openAI compatible API endpoint
app.Post("/v1/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/v1/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Get("/v1/models", listModels(loader))
app.Get("/models", listModels(loader))
// Start the server
app.Listen(listenAddr)
return nil
return app
}

58
api/api_test.go Normal file
View File

@@ -0,0 +1,58 @@
package api_test
import (
"context"
"os"
. "github.com/go-skynet/LocalAI/api"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/sashabaranov/go-openai"
)
var _ = Describe("API test", func() {
var app *fiber.App
var modelLoader *model.ModelLoader
var client *openai.Client
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App(modelLoader, 1, 512, false, false, true)
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
app.Shutdown()
})
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(1))
Expect(models.Models[0].ID).To(Equal("testmodel"))
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
})
})
})

13
api/apt_suite_test.go Normal file
View File

@@ -0,0 +1,13 @@
package api_test
import (
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
func TestLocalAI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "LocalAI test suite")
}

View File

@@ -0,0 +1,6 @@
apiVersion: v2
appVersion: 0.1.0
description: A Helm chart for LocalAI
name: local-ai
type: application
version: 1.0.0

View File

@@ -0,0 +1,44 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "local-ai.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "local-ai.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "local-ai.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "local-ai.labels" -}}
helm.sh/chart: {{ include "local-ai.chart" . }}
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,39 @@
{{- if .Values.dataVolume.enabled }}
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
contentType: archive
source:
{{ .Values.dataVolume.source.type }}:
url: {{ .Values.dataVolume.source.url }}
secretRef: {{ template "local-ai.fullname" . }}
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
{{- end }}
{{- if .Values.dataVolume.source.caCertConfigMap }}
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
{{- end }}
pvc:
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
resources:
requests:
storage: {{ .Values.dataVolume.pvc.size }}
---
{{- if .Values.dataVolume.secret.enabled }}
apiVersion: v1
kind: Secret
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
data:
accessKeyId: {{ .Values.dataVolume.secret.username }}
secretKey: {{ .Values.dataVolume.secret.password }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,39 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
replicas: 1
template:
metadata:
name: {{ template "local-ai.fullname" . }}
labels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
spec:
containers:
- name: {{ template "local-ai.fullname" . }}
image: {{ .Values.deployment.image }}
env:
- name: THREADS
value: {{ .Values.deployment.env.threads | quote }}
- name: CONTEXT_SIZE
value: {{ .Values.deployment.env.contextSize | quote }}
- name: MODELS_PATH
value: {{ .Values.deployment.env.modelsPath }}
{{- if .Values.deployment.volume.enabled }}
volumeMounts:
- mountPath: {{ .Values.deployment.env.modelsPath }}
name: models
volumes:
- name: models
persistentVolumeClaim:
claimName: {{ template "local-ai.fullname" . }}
{{- end }}

View File

@@ -0,0 +1,19 @@
apiVersion: v1
kind: Service
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
{{- if .Values.service.annotations }}
annotations:
{{ toYaml .Values.service.annotations | indent 4 }}
{{- end }}
spec:
selector:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
type: "{{ .Values.service.type }}"
ports:
- protocol: TCP
port: 8080
targetPort: 8080

View File

@@ -0,0 +1,38 @@
deployment:
image: quay.io/go-skynet/local-ai:latest
env:
threads: 14
contextSize: 512
modelsPath: "/models"
volume:
enabled: false
service:
type: ClusterIP
annotations: {}
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
# (requires https://github.com/kubevirt/containerized-data-importer)
dataVolume:
enabled: false
source:
type: "http" # Source type. One of: [ http | s3 ]
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
# CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
# and a base64 encoded pem certificate
caCertConfigMap: ""
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
# that may include sensitive information. Only applicable for the http source type.
secretExtraHeaders: []
pvc:
accessModes:
- ReadWriteOnce
size: 5Gi
secret:
enabled: false
username: "" # base64 encoded
password: "" # base64 encoded

View File

@@ -6,14 +6,10 @@ services:
build:
context: .
dockerfile: Dockerfile
# args:
# BUILD_TYPE: generic # Uncomment to build CPU generic code that works on most HW
ports:
- 8080:8080
environment:
- MODELS_PATH=$MODELS_PATH
- CONTEXT_SIZE=$CONTEXT_SIZE
- THREADS=$THREADS
- DEBUG=$DEBUG
env_file:
- .env
volumes:
- ./models:/models:cached
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

28
go.mod
View File

@@ -3,22 +3,38 @@ module github.com/go-skynet/LocalAI
go 1.19
require (
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04
github.com/gofiber/fiber/v2 v2.42.0
github.com/jaypipes/ghw v0.10.0
github.com/onsi/ginkgo/v2 v2.9.2
github.com/onsi/gomega v1.27.6
github.com/rs/zerolog v1.29.1
github.com/urfave/cli/v2 v2.25.0
github.com/sashabaranov/go-openai v1.9.0
github.com/urfave/cli/v2 v2.25.1
)
require (
github.com/StackExchange/wmi v1.2.1 // indirect
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/jaypipes/pcidb v1.0.0 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/philhofer/fwd v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
@@ -28,5 +44,11 @@ require (
github.com/valyala/fasthttp v1.44.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/net v0.8.0 // indirect
golang.org/x/sys v0.6.0 // indirect
golang.org/x/text v0.8.0 // indirect
golang.org/x/tools v0.7.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
howett.net/plist v1.0.0 // indirect
)

69
go.sum
View File

@@ -1,23 +1,59 @@
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04 h1:NPiv7mcIjU71MhEv3v4RRWKSBNXnnCyu6VX4CaaHz2I=
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
@@ -27,11 +63,18 @@ github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPn
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
@@ -39,14 +82,21 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/urfave/cli/v2 v2.25.1 h1:zw8dSP7ghX0Gmm8vugrs6q9Ku0wzweqPyshy+syu9Gw=
github.com/urfave/cli/v2 v2.25.1/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
@@ -67,10 +117,13 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -88,11 +141,23 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=

View File

@@ -1,28 +0,0 @@
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
# (requires https://github.com/kubevirt/containerized-data-importer)
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
name: models
namespace: local-ai
spec:
contentType: archive
source:
http:
url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
secretRef: model-secret
pvc:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Secret
metadata:
name: model-secret
namespace: local-ai
data:
accessKeyId: <model_server_username_base64_encoded>
secretKey: <model_server_password_base64_encoded>

View File

@@ -1,57 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: local-ai
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: local-ai
namespace: local-ai
labels:
app: local-ai
spec:
selector:
matchLabels:
app: local-ai
replicas: 1
template:
metadata:
labels:
app: local-ai
name: local-ai
spec:
containers:
- name: local-ai
image: quay.io/go-skynet/local-ai:latest
env:
- name: THREADS
value: "14"
- name: CONTEXT_SIZE
value: "512"
- name: MODELS_PATH
value: /models
volumeMounts:
- mountPath: /models
name: models
volumes:
- name: models
persistentVolumeClaim:
claimName: models
---
apiVersion: v1
kind: Service
metadata:
name: local-ai
namespace: local-ai
# If using AWS, you'll need to override the default 60s load balancer idle timeout
# annotations:
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
spec:
selector:
app: local-ai
type: LoadBalancer
ports:
- protocol: TCP
port: 8080
targetPort: 8080

21
main.go
View File

@@ -2,13 +2,12 @@ package main
import (
"os"
"runtime"
api "github.com/go-skynet/LocalAI/api"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/jaypipes/ghw"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
)
@@ -21,6 +20,12 @@ func main() {
os.Exit(1)
}
threads := 4
cpu, err := ghw.CPU()
if err == nil {
threads = int(cpu.TotalCores)
}
app := &cli.App{
Name: "LocalAI",
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
@@ -37,7 +42,7 @@ func main() {
Name: "threads",
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
EnvVars: []string{"THREADS"},
Value: runtime.NumCPU(),
Value: threads,
},
&cli.StringFlag{
Name: "models-path",
@@ -66,18 +71,16 @@ Some of the models compatible are:
- Koala
- GPT4ALL
- GPT4ALL-J
- Cerebras
- Alpaca
- StableLM (ggml quantized)
It uses llama.cpp and gpt4all as backend, supporting all the models supported by both.
It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
`,
UsageText: `local-ai [options]`,
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if ctx.Bool("debug") {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
return api.Start(model.NewModelLoader(ctx.String("models-path")), ctx.String("address"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"))
return api.App(model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
},
}

View File

@@ -12,20 +12,32 @@ import (
"github.com/rs/zerolog/log"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
)
type ModelLoader struct {
modelPath string
mu sync.Mutex
models map[string]*llama.LLama
gptmodels map[string]*gptj.GPTJ
modelPath string
mu sync.Mutex
models map[string]*llama.LLama
gptmodels map[string]*gptj.GPTJ
gpt2models map[string]*gpt2.GPT2
gptstablelmmodels map[string]*gpt2.StableLM
promptsTemplates map[string]*template.Template
}
func NewModelLoader(modelPath string) *ModelLoader {
return &ModelLoader{modelPath: modelPath, gptmodels: make(map[string]*gptj.GPTJ), models: make(map[string]*llama.LLama), promptsTemplates: make(map[string]*template.Template)}
return &ModelLoader{
modelPath: modelPath,
gpt2models: make(map[string]*gpt2.GPT2),
gptmodels: make(map[string]*gptj.GPTJ),
gptstablelmmodels: make(map[string]*gpt2.StableLM),
models: make(map[string]*llama.LLama),
promptsTemplates: make(map[string]*template.Template),
}
}
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
@@ -98,6 +110,77 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
return nil
}
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.NewStableLM(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.gptstablelmmodels[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.New(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.gpt2models[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
@@ -112,6 +195,17 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
if _, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model is GPT2: %s", modelName)
return nil, fmt.Errorf("this model is a GPT2 one")
}
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
@@ -152,6 +246,14 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
log.Debug().Msgf("Model is GPTJ: %s", modelName)
return nil, fmt.Errorf("this model is a GPTJ one")
}
if _, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model is GPT2: %s", modelName)
return nil, fmt.Errorf("this model is a GPT2 one")
}
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.modelPath, modelName)

17
renovate.json Normal file
View File

@@ -0,0 +1,17 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:base"
],
"regexManagers": [
{
"fileMatch": [
"^Makefile$"
],
"matchStrings": [
"#\\s*renovate:\\s*datasource=(?<datasource>.*?) depName=(?<depName>.*?)( datasourceTemplate=(?<datasourceTemplate>.*?))?( packageNameTemplate=(?<packageNameTemplate>.*?))?( depNameTemplate=(?<depNameTemplate>.*?))?( valueTemplate=(?<currentValueTemplate>.*?))?( versioning=(?<versioning>.*?))?\\s+.+_VERSION=(?<currentValue>.*?)\\s"
],
"versioningTemplate": "{{#if versioning}}{{versioning}}{{/if}}"
}
]
}