mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
50 Commits
add/first-
...
v1.6.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c974dad799 | ||
|
|
4eae570ef5 | ||
|
|
67992a7d99 | ||
|
|
0a4899f366 | ||
|
|
1eb02f6c91 | ||
|
|
575874e4fb | ||
|
|
751b7eca62 | ||
|
|
1ae7150810 | ||
|
|
70caf9bf8c | ||
|
|
0b226ac027 | ||
|
|
220d6fd59b | ||
|
|
0a00a4b58e | ||
|
|
156e15a4fa | ||
|
|
271d3f6673 | ||
|
|
fec4ab93c5 | ||
|
|
38a7a7a54d | ||
|
|
0db0704e2c | ||
|
|
88f472e5d2 | ||
|
|
92452d46da | ||
|
|
ac70252d70 | ||
|
|
f6451d2518 | ||
|
|
2473f9d19b | ||
|
|
bc583385a9 | ||
|
|
8286bfbab7 | ||
|
|
d129fabe3b | ||
|
|
2539867247 | ||
|
|
69fedb92d9 | ||
|
|
54b5eadcc4 | ||
|
|
16773e2a35 | ||
|
|
78503c62b7 | ||
|
|
a330c9cee5 | ||
|
|
ff0867996e | ||
|
|
1bf8f996d1 | ||
|
|
52f4d993c1 | ||
|
|
d0ceebc5d7 | ||
|
|
9122af3ae1 | ||
|
|
b8533428bc | ||
|
|
677905334c | ||
|
|
d1d55d29a0 | ||
|
|
e07dba7ad6 | ||
|
|
062f832510 | ||
|
|
d0330bb64b | ||
|
|
91a23ec6ec | ||
|
|
0b000dd043 | ||
|
|
c73ba91a66 | ||
|
|
dfc00f8bc1 | ||
|
|
a18ff9c9b3 | ||
|
|
d0199279ad | ||
|
|
9ede1e12d8 | ||
|
|
c806eae0de |
@@ -1 +1,2 @@
|
||||
models
|
||||
examples/chatbot-ui/models
|
||||
4
.github/workflows/image.yml
vendored
4
.github/workflows/image.yml
vendored
@@ -54,8 +54,8 @@ jobs:
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: quay.io
|
||||
username: ${{ secrets.QUAY_USERNAME }}
|
||||
password: ${{ secrets.QUAY_PASSWORD }}
|
||||
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
- name: Build
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/build-push-action@v4
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -10,6 +10,5 @@ local-ai
|
||||
!charts/*
|
||||
|
||||
# Ignore models
|
||||
models/*.bin
|
||||
models/ggml-*
|
||||
models/*
|
||||
test-models/
|
||||
12
Dockerfile
12
Dockerfile
@@ -1,13 +1,9 @@
|
||||
ARG GO_VERSION=1.20
|
||||
ARG DEBIAN_VERSION=11
|
||||
ARG BUILD_TYPE=
|
||||
|
||||
FROM golang:$GO_VERSION as builder
|
||||
FROM golang:$GO_VERSION
|
||||
WORKDIR /build
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
COPY . .
|
||||
RUN make build
|
||||
|
||||
FROM debian:$DEBIAN_VERSION
|
||||
COPY --from=builder /build/local-ai /usr/bin/local-ai
|
||||
ENTRYPOINT [ "/usr/bin/local-ai" ]
|
||||
RUN make prepare-sources
|
||||
EXPOSE 8080
|
||||
ENTRYPOINT [ "/build/entrypoint.sh" ]
|
||||
|
||||
14
Dockerfile.dev
Normal file
14
Dockerfile.dev
Normal file
@@ -0,0 +1,14 @@
|
||||
ARG GO_VERSION=1.20
|
||||
ARG DEBIAN_VERSION=11
|
||||
ARG BUILD_TYPE=
|
||||
|
||||
FROM golang:$GO_VERSION as builder
|
||||
WORKDIR /build
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
COPY . .
|
||||
RUN make build
|
||||
|
||||
FROM debian:$DEBIAN_VERSION
|
||||
COPY --from=builder /build/local-ai /usr/bin/local-ai
|
||||
EXPOSE 8080
|
||||
ENTRYPOINT [ "/usr/bin/local-ai" ]
|
||||
71
Makefile
71
Makefile
@@ -3,20 +3,23 @@ GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=llama.cpp-25d7abb
|
||||
GOLLAMA_VERSION?=llama.cpp-f4cef87
|
||||
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
|
||||
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
|
||||
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
|
||||
|
||||
GREEN := $(shell tput -Txterm setaf 2)
|
||||
YELLOW := $(shell tput -Txterm setaf 3)
|
||||
WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
|
||||
|
||||
# Use this if you want to set the default behavior
|
||||
ifndef BUILD_TYPE
|
||||
@@ -33,20 +36,10 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## Build:
|
||||
|
||||
build: prepare ## Build the project
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
|
||||
generic-build: ## Build the project using generic
|
||||
BUILD_TYPE="generic" $(MAKE) build
|
||||
|
||||
## GPT4ALL-J
|
||||
go-gpt4all-j:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
|
||||
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
|
||||
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@@ -57,13 +50,21 @@ go-gpt4all-j:
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
|
||||
## RWKV
|
||||
go-rwkv:
|
||||
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
|
||||
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-rwkv/librwkv.a: go-rwkv
|
||||
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
|
||||
|
||||
go-gpt4all-j/libgptj.a: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
|
||||
|
||||
# CEREBRAS GPT
|
||||
go-gpt2:
|
||||
## CEREBRAS GPT
|
||||
go-gpt2:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
|
||||
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
|
||||
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@@ -74,37 +75,61 @@ go-gpt2:
|
||||
|
||||
go-gpt2/libgpt2.a: go-gpt2
|
||||
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
|
||||
|
||||
|
||||
go-llama:
|
||||
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
|
||||
go-llama/libbinding.a: go-llama
|
||||
go-llama/libbinding.a: go-llama
|
||||
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||
|
||||
prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
|
||||
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv
|
||||
$(GOCMD) mod download
|
||||
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C go-llama clean
|
||||
$(MAKE) -C go-gpt4all-j clean
|
||||
$(MAKE) -C go-gpt2 clean
|
||||
$(MAKE) -C go-rwkv clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a replace ## Prepares for building
|
||||
|
||||
clean: ## Remove build related file
|
||||
rm -fr ./go-llama
|
||||
rm -rf ./go-gpt4all-j
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf ./go-rwkv
|
||||
rm -rf $(BINARY_NAME)
|
||||
|
||||
## Run:
|
||||
run: prepare
|
||||
## Build:
|
||||
|
||||
build: prepare ## Build the project
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
|
||||
generic-build: ## Build the project using generic
|
||||
BUILD_TYPE="generic" $(MAKE) build
|
||||
|
||||
## Run
|
||||
run: prepare ## run local-ai
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
|
||||
|
||||
test-models/testmodel:
|
||||
mkdir test-models
|
||||
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
|
||||
cp tests/fixtures/* test-models
|
||||
|
||||
test: prepare test-models/testmodel
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v ./...
|
||||
cp tests/fixtures/* test-models
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v -timeout 30m ./...
|
||||
|
||||
## Help:
|
||||
help: ## Show this help.
|
||||
|
||||
444
README.md
444
README.md
@@ -5,22 +5,34 @@
|
||||
<br>
|
||||
</h1>
|
||||
|
||||
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather on the CLI interface. We think that there are already many projects that can be used as a CLI interface already, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you are were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
|
||||
|
||||
|
||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)
|
||||
|
||||
[](https://discord.gg/uJAeKSAGDy)
|
||||
|
||||
**LocalAI** is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.
|
||||
**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is licensed under Apache 2.0.
|
||||
|
||||
- OpenAI compatible API
|
||||
- Supports multiple-models
|
||||
- Once loaded the first time, it keep models loaded in memory for faster inference
|
||||
- Support for prompt templates
|
||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
|
||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance.
|
||||
|
||||
Reddit post: https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/
|
||||
LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
|
||||
|
||||
### News
|
||||
|
||||
- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
|
||||
- 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )
|
||||
|
||||
### Socials and community chatter
|
||||
|
||||
- Follow [@LocalAI_API](https://twitter.com/LocalAI_API) on twitter.
|
||||
|
||||
- [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
|
||||
|
||||
- [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project.
|
||||
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters.
|
||||
|
||||
## Model compatibility
|
||||
|
||||
@@ -33,11 +45,26 @@ Tested with:
|
||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
|
||||
- Koala
|
||||
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
|
||||
- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
|
||||
|
||||
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
|
||||
|
||||
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
|
||||
|
||||
### RWKV
|
||||
|
||||
<details>
|
||||
|
||||
For `rwkv` models, you need to put also the associated tokenizer along with the ggml model:
|
||||
|
||||
```
|
||||
ls models
|
||||
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
|
||||
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Usage
|
||||
|
||||
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
@@ -50,6 +77,9 @@ git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# copy your models to models/
|
||||
cp your-model.bin models/
|
||||
|
||||
@@ -80,6 +110,9 @@ git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
@@ -106,15 +139,113 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
|
||||
```
|
||||
</details>
|
||||
|
||||
## Prompt templates
|
||||
To build locally, run `make build` (see below).
|
||||
|
||||
### Other examples
|
||||
|
||||

|
||||
|
||||
To see other examples on how to integrate with other projects for instance chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
|
||||
|
||||
### Advanced configuration
|
||||
|
||||
LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
|
||||
|
||||
<details>
|
||||
|
||||
You can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
|
||||
Consider the following `models` folder in the `example/chatbot-ui`:
|
||||
|
||||
```
|
||||
base ❯ ls -liah examples/chatbot-ui/models
|
||||
36487587 drwxr-xr-x 2 mudler mudler 4.0K May 3 12:27 .
|
||||
36487586 drwxr-xr-x 3 mudler mudler 4.0K May 3 10:42 ..
|
||||
36465214 -rw-r--r-- 1 mudler mudler 10 Apr 27 07:46 completion.tmpl
|
||||
36464855 -rw-r--r-- 1 mudler mudler 3.6G Apr 27 00:08 ggml-gpt4all-j
|
||||
36464537 -rw-r--r-- 1 mudler mudler 245 May 3 10:42 gpt-3.5-turbo.yaml
|
||||
36467388 -rw-r--r-- 1 mudler mudler 180 Apr 27 07:46 gpt4all.tmpl
|
||||
```
|
||||
|
||||
In the `gpt-3.5-turbo.yaml` file it is defined the `gpt-3.5-turbo` model which is an alias to use `gpt4all-j` with pre-defined options.
|
||||
|
||||
For instance, consider the following that declares `gpt-3.5-turbo` backed by the `ggml-gpt4all-j` model:
|
||||
|
||||
```yaml
|
||||
name: gpt-3.5-turbo
|
||||
# Default model parameters
|
||||
parameters:
|
||||
# Relative to the models path
|
||||
model: ggml-gpt4all-j
|
||||
# temperature
|
||||
temperature: 0.3
|
||||
# all the OpenAI request options here..
|
||||
|
||||
# Default context size
|
||||
context_size: 512
|
||||
threads: 10
|
||||
# Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with.
|
||||
backend: gptj # available: llama, stablelm, gpt2, gptj rwkv
|
||||
# stopwords (if supported by the backend)
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
# define chat roles
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
# template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
```
|
||||
|
||||
Specifying a `config-file` via CLI allows to declare models in a single file as a list, for instance:
|
||||
|
||||
```yaml
|
||||
- name: list1
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
- name: list2
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
```
|
||||
|
||||
See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
|
||||
|
||||
</details>
|
||||
|
||||
### Prompt templates
|
||||
|
||||
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
||||
|
||||
<details>
|
||||
You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance this can be used with alpaca:
|
||||
You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl` which will be used as a default prompt and can be used with alpaca:
|
||||
|
||||
```
|
||||
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
||||
The below instruction describes a task. Write a response that appropriately completes the request.
|
||||
|
||||
### Instruction:
|
||||
{{.Input}}
|
||||
@@ -122,13 +253,53 @@ Below is an instruction that describes a task. Write a response that appropriate
|
||||
### Response:
|
||||
```
|
||||
|
||||
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for most popular models.
|
||||
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models.
|
||||
|
||||
|
||||
For the edit endpoint, an example template for alpaca-based models can be:
|
||||
|
||||
```yaml
|
||||
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
||||
|
||||
### Instruction:
|
||||
{{.Instruction}}
|
||||
|
||||
### Input:
|
||||
{{.Input}}
|
||||
|
||||
### Response:
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## API
|
||||
### CLI
|
||||
|
||||
`LocalAI` provides an API for running text generation as a service, that follows the OpenAI reference and can be used as a drop-in. The models once loaded the first time will be kept in memory.
|
||||
You can control LocalAI with command line arguments, to specify a binding address, or the number of threads.
|
||||
|
||||
<details>
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
local-ai --models-path <model_path> [--address <address>] [--threads <num_threads>]
|
||||
```
|
||||
|
||||
| Parameter | Environment Variable | Default Value | Description |
|
||||
| ------------ | -------------------- | ------------- | -------------------------------------- |
|
||||
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
|
||||
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
|
||||
| address | ADDRESS | :8080 | The address and port to listen on. |
|
||||
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
|
||||
| debug | DEBUG | false | Enable debug mode. |
|
||||
| config-file | CONFIG_FILE | empty | Path to a LocalAI config file. |
|
||||
|
||||
</details>
|
||||
|
||||
## Setup
|
||||
|
||||
Currently LocalAI comes as a container image and can be used with docker or a container engine of choice. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
|
||||
### Docker
|
||||
|
||||
<details>
|
||||
Example of starting the API with `docker`:
|
||||
@@ -137,7 +308,7 @@ Example of starting the API with `docker`:
|
||||
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
|
||||
```
|
||||
|
||||
And you'll see:
|
||||
You should see:
|
||||
```
|
||||
┌───────────────────────────────────────────────────┐
|
||||
│ Fiber v2.42.0 │
|
||||
@@ -149,27 +320,123 @@ And you'll see:
|
||||
└───────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
You can control the API server options with command line arguments:
|
||||
</details>
|
||||
|
||||
### Build locally
|
||||
|
||||
<details>
|
||||
|
||||
In order to build the `LocalAI` container image locally you can use `docker`:
|
||||
|
||||
```
|
||||
local-api --models-path <model_path> [--address <address>] [--threads <num_threads>]
|
||||
# build the image
|
||||
docker build -t LocalAI .
|
||||
docker run LocalAI
|
||||
```
|
||||
|
||||
The API takes takes the following parameters:
|
||||
Or you can build the binary with `make`:
|
||||
|
||||
| Parameter | Environment Variable | Default Value | Description |
|
||||
| ------------ | -------------------- | ------------- | -------------------------------------- |
|
||||
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
|
||||
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
|
||||
| address | ADDRESS | :8080 | The address and port to listen on. |
|
||||
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
|
||||
| debug | DEBUG | false | Enable debug mode. |
|
||||
|
||||
Once the server is running, you can start making requests to it using HTTP, using the OpenAI API.
|
||||
```
|
||||
make build
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Supported OpenAI API endpoints
|
||||
### Build on mac
|
||||
|
||||
Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`.
|
||||
|
||||
<details>
|
||||
|
||||
The below has been tested by one mac user and found to work. Note that this doesn't use docker to run the server:
|
||||
|
||||
```
|
||||
# install build dependencies
|
||||
brew install cmake
|
||||
brew install go
|
||||
|
||||
# clone the repo
|
||||
git clone https://github.com/go-skynet/LocalAI.git
|
||||
|
||||
cd LocalAI
|
||||
|
||||
# build the binary
|
||||
make build
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# Use a template from the examples
|
||||
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
|
||||
|
||||
# Run LocalAI
|
||||
./local-ai --models-path ./models/ --debug
|
||||
|
||||
# Now API is accessible at localhost:8080
|
||||
curl http://localhost:8080/v1/models
|
||||
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-gpt4all-j",
|
||||
"messages": [{"role": "user", "content": "How are you?"}],
|
||||
"temperature": 0.9
|
||||
}'
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Windows compatibility
|
||||
|
||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||
|
||||
### Run LocalAI in Kubernetes
|
||||
|
||||
LocalAI can be installed inside Kubernetes with helm.
|
||||
|
||||
<details>
|
||||
|
||||
1. Add the helm repo
|
||||
```bash
|
||||
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
|
||||
```
|
||||
1. Create a values files with your settings:
|
||||
```bash
|
||||
cat <<EOF > values.yaml
|
||||
deployment:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
threads: 4
|
||||
contextSize: 1024
|
||||
modelsPath: "/models"
|
||||
# Optionally create a PVC, mount the PV to the LocalAI Deployment,
|
||||
# and download a model to prepopulate the models directory
|
||||
modelsVolume:
|
||||
enabled: true
|
||||
url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
|
||||
pvc:
|
||||
size: 6Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
auth:
|
||||
# Optional value for HTTP basic access authentication header
|
||||
basic: "" # 'username:password' base64 encoded
|
||||
service:
|
||||
type: ClusterIP
|
||||
annotations: {}
|
||||
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
EOF
|
||||
```
|
||||
3. Install the helm chart:
|
||||
```bash
|
||||
helm repo update
|
||||
helm install local-ai go-skynet/local-ai -f values.yaml
|
||||
```
|
||||
|
||||
Check out also the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts).
|
||||
|
||||
</details>
|
||||
|
||||
## Supported OpenAI API endpoints
|
||||
|
||||
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
|
||||
|
||||
@@ -177,10 +444,10 @@ Following the list of endpoints/parameters supported.
|
||||
|
||||
Note:
|
||||
|
||||
- You can also specify the model a part of the OpenAI token.
|
||||
- You can also specify the model as part of the OpenAI token.
|
||||
- If only one model is available, the API will use it for all the requests.
|
||||
|
||||
#### Chat completions
|
||||
### Chat completions
|
||||
|
||||
<details>
|
||||
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
|
||||
@@ -196,10 +463,30 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
|
||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
||||
</details>
|
||||
|
||||
#### Completions
|
||||
### Edit completions
|
||||
|
||||
<details>
|
||||
For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
|
||||
To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body:
|
||||
|
||||
```
|
||||
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"instruction": "rephrase",
|
||||
"input": "Black cat jumped out of the window",
|
||||
"temperature": 0.7
|
||||
}'
|
||||
```
|
||||
|
||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`.
|
||||
|
||||
</details>
|
||||
|
||||
### Completions
|
||||
|
||||
<details>
|
||||
|
||||
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body:
|
||||
|
||||
```
|
||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
@@ -212,7 +499,7 @@ Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
||||
|
||||
</details>
|
||||
|
||||
#### List models
|
||||
### List models
|
||||
|
||||
<details>
|
||||
You can list all the models available with:
|
||||
@@ -223,63 +510,6 @@ curl http://localhost:8080/v1/models
|
||||
|
||||
</details>
|
||||
|
||||
## Using other models
|
||||
|
||||
gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
|
||||
|
||||
```bash
|
||||
wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
|
||||
mkdir models
|
||||
cp gpt4all.. models/
|
||||
git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
|
||||
pip install sentencepiece
|
||||
python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
|
||||
# There will be a new model with the ".tmp" extension, you have to use that one!
|
||||
```
|
||||
|
||||
|
||||
## Helm Chart Installation (run LocalAI in Kubernetes)
|
||||
The local-ai Helm chart supports two options for the LocalAI server's models directory:
|
||||
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
|
||||
|
||||
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
|
||||
|
||||
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
|
||||
|
||||
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
|
||||
```bash
|
||||
helm install local-ai charts/local-ai -n local-ai --create-namespace
|
||||
```
|
||||
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
|
||||
|
||||
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
|
||||
```bash
|
||||
helm upgrade local-ai -n local-ai charts/local-ai
|
||||
```
|
||||
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
|
||||
|
||||
## Windows compatibility
|
||||
|
||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||
|
||||
## Build locally
|
||||
|
||||
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
|
||||
|
||||
In order to build the `LocalAI` container image locally you can use `docker`:
|
||||
|
||||
```
|
||||
# build the image
|
||||
docker build -t LocalAI .
|
||||
docker run LocalAI
|
||||
```
|
||||
|
||||
Or build the binary with `make`:
|
||||
|
||||
```
|
||||
make build
|
||||
```
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
Here are answers to some of the most common questions.
|
||||
@@ -335,24 +565,54 @@ AutoGPT currently doesn't allow to set a different API URL, but there is a PR op
|
||||
|
||||
</details>
|
||||
|
||||
## Projects already using LocalAI to run local models
|
||||
|
||||
Feel free to open up a PR to get your project listed!
|
||||
|
||||
- [Kairos](https://github.com/kairos-io/kairos)
|
||||
- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)
|
||||
|
||||
## Blog posts and other articles
|
||||
|
||||
- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
|
||||
- https://kairos.io/docs/examples/localai/
|
||||
|
||||
## Short-term roadmap
|
||||
|
||||
- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
|
||||
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
|
||||
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
|
||||
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
|
||||
- [x] Multi-model support
|
||||
- [ ] Have a webUI!
|
||||
- [ ] Allow configuration of defaults for models.
|
||||
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models.
|
||||
- [x] Have a webUI!
|
||||
- [x] Allow configuration of defaults for models.
|
||||
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models, directly from the webui.
|
||||
|
||||
## Star history
|
||||
|
||||
[](https://star-history.com/#go-skynet/LocalAI&Date)
|
||||
|
||||
## License
|
||||
|
||||
LocalAI is a community-driven project. It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
|
||||
|
||||
MIT
|
||||
|
||||
## Golang bindings used
|
||||
|
||||
- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
|
||||
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
|
||||
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
|
||||
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
|
||||
- https://github.com/tatsu-lab/stanford_alpaca
|
||||
- https://github.com/cornelk/llama-go for the initial ideas
|
||||
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)
|
||||
|
||||
## Contributors
|
||||
|
||||
<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
|
||||
</a>
|
||||
|
||||
412
api/api.go
412
api/api.go
@@ -1,16 +1,9 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||
@@ -18,375 +11,7 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// APIError provides error information returned by the OpenAI API.
|
||||
type APIError struct {
|
||||
Code any `json:"code,omitempty"`
|
||||
Message string `json:"message"`
|
||||
Param *string `json:"param,omitempty"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type ErrorResponse struct {
|
||||
Error *APIError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIResponse struct {
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"chat.completion,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Choices []Choice `json:"choices,omitempty"`
|
||||
}
|
||||
|
||||
type Choice struct {
|
||||
Index int `json:"index,omitempty"`
|
||||
FinishReason string `json:"finish_reason,omitempty"`
|
||||
Message *Message `json:"message,omitempty"`
|
||||
Text string `json:"text,omitempty"`
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role,omitempty"`
|
||||
Content string `json:"content,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIModel struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
}
|
||||
|
||||
type OpenAIRequest struct {
|
||||
Model string `json:"model"`
|
||||
|
||||
// Prompt is read only by completion API calls
|
||||
Prompt string `json:"prompt"`
|
||||
|
||||
Stop string `json:"stop"`
|
||||
|
||||
// Messages is read only by chat/completion API calls
|
||||
Messages []Message `json:"messages"`
|
||||
|
||||
Echo bool `json:"echo"`
|
||||
// Common options between all the API calls
|
||||
TopP float64 `json:"top_p"`
|
||||
TopK int `json:"top_k"`
|
||||
Temperature float64 `json:"temperature"`
|
||||
Maxtokens int `json:"max_tokens"`
|
||||
|
||||
N int `json:"n"`
|
||||
|
||||
// Custom parameters - not present in the OpenAI API
|
||||
Batch int `json:"batch"`
|
||||
F16 bool `json:"f16kv"`
|
||||
IgnoreEOS bool `json:"ignore_eos"`
|
||||
RepeatPenalty float64 `json:"repeat_penalty"`
|
||||
Keep int `json:"n_keep"`
|
||||
|
||||
Seed int `json:"seed"`
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
var err error
|
||||
var model *llama.LLama
|
||||
var gptModel *gptj.GPTJ
|
||||
var gpt2Model *gpt2.GPT2
|
||||
var stableLMModel *gpt2.StableLM
|
||||
|
||||
input := new(OpenAIRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return err
|
||||
}
|
||||
modelFile := input.Model
|
||||
received, _ := json.Marshal(input)
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelFile == "" {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelFile = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
||||
}
|
||||
}
|
||||
|
||||
// If no model is found or specified, we bail out
|
||||
if modelFile == "" && !bearerExists {
|
||||
return fmt.Errorf("no model specified")
|
||||
}
|
||||
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelFile = bearer
|
||||
}
|
||||
|
||||
// Try to load the model
|
||||
var llamaerr, gpt2err, gptjerr, stableerr error
|
||||
llamaOpts := []llama.ModelOption{}
|
||||
if ctx != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(ctx))
|
||||
}
|
||||
if f16 {
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
|
||||
// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
|
||||
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
if llamaerr != nil {
|
||||
gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
|
||||
if gptjerr != nil {
|
||||
gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
|
||||
if gpt2err != nil {
|
||||
stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
|
||||
if stableerr != nil {
|
||||
return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
mutexMap.Lock()
|
||||
l, ok := mutexes[modelFile]
|
||||
if !ok {
|
||||
m := &sync.Mutex{}
|
||||
mutexes[modelFile] = m
|
||||
l = m
|
||||
}
|
||||
mutexMap.Unlock()
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
topP := input.TopP
|
||||
if topP == 0 {
|
||||
topP = 0.7
|
||||
}
|
||||
topK := input.TopK
|
||||
if topK == 0 {
|
||||
topK = 80
|
||||
}
|
||||
|
||||
temperature := input.Temperature
|
||||
if temperature == 0 {
|
||||
temperature = 0.9
|
||||
}
|
||||
|
||||
tokens := input.Maxtokens
|
||||
if tokens == 0 {
|
||||
tokens = 512
|
||||
}
|
||||
|
||||
predInput := input.Prompt
|
||||
if chat {
|
||||
mess := []string{}
|
||||
// TODO: encode roles
|
||||
for _, i := range input.Messages {
|
||||
mess = append(mess, i.Content)
|
||||
}
|
||||
|
||||
predInput = strings.Join(mess, "\n")
|
||||
}
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(modelFile, struct {
|
||||
Input string
|
||||
}{Input: predInput})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
|
||||
result := []Choice{}
|
||||
|
||||
n := input.N
|
||||
|
||||
if input.N == 0 {
|
||||
n = 1
|
||||
}
|
||||
|
||||
var predFunc func() (string, error)
|
||||
switch {
|
||||
case stableLMModel != nil:
|
||||
predFunc = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(temperature),
|
||||
gpt2.SetTopP(topP),
|
||||
gpt2.SetTopK(topK),
|
||||
gpt2.SetTokens(tokens),
|
||||
gpt2.SetThreads(threads),
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
|
||||
}
|
||||
|
||||
return stableLMModel.Predict(
|
||||
predInput,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case gpt2Model != nil:
|
||||
predFunc = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(temperature),
|
||||
gpt2.SetTopP(topP),
|
||||
gpt2.SetTopK(topK),
|
||||
gpt2.SetTokens(tokens),
|
||||
gpt2.SetThreads(threads),
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
|
||||
}
|
||||
|
||||
return gpt2Model.Predict(
|
||||
predInput,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case gptModel != nil:
|
||||
predFunc = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gptj.PredictOption{
|
||||
gptj.SetTemperature(temperature),
|
||||
gptj.SetTopP(topP),
|
||||
gptj.SetTopK(topK),
|
||||
gptj.SetTokens(tokens),
|
||||
gptj.SetThreads(threads),
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetBatch(input.Batch))
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetSeed(input.Seed))
|
||||
}
|
||||
|
||||
return gptModel.Predict(
|
||||
predInput,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case model != nil:
|
||||
predFunc = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(temperature),
|
||||
llama.SetTopP(topP),
|
||||
llama.SetTopK(topK),
|
||||
llama.SetTokens(tokens),
|
||||
llama.SetThreads(threads),
|
||||
}
|
||||
|
||||
if debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
if input.Stop != "" {
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
|
||||
}
|
||||
|
||||
if input.F16 {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(input.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
predInput,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
prediction, err := predFunc()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if input.Echo {
|
||||
prediction = predInput + prediction
|
||||
}
|
||||
|
||||
if chat {
|
||||
result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
|
||||
} else {
|
||||
result = append(result, Choice{Text: prediction})
|
||||
}
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(result)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
models, err := loader.ListModels()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dataModels := []OpenAIModel{}
|
||||
for _, m := range models {
|
||||
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
|
||||
}
|
||||
return c.JSON(struct {
|
||||
Object string `json:"object"`
|
||||
Data []OpenAIModel `json:"data"`
|
||||
}{
|
||||
Object: "list",
|
||||
Data: dataModels,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
@@ -415,23 +40,38 @@ func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disab
|
||||
},
|
||||
})
|
||||
|
||||
cm := make(ConfigMerger)
|
||||
if err := cm.LoadConfigs(loader.ModelPath); err != nil {
|
||||
log.Error().Msgf("error loading config files: %s", err.Error())
|
||||
}
|
||||
|
||||
if configFile != "" {
|
||||
if err := cm.LoadConfigFile(configFile); err != nil {
|
||||
log.Error().Msgf("error loading config file: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if debug {
|
||||
for k, v := range cm {
|
||||
log.Debug().Msgf("Model: %s (config: %+v)", k, v)
|
||||
}
|
||||
}
|
||||
// Default middleware config
|
||||
app.Use(recover.New())
|
||||
app.Use(cors.New())
|
||||
|
||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
mu := map[string]*sync.Mutex{}
|
||||
var mumutex = &sync.Mutex{}
|
||||
|
||||
// openAI compatible API endpoint
|
||||
app.Post("/v1/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
|
||||
app.Post("/v1/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
|
||||
app.Get("/v1/models", listModels(loader))
|
||||
app.Get("/models", listModels(loader))
|
||||
app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
|
||||
app.Get("/v1/models", listModels(loader, cm))
|
||||
app.Get("/models", listModels(loader, cm))
|
||||
|
||||
return app
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
openaigo "github.com/otiai10/openaigo"
|
||||
"github.com/sashabaranov/go-openai"
|
||||
)
|
||||
|
||||
@@ -18,15 +19,19 @@ var _ = Describe("API test", func() {
|
||||
var app *fiber.App
|
||||
var modelLoader *model.ModelLoader
|
||||
var client *openai.Client
|
||||
var client2 *openaigo.Client
|
||||
Context("API query", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App(modelLoader, 1, 512, false, false, true)
|
||||
app = App("", modelLoader, 1, 512, false, true, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
||||
|
||||
client2 = openaigo.NewClient("")
|
||||
client2.BaseURL = defaultConfig.BaseURL
|
||||
|
||||
// Wait for API to be ready
|
||||
client = openai.NewClientWithConfig(defaultConfig)
|
||||
Eventually(func() error {
|
||||
@@ -40,7 +45,7 @@ var _ = Describe("API test", func() {
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(1))
|
||||
Expect(len(models.Models)).To(Equal(3))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
})
|
||||
It("can generate completions", func() {
|
||||
@@ -49,10 +54,85 @@ var _ = Describe("API test", func() {
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
It("can generate chat completions ", func() {
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
It("can generate completions from model configs", func() {
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
It("can generate chat completions from model configs", func() {
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
It("returns errors", func() {
|
||||
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 5 errors occurred:"))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Context("Config file", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
||||
client2 = openaigo.NewClient("")
|
||||
client2.BaseURL = defaultConfig.BaseURL
|
||||
// Wait for API to be ready
|
||||
client = openai.NewClientWithConfig(defaultConfig)
|
||||
Eventually(func() error {
|
||||
_, err := client.ListModels(context.TODO())
|
||||
return err
|
||||
}, "2m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
AfterEach(func() {
|
||||
app.Shutdown()
|
||||
})
|
||||
It("can generate chat completions from config file", func() {
|
||||
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(5))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
})
|
||||
It("can generate chat completions from config file", func() {
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||
})
|
||||
It("can generate chat completions from config file", func() {
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
|
||||
})
|
||||
It("can generate edit completions from config file", func() {
|
||||
request := openaigo.EditCreateRequestBody{
|
||||
Model: "list2",
|
||||
Instruction: "foo",
|
||||
Input: "bar",
|
||||
}
|
||||
resp, err := client2.CreateEdit(context.Background(), request)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
102
api/config.go
Normal file
102
api/config.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
OpenAIRequest `yaml:"parameters"`
|
||||
Name string `yaml:"name"`
|
||||
StopWords []string `yaml:"stopwords"`
|
||||
Cutstrings []string `yaml:"cutstrings"`
|
||||
TrimSpace []string `yaml:"trimspace"`
|
||||
ContextSize int `yaml:"context_size"`
|
||||
F16 bool `yaml:"f16"`
|
||||
Threads int `yaml:"threads"`
|
||||
Debug bool `yaml:"debug"`
|
||||
Roles map[string]string `yaml:"roles"`
|
||||
Backend string `yaml:"backend"`
|
||||
TemplateConfig TemplateConfig `yaml:"template"`
|
||||
}
|
||||
|
||||
type TemplateConfig struct {
|
||||
Completion string `yaml:"completion"`
|
||||
Chat string `yaml:"chat"`
|
||||
Edit string `yaml:"edit"`
|
||||
}
|
||||
|
||||
type ConfigMerger map[string]Config
|
||||
|
||||
func ReadConfigFile(file string) ([]*Config, error) {
|
||||
c := &[]*Config{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
}
|
||||
|
||||
return *c, nil
|
||||
}
|
||||
|
||||
func ReadConfig(file string) (*Config, error) {
|
||||
c := &Config{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
if err := yaml.Unmarshal(f, c); err != nil {
|
||||
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (cm ConfigMerger) LoadConfigFile(file string) error {
|
||||
c, err := ReadConfigFile(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot load config file: %w", err)
|
||||
}
|
||||
|
||||
for _, cc := range c {
|
||||
cm[cc.Name] = *cc
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm ConfigMerger) LoadConfig(file string) error {
|
||||
c, err := ReadConfig(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
|
||||
cm[c.Name] = *c
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm ConfigMerger) LoadConfigs(path string) error {
|
||||
files, err := ioutil.ReadDir(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
// Skip templates, YAML and .keep files
|
||||
if !strings.Contains(file.Name(), ".yaml") {
|
||||
continue
|
||||
}
|
||||
c, err := ReadConfig(filepath.Join(path, file.Name()))
|
||||
if err == nil {
|
||||
cm[c.Name] = *c
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
486
api/openai.go
Normal file
486
api/openai.go
Normal file
@@ -0,0 +1,486 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
)
|
||||
|
||||
// APIError provides error information returned by the OpenAI API.
|
||||
type APIError struct {
|
||||
Code any `json:"code,omitempty"`
|
||||
Message string `json:"message"`
|
||||
Param *string `json:"param,omitempty"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type ErrorResponse struct {
|
||||
Error *APIError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
type OpenAIResponse struct {
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Choices []Choice `json:"choices,omitempty"`
|
||||
Usage OpenAIUsage `json:"usage"`
|
||||
}
|
||||
|
||||
type Choice struct {
|
||||
Index int `json:"index,omitempty"`
|
||||
FinishReason string `json:"finish_reason,omitempty"`
|
||||
Message *Message `json:"message,omitempty"`
|
||||
Delta *Message `json:"delta,omitempty"`
|
||||
Text string `json:"text,omitempty"`
|
||||
}
|
||||
|
||||
type Message struct {
|
||||
Role string `json:"role,omitempty" yaml:"role"`
|
||||
Content string `json:"content,omitempty" yaml:"content"`
|
||||
}
|
||||
|
||||
type OpenAIModel struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
}
|
||||
|
||||
type OpenAIRequest struct {
|
||||
Model string `json:"model" yaml:"model"`
|
||||
|
||||
// Prompt is read only by completion API calls
|
||||
Prompt interface{} `json:"prompt" yaml:"prompt"`
|
||||
|
||||
// Edit endpoint
|
||||
Instruction string `json:"instruction" yaml:"instruction"`
|
||||
Input string `json:"input" yaml:"input"`
|
||||
|
||||
Stop interface{} `json:"stop" yaml:"stop"`
|
||||
|
||||
// Messages is read only by chat/completion API calls
|
||||
Messages []Message `json:"messages" yaml:"messages"`
|
||||
|
||||
Stream bool `json:"stream"`
|
||||
Echo bool `json:"echo"`
|
||||
// Common options between all the API calls
|
||||
TopP float64 `json:"top_p" yaml:"top_p"`
|
||||
TopK int `json:"top_k" yaml:"top_k"`
|
||||
Temperature float64 `json:"temperature" yaml:"temperature"`
|
||||
Maxtokens int `json:"max_tokens" yaml:"max_tokens"`
|
||||
|
||||
N int `json:"n"`
|
||||
|
||||
// Custom parameters - not present in the OpenAI API
|
||||
Batch int `json:"batch" yaml:"batch"`
|
||||
F16 bool `json:"f16" yaml:"f16"`
|
||||
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
|
||||
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
|
||||
Keep int `json:"n_keep" yaml:"n_keep"`
|
||||
|
||||
Seed int `json:"seed" yaml:"seed"`
|
||||
}
|
||||
|
||||
func defaultRequest(modelFile string) OpenAIRequest {
|
||||
return OpenAIRequest{
|
||||
TopP: 0.7,
|
||||
TopK: 80,
|
||||
Maxtokens: 512,
|
||||
Temperature: 0.9,
|
||||
Model: modelFile,
|
||||
}
|
||||
}
|
||||
|
||||
func updateConfig(config *Config, input *OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != 0 {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != 0 {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Temperature != 0 {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != 0 {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.F16 {
|
||||
config.F16 = input.F16
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
}
|
||||
|
||||
func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
|
||||
input := new(OpenAIRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
modelFile := input.Model
|
||||
received, _ := json.Marshal(input)
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelFile == "" && !bearerExists {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelFile = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
||||
} else {
|
||||
log.Debug().Msgf("No model specified, returning error")
|
||||
return nil, nil, fmt.Errorf("no model specified")
|
||||
}
|
||||
}
|
||||
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelFile = bearer
|
||||
}
|
||||
|
||||
// Load a config file if present after the model name
|
||||
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
|
||||
if _, err := os.Stat(modelConfig); err == nil {
|
||||
if err := cm.LoadConfig(modelConfig); err != nil {
|
||||
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
var config *Config
|
||||
cfg, exists := cm[modelFile]
|
||||
if !exists {
|
||||
config = &Config{
|
||||
OpenAIRequest: defaultRequest(modelFile),
|
||||
}
|
||||
} else {
|
||||
config = &cfg
|
||||
}
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
updateConfig(config, input)
|
||||
|
||||
if threads != 0 {
|
||||
config.Threads = threads
|
||||
}
|
||||
if ctx != 0 {
|
||||
config.ContextSize = ctx
|
||||
}
|
||||
if f16 {
|
||||
config.F16 = true
|
||||
}
|
||||
|
||||
if debug {
|
||||
config.Debug = true
|
||||
}
|
||||
|
||||
return config, input, nil
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
predInput := []string{}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
predInput = append(predInput, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
predInput = append(predInput, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Completion != "" {
|
||||
templateFile = config.TemplateConfig.Completion
|
||||
}
|
||||
|
||||
var result []Choice
|
||||
for _, i := range predInput {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
}{Input: i})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Text: s})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
result = append(result, r...)
|
||||
}
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "text_completion",
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
var predInput string
|
||||
|
||||
mess := []string{}
|
||||
for _, i := range input.Messages {
|
||||
r := config.Roles[i.Role]
|
||||
if r == "" {
|
||||
r = i.Role
|
||||
}
|
||||
|
||||
content := fmt.Sprint(r, " ", i.Content)
|
||||
mess = append(mess, content)
|
||||
}
|
||||
|
||||
predInput = strings.Join(mess, "\n")
|
||||
|
||||
if input.Stream {
|
||||
log.Debug().Msgf("Stream request received")
|
||||
c.Context().SetContentType("text/event-stream")
|
||||
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
|
||||
// c.Set("Content-Type", "text/event-stream")
|
||||
c.Set("Cache-Control", "no-cache")
|
||||
c.Set("Connection", "keep-alive")
|
||||
c.Set("Transfer-Encoding", "chunked")
|
||||
}
|
||||
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Chat != "" {
|
||||
templateFile = config.TemplateConfig.Chat
|
||||
}
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
}{Input: predInput})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
|
||||
if input.Stream {
|
||||
responses := make(chan OpenAIResponse)
|
||||
|
||||
go func() {
|
||||
ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
|
||||
resp := OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
|
||||
responses <- resp
|
||||
return true
|
||||
})
|
||||
close(responses)
|
||||
}()
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
|
||||
for ev := range responses {
|
||||
var buf bytes.Buffer
|
||||
enc := json.NewEncoder(&buf)
|
||||
enc.Encode(ev)
|
||||
|
||||
fmt.Fprintf(w, "event: data\n\n")
|
||||
fmt.Fprintf(w, "data: %v\n\n", buf.String())
|
||||
log.Debug().Msgf("Sending chunk: %s", buf.String())
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
w.WriteString("event: data\n\n")
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []Choice{{FinishReason: "stop"}},
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
|
||||
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
|
||||
w.Flush()
|
||||
}))
|
||||
return nil
|
||||
}
|
||||
|
||||
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "chat.completion",
|
||||
}
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
predInput := input.Input
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Edit != "" {
|
||||
templateFile = config.TemplateConfig.Edit
|
||||
}
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
Instruction string
|
||||
}{Input: predInput, Instruction: input.Instruction})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
|
||||
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Text: s})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "edit",
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
models, err := loader.ListModels()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var mm map[string]interface{} = map[string]interface{}{}
|
||||
|
||||
dataModels := []OpenAIModel{}
|
||||
for _, m := range models {
|
||||
mm[m] = nil
|
||||
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
|
||||
}
|
||||
|
||||
for k := range cm {
|
||||
if _, exists := mm[k]; !exists {
|
||||
dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
|
||||
}
|
||||
}
|
||||
|
||||
return c.JSON(struct {
|
||||
Object string `json:"object"`
|
||||
Data []OpenAIModel `json:"data"`
|
||||
}{
|
||||
Object: "list",
|
||||
Data: dataModels,
|
||||
})
|
||||
}
|
||||
}
|
||||
343
api/prediction.go
Normal file
343
api/prediction.go
Normal file
@@ -0,0 +1,343 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/donomii/go-rwkv.cpp"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
var mutexMap sync.Mutex
|
||||
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
|
||||
|
||||
var loadedModels map[string]interface{} = map[string]interface{}{}
|
||||
var muModels sync.Mutex
|
||||
|
||||
func backendLoader(backendString string, loader *model.ModelLoader, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
switch strings.ToLower(backendString) {
|
||||
case "llama":
|
||||
return loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
case "stablelm":
|
||||
return loader.LoadStableLMModel(modelFile)
|
||||
case "gpt2":
|
||||
return loader.LoadGPT2Model(modelFile)
|
||||
case "gptj":
|
||||
return loader.LoadGPTJModel(modelFile)
|
||||
case "rwkv":
|
||||
return loader.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
default:
|
||||
return nil, fmt.Errorf("backend unsupported: %s", backendString)
|
||||
}
|
||||
}
|
||||
|
||||
func greedyLoader(loader *model.ModelLoader, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
updateModels := func(model interface{}) {
|
||||
muModels.Lock()
|
||||
defer muModels.Unlock()
|
||||
loadedModels[modelFile] = model
|
||||
}
|
||||
|
||||
muModels.Lock()
|
||||
m, exists := loadedModels[modelFile]
|
||||
if exists {
|
||||
muModels.Unlock()
|
||||
return m, nil
|
||||
}
|
||||
muModels.Unlock()
|
||||
|
||||
model, modelerr := loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadGPTJModel(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadGPT2Model(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadStableLMModel(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
}
|
||||
|
||||
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
|
||||
supportStreams := false
|
||||
modelFile := c.Model
|
||||
|
||||
// Try to load the model
|
||||
llamaOpts := []llama.ModelOption{}
|
||||
if c.ContextSize != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
|
||||
}
|
||||
if c.F16 {
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
|
||||
var inferenceModel interface{}
|
||||
var err error
|
||||
if c.Backend == "" {
|
||||
inferenceModel, err = greedyLoader(loader, modelFile, llamaOpts, uint32(c.Threads))
|
||||
} else {
|
||||
inferenceModel, err = backendLoader(c.Backend, loader, modelFile, llamaOpts, uint32(c.Threads))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var fn func() (string, error)
|
||||
|
||||
switch model := inferenceModel.(type) {
|
||||
case *rwkv.RwkvState:
|
||||
supportStreams = true
|
||||
|
||||
fn = func() (string, error) {
|
||||
//model.ProcessInput("You are a chatbot that is very good at chatting. blah blah blah")
|
||||
stopWord := "\n"
|
||||
if len(c.StopWords) > 0 {
|
||||
stopWord = c.StopWords[0]
|
||||
}
|
||||
|
||||
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
|
||||
|
||||
return response, nil
|
||||
}
|
||||
case *gpt2.StableLM:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.GPT2:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gptj.GPTJ:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gptj.PredictOption{
|
||||
gptj.SetTemperature(c.Temperature),
|
||||
gptj.SetTopP(c.TopP),
|
||||
gptj.SetTopK(c.TopK),
|
||||
gptj.SetTokens(c.Maxtokens),
|
||||
gptj.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *llama.LLama:
|
||||
supportStreams = true
|
||||
fn = func() (string, error) {
|
||||
|
||||
if tokenCallback != nil {
|
||||
model.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(c.Temperature),
|
||||
llama.SetTopP(c.TopP),
|
||||
llama.SetTopK(c.TopK),
|
||||
llama.SetTokens(c.Maxtokens),
|
||||
llama.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
|
||||
|
||||
if c.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
|
||||
}
|
||||
|
||||
if c.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.F16 {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if c.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return func() (string, error) {
|
||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
mutexMap.Lock()
|
||||
l, ok := mutexes[modelFile]
|
||||
if !ok {
|
||||
m := &sync.Mutex{}
|
||||
mutexes[modelFile] = m
|
||||
l = m
|
||||
}
|
||||
mutexMap.Unlock()
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
res, err := fn()
|
||||
if tokenCallback != nil && !supportStreams {
|
||||
tokenCallback(res)
|
||||
}
|
||||
return res, err
|
||||
}, nil
|
||||
}
|
||||
|
||||
func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
|
||||
result := []Choice{}
|
||||
|
||||
n := input.N
|
||||
|
||||
if input.N == 0 {
|
||||
n = 1
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
prediction, err := predFunc()
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
|
||||
prediction = Finetune(*config, predInput, prediction)
|
||||
cb(prediction, &result)
|
||||
|
||||
//result = append(result, Choice{Text: prediction})
|
||||
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
|
||||
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
|
||||
var mu sync.Mutex = sync.Mutex{}
|
||||
|
||||
func Finetune(config Config, input, prediction string) string {
|
||||
if config.Echo {
|
||||
prediction = input + prediction
|
||||
}
|
||||
|
||||
for _, c := range config.Cutstrings {
|
||||
mu.Lock()
|
||||
reg, ok := cutstrings[c]
|
||||
if !ok {
|
||||
cutstrings[c] = regexp.MustCompile(c)
|
||||
reg = cutstrings[c]
|
||||
}
|
||||
mu.Unlock()
|
||||
prediction = reg.ReplaceAllString(prediction, "")
|
||||
}
|
||||
|
||||
for _, c := range config.TrimSpace {
|
||||
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
||||
}
|
||||
return prediction
|
||||
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: v2
|
||||
appVersion: 0.1.0
|
||||
description: A Helm chart for LocalAI
|
||||
name: local-ai
|
||||
type: application
|
||||
version: 1.0.0
|
||||
@@ -1,44 +0,0 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "local-ai.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "local-ai.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "local-ai.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "local-ai.labels" -}}
|
||||
helm.sh/chart: {{ include "local-ai.chart" . }}
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: "{{ .Release.Name }}"
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -1,39 +0,0 @@
|
||||
{{- if .Values.dataVolume.enabled }}
|
||||
apiVersion: cdi.kubevirt.io/v1beta1
|
||||
kind: DataVolume
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
contentType: archive
|
||||
source:
|
||||
{{ .Values.dataVolume.source.type }}:
|
||||
url: {{ .Values.dataVolume.source.url }}
|
||||
secretRef: {{ template "local-ai.fullname" . }}
|
||||
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
|
||||
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
|
||||
{{- end }}
|
||||
{{- if .Values.dataVolume.source.caCertConfigMap }}
|
||||
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
|
||||
{{- end }}
|
||||
pvc:
|
||||
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.dataVolume.pvc.size }}
|
||||
---
|
||||
{{- if .Values.dataVolume.secret.enabled }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
data:
|
||||
accessKeyId: {{ .Values.dataVolume.secret.username }}
|
||||
secretKey: {{ .Values.dataVolume.secret.password }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -1,39 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
spec:
|
||||
containers:
|
||||
- name: {{ template "local-ai.fullname" . }}
|
||||
image: {{ .Values.deployment.image }}
|
||||
env:
|
||||
- name: THREADS
|
||||
value: {{ .Values.deployment.env.threads | quote }}
|
||||
- name: CONTEXT_SIZE
|
||||
value: {{ .Values.deployment.env.contextSize | quote }}
|
||||
- name: MODELS_PATH
|
||||
value: {{ .Values.deployment.env.modelsPath }}
|
||||
{{- if .Values.deployment.volume.enabled }}
|
||||
volumeMounts:
|
||||
- mountPath: {{ .Values.deployment.env.modelsPath }}
|
||||
name: models
|
||||
volumes:
|
||||
- name: models
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ template "local-ai.fullname" . }}
|
||||
{{- end }}
|
||||
@@ -1,19 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
{{- if .Values.service.annotations }}
|
||||
annotations:
|
||||
{{ toYaml .Values.service.annotations | indent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
type: "{{ .Values.service.type }}"
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
@@ -1,38 +0,0 @@
|
||||
deployment:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
threads: 14
|
||||
contextSize: 512
|
||||
modelsPath: "/models"
|
||||
volume:
|
||||
enabled: false
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
annotations: {}
|
||||
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
|
||||
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
|
||||
# (requires https://github.com/kubevirt/containerized-data-importer)
|
||||
dataVolume:
|
||||
enabled: false
|
||||
source:
|
||||
type: "http" # Source type. One of: [ http | s3 ]
|
||||
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
|
||||
|
||||
# CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
|
||||
# and a base64 encoded pem certificate
|
||||
caCertConfigMap: ""
|
||||
|
||||
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
|
||||
# that may include sensitive information. Only applicable for the http source type.
|
||||
secretExtraHeaders: []
|
||||
pvc:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
size: 5Gi
|
||||
secret:
|
||||
enabled: false
|
||||
username: "" # base64 encoded
|
||||
password: "" # base64 encoded
|
||||
@@ -5,11 +5,11 @@ services:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
7
entrypoint.sh
Executable file
7
entrypoint.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
cd /build
|
||||
|
||||
make build
|
||||
|
||||
./local-ai "$@"
|
||||
15
examples/README.md
Normal file
15
examples/README.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# Examples
|
||||
|
||||
Here is a list of projects that can easily be integrated with the LocalAI backend.
|
||||
|
||||
## Projects
|
||||
|
||||
- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mkellerman](https://github.com/mkellerman))
|
||||
- [discord-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/) (by [@mudler](https://github.com/mudler))
|
||||
- [langchain](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) (by [@dave-gray101](https://github.com/dave-gray101))
|
||||
- [langchain-python](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) (by [@mudler](https://github.com/mudler))
|
||||
- [slack-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) (by [@mudler](https://github.com/mudler))
|
||||
|
||||
## Want to contribute?
|
||||
|
||||
Create an issue, and put `Example: <description>` in the title! We will post your examples here.
|
||||
46
examples/chatbot-ui/README.md
Normal file
46
examples/chatbot-ui/README.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# chatbot-ui
|
||||
|
||||
Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
|
||||
|
||||

|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/chatbot-ui
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
## Pointing chatbot-ui to a separately managed LocalAI service
|
||||
|
||||
If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
|
||||
```
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
chatgpt:
|
||||
image: ghcr.io/mckaywrigley/chatbot-ui:main
|
||||
ports:
|
||||
- 3000:3000
|
||||
environment:
|
||||
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
|
||||
- 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
|
||||
```
|
||||
|
||||
Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
|
||||
|
||||
## Accessing chatbot-ui
|
||||
|
||||
Open http://localhost:3000 for the Web UI.
|
||||
|
||||
24
examples/chatbot-ui/docker-compose.yaml
Normal file
24
examples/chatbot-ui/docker-compose.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
chatgpt:
|
||||
image: ghcr.io/mckaywrigley/chatbot-ui:main
|
||||
ports:
|
||||
- 3000:3000
|
||||
environment:
|
||||
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
|
||||
- 'OPENAI_API_HOST=http://api:8080'
|
||||
1
examples/chatbot-ui/models/completion.tmpl
Normal file
1
examples/chatbot-ui/models/completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
17
examples/chatbot-ui/models/gpt-3.5-turbo.yaml
Normal file
17
examples/chatbot-ui/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
threads: 14
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
4
examples/chatbot-ui/models/gpt4all.tmpl
Normal file
4
examples/chatbot-ui/models/gpt4all.tmpl
Normal file
@@ -0,0 +1,4 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
6
examples/discord-bot/.env.example
Normal file
6
examples/discord-bot/.env.example
Normal file
@@ -0,0 +1,6 @@
|
||||
OPENAI_API_KEY=x
|
||||
DISCORD_BOT_TOKEN=x
|
||||
DISCORD_CLIENT_ID=x
|
||||
OPENAI_API_BASE=http://api:8080
|
||||
ALLOWED_SERVER_IDS=x
|
||||
SERVER_TO_MODERATION_CHANNEL=1:1
|
||||
76
examples/discord-bot/README.md
Normal file
76
examples/discord-bot/README.md
Normal file
@@ -0,0 +1,76 @@
|
||||
# discord-bot
|
||||
|
||||

|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/discord-bot
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup)
|
||||
cp -rfv .env.example .env
|
||||
vim .env
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup
|
||||
|
||||
Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..`
|
||||
|
||||
## Kubernetes
|
||||
|
||||
- install the local-ai chart first
|
||||
- change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: discord-bot
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: localai
|
||||
namespace: discord-bot
|
||||
labels:
|
||||
app: localai
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: localai
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: localai
|
||||
name: localai
|
||||
spec:
|
||||
containers:
|
||||
- name: localai-discord
|
||||
env:
|
||||
- name: OPENAI_API_KEY
|
||||
value: "x"
|
||||
- name: DISCORD_BOT_TOKEN
|
||||
value: ""
|
||||
- name: DISCORD_CLIENT_ID
|
||||
value: ""
|
||||
- name: OPENAI_API_BASE
|
||||
value: "http://local-ai.default.svc.cluster.local:8080"
|
||||
- name: ALLOWED_SERVER_IDS
|
||||
value: "xx"
|
||||
- name: SERVER_TO_MODERATION_CHANNEL
|
||||
value: "1:1"
|
||||
image: quay.io/go-skynet/gpt-discord-bot:main
|
||||
```
|
||||
21
examples/discord-bot/docker-compose.yaml
Normal file
21
examples/discord-bot/docker-compose.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
bot:
|
||||
image: quay.io/go-skynet/gpt-discord-bot:main
|
||||
env_file:
|
||||
- .env
|
||||
1
examples/discord-bot/models
Symbolic link
1
examples/discord-bot/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../chatbot-ui/models/
|
||||
33
examples/langchain-python/README.md
Normal file
33
examples/langchain-python/README.md
Normal file
@@ -0,0 +1,33 @@
|
||||
## Langchain-python
|
||||
|
||||
Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
|
||||
|
||||
To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string.
|
||||
|
||||
See the example below:
|
||||
|
||||
```
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/langchain-python
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
|
||||
|
||||
pip install langchain
|
||||
pip install openai
|
||||
|
||||
export OPENAI_API_BASE=http://localhost:8080
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python test.py
|
||||
# A good company name for a company that makes colorful socks would be "Colorsocks".
|
||||
```
|
||||
16
examples/langchain-python/docker-compose.yaml
Normal file
16
examples/langchain-python/docker-compose.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
1
examples/langchain-python/models
Symbolic link
1
examples/langchain-python/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../chatbot-ui/models
|
||||
6
examples/langchain-python/test.py
Normal file
6
examples/langchain-python/test.py
Normal file
@@ -0,0 +1,6 @@
|
||||
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
|
||||
text = "What would be a good company name for a company that makes colorful socks?"
|
||||
print(llm(text))
|
||||
2
examples/langchain/.gitignore
vendored
Normal file
2
examples/langchain/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
models/ggml-koala-13B-4bit-128g
|
||||
models/ggml-gpt4all-j
|
||||
6
examples/langchain/JS.Dockerfile
Normal file
6
examples/langchain/JS.Dockerfile
Normal file
@@ -0,0 +1,6 @@
|
||||
FROM node:latest
|
||||
COPY ./langchainjs-localai-example /app
|
||||
WORKDIR /app
|
||||
RUN npm install
|
||||
RUN npm run build
|
||||
ENTRYPOINT [ "npm", "run", "start" ]
|
||||
31
examples/langchain/README.md
Normal file
31
examples/langchain/README.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# langchain
|
||||
|
||||
Example of using langchain in TypeScript, with the standard OpenAI llm module, and LocalAI.
|
||||
|
||||
Example for python langchain to follow at a later date
|
||||
|
||||
Set up to make it easy to modify the `index.mts` file to look like any langchain example file.
|
||||
|
||||
**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/langchain
|
||||
|
||||
# (optional) - Edit the example code in typescript.
|
||||
# vi ./langchainjs-localai-example/index.ts
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up --build
|
||||
```
|
||||
|
||||
## Copyright
|
||||
|
||||
Some of the example code in index.mts is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.
|
||||
25
examples/langchain/docker-compose.yaml
Normal file
25
examples/langchain/docker-compose.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
langchainjs:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: JS.Dockerfile
|
||||
environment:
|
||||
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
|
||||
- 'OPENAI_API_HOST=http://api:8080/v1'
|
||||
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
|
||||
2
examples/langchain/langchainjs-localai-example/.gitignore
vendored
Normal file
2
examples/langchain/langchainjs-localai-example/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
node_modules/
|
||||
dist/
|
||||
20
examples/langchain/langchainjs-localai-example/.vscode/launch.json
vendored
Normal file
20
examples/langchain/langchainjs-localai-example/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "Launch Program",
|
||||
// "skipFiles": [
|
||||
// "<node_internals>/**"
|
||||
// ],
|
||||
"program": "${workspaceFolder}\\dist\\index.mjs",
|
||||
"outFiles": [
|
||||
"${workspaceFolder}/**/*.js"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
2313
examples/langchain/langchainjs-localai-example/package-lock.json
generated
Normal file
2313
examples/langchain/langchainjs-localai-example/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
examples/langchain/langchainjs-localai-example/package.json
Normal file
21
examples/langchain/langchainjs-localai-example/package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "langchainjs-localai-example",
|
||||
"version": "0.1.0",
|
||||
"description": "Trivial Example of using langchain + the OpenAI API + LocalAI together",
|
||||
"main": "index.mjs",
|
||||
"scripts": {
|
||||
"build": "tsc --build",
|
||||
"clean": "tsc --build --clean",
|
||||
"start": "node --trace-warnings dist/index.mjs"
|
||||
},
|
||||
"author": "dave@gray101.com",
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"@types/node": "^18.16.3",
|
||||
"typescript": "^5.0.4"
|
||||
},
|
||||
"dependencies": {
|
||||
"langchain": "^0.0.67",
|
||||
"typeorm": "^0.3.15"
|
||||
}
|
||||
}
|
||||
79
examples/langchain/langchainjs-localai-example/src/index.mts
Normal file
79
examples/langchain/langchainjs-localai-example/src/index.mts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { OpenAIChat } from "langchain/llms/openai";
|
||||
import { loadQAStuffChain } from "langchain/chains";
|
||||
import { Document } from "langchain/document";
|
||||
import { initializeAgentExecutorWithOptions } from "langchain/agents";
|
||||
import {Calculator} from "langchain/tools/calculator";
|
||||
|
||||
const pathToLocalAi = process.env['OPENAI_API_HOST'] || 'http://api:8080/v1';
|
||||
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
|
||||
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
|
||||
|
||||
function getModel(): OpenAIChat {
|
||||
return new OpenAIChat({
|
||||
prefixMessages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are a helpful assistant that answers in pirate language",
|
||||
},
|
||||
],
|
||||
modelName: modelName,
|
||||
maxTokens: 50,
|
||||
openAIApiKey: fakeApiKey,
|
||||
maxRetries: 2
|
||||
}, {
|
||||
basePath: pathToLocalAi,
|
||||
apiKey: fakeApiKey,
|
||||
});
|
||||
}
|
||||
|
||||
// Minimal example.
|
||||
export const run = async () => {
|
||||
const model = getModel();
|
||||
console.log(`about to model.call at ${new Date().toUTCString()}`);
|
||||
const res = await model.call(
|
||||
"What would be a good company name a company that makes colorful socks?"
|
||||
);
|
||||
console.log(`${new Date().toUTCString()}`);
|
||||
console.log({ res });
|
||||
};
|
||||
|
||||
await run();
|
||||
|
||||
// This example uses the `StuffDocumentsChain`
|
||||
export const run2 = async () => {
|
||||
const model = getModel();
|
||||
const chainA = loadQAStuffChain(model);
|
||||
const docs = [
|
||||
new Document({ pageContent: "Harrison went to Harvard." }),
|
||||
new Document({ pageContent: "Ankush went to Princeton." }),
|
||||
];
|
||||
const resA = await chainA.call({
|
||||
input_documents: docs,
|
||||
question: "Where did Harrison go to college?",
|
||||
});
|
||||
console.log({ resA });
|
||||
};
|
||||
|
||||
await run2();
|
||||
|
||||
// Quickly thrown together example of using tools + agents.
|
||||
// This seems like it should work, but it doesn't yet.
|
||||
export const temporarilyBrokenToolTest = async () => {
|
||||
const model = getModel();
|
||||
|
||||
const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
|
||||
agentType: "zero-shot-react-description",
|
||||
});
|
||||
|
||||
console.log("Loaded agent.");
|
||||
|
||||
const input = `What is the value of (500 *2) + 350 - 13?`;
|
||||
|
||||
console.log(`Executing with input "${input}"...`);
|
||||
|
||||
const result = await executor.call({ input });
|
||||
|
||||
console.log(`Got output ${result.output}`);
|
||||
}
|
||||
|
||||
await temporarilyBrokenToolTest();
|
||||
15
examples/langchain/langchainjs-localai-example/tsconfig.json
Normal file
15
examples/langchain/langchainjs-localai-example/tsconfig.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es2022",
|
||||
"lib": ["ES2022", "DOM"],
|
||||
"module": "ES2022",
|
||||
"moduleResolution": "node",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"isolatedModules": true,
|
||||
"outDir": "./dist"
|
||||
},
|
||||
"include": ["src", "test"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
1
examples/langchain/models/completion.tmpl
Normal file
1
examples/langchain/models/completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
17
examples/langchain/models/gpt-3.5-turbo.yaml
Normal file
17
examples/langchain/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
threads: 4
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: completion # gpt4all
|
||||
4
examples/langchain/models/gpt4all.tmpl
Normal file
4
examples/langchain/models/gpt4all.tmpl
Normal file
@@ -0,0 +1,4 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
11
examples/slack-bot/.env.example
Normal file
11
examples/slack-bot/.env.example
Normal file
@@ -0,0 +1,11 @@
|
||||
SLACK_APP_TOKEN=xapp-1-...
|
||||
SLACK_BOT_TOKEN=xoxb-...
|
||||
OPENAI_API_KEY=sk-...
|
||||
OPENAI_API_BASE=http://api:8080
|
||||
OPENAI_MODEL=gpt-3.5-turbo
|
||||
OPENAI_TIMEOUT_SECONDS=60
|
||||
#OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check
|
||||
#for mistakes and make suggestion to improve the language of the given text"
|
||||
USE_SLACK_LANGUAGE=true
|
||||
SLACK_APP_LOG_LEVEL=INFO
|
||||
TRANSLATE_MARKDOWN=true
|
||||
27
examples/slack-bot/README.md
Normal file
27
examples/slack-bot/README.md
Normal file
@@ -0,0 +1,27 @@
|
||||
# Slack bot
|
||||
|
||||
Slackbot using: https://github.com/seratch/ChatGPT-in-Slack
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/slack-bot
|
||||
|
||||
git clone https://github.com/seratch/ChatGPT-in-Slack
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
|
||||
cp -rfv .env.example .env
|
||||
vim .env
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
23
examples/slack-bot/docker-compose.yaml
Normal file
23
examples/slack-bot/docker-compose.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
bot:
|
||||
build:
|
||||
context: ./ChatGPT-in-Slack
|
||||
dockerfile: Dockerfile
|
||||
env_file:
|
||||
- .env
|
||||
1
examples/slack-bot/models
Symbolic link
1
examples/slack-bot/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../chatbot-ui/models
|
||||
24
go.mod
24
go.mod
@@ -5,27 +5,33 @@ go 1.19
|
||||
require (
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405
|
||||
github.com/gofiber/fiber/v2 v2.44.0
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/jaypipes/ghw v0.10.0
|
||||
github.com/onsi/ginkgo/v2 v2.9.2
|
||||
github.com/onsi/ginkgo/v2 v2.9.3
|
||||
github.com/onsi/gomega v1.27.6
|
||||
github.com/otiai10/openaigo v1.1.0
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/sashabaranov/go-openai v1.9.0
|
||||
github.com/urfave/cli/v2 v2.25.1
|
||||
github.com/sashabaranov/go-openai v1.9.1
|
||||
github.com/urfave/cli/v2 v2.25.3
|
||||
github.com/valyala/fasthttp v1.47.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
||||
github.com/andybalholm/brotli v1.0.5 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.3 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/jaypipes/pcidb v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.16.3 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
@@ -41,14 +47,12 @@ require (
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
|
||||
github.com/tinylib/msgp v1.1.8 // indirect
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/valyala/fasthttp v1.45.0 // indirect
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
golang.org/x/net v0.8.0 // indirect
|
||||
golang.org/x/net v0.9.0 // indirect
|
||||
golang.org/x/sys v0.7.0 // indirect
|
||||
golang.org/x/text v0.8.0 // indirect
|
||||
golang.org/x/tools v0.7.0 // indirect
|
||||
golang.org/x/text v0.9.0 // indirect
|
||||
golang.org/x/tools v0.8.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
howett.net/plist v1.0.0 // indirect
|
||||
)
|
||||
|
||||
65
go.sum
65
go.sum
@@ -1,7 +1,5 @@
|
||||
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
|
||||
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
|
||||
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
@@ -14,30 +12,28 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d h1:lSHwlYf1H4WAWYgf7rjEVTGen1qmigUq2Egpu8mnQiY=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d/go.mod h1:H6QBF7/Tz6DAEBDXQged4H1BvsmqY/K5FG9wQRGa01g=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
|
||||
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
|
||||
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04 h1:NPiv7mcIjU71MhEv3v4RRWKSBNXnnCyu6VX4CaaHz2I=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2 h1:CYQRCbOfYtC77OxweAyrdxSVwoLIM/EdZ6Ij+xBzta8=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405 h1:pbIxJ/eiL1Irdprxk/mquaxjR1XDGCE+7CT9BGJNRaY=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
|
||||
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
|
||||
github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
|
||||
github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
@@ -47,14 +43,16 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
||||
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
|
||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
|
||||
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
|
||||
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
|
||||
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
|
||||
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
|
||||
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
|
||||
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
|
||||
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
|
||||
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
@@ -65,8 +63,6 @@ github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxec
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
|
||||
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
|
||||
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
||||
@@ -75,9 +71,13 @@ github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
|
||||
github.com/onsi/ginkgo/v2 v2.9.3 h1:5X2vl/isiKqkrOYjiaGgp3JQOcLV59g5o5SuTMqCcxU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.3/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
|
||||
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
|
||||
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
|
||||
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
|
||||
github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y=
|
||||
github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
|
||||
github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
|
||||
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
|
||||
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
|
||||
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
|
||||
@@ -92,31 +92,25 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
|
||||
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
|
||||
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.9.1 h1:3N52HkJKo9Zlo/oe1AVv5ZkCOny0ra58/ACvAxkN3MM=
|
||||
github.com/sashabaranov/go-openai v1.9.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
|
||||
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
|
||||
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
|
||||
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
|
||||
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
|
||||
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
|
||||
github.com/urfave/cli/v2 v2.25.1 h1:zw8dSP7ghX0Gmm8vugrs6q9Ku0wzweqPyshy+syu9Gw=
|
||||
github.com/urfave/cli/v2 v2.25.1/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
|
||||
github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY=
|
||||
github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
|
||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
|
||||
github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
|
||||
github.com/valyala/fasthttp v1.45.0 h1:zPkkzpIn8tdHZUrVa6PzYd0i5verqiPSkgTd3bSUcpA=
|
||||
github.com/valyala/fasthttp v1.45.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
|
||||
github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c=
|
||||
github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
|
||||
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
|
||||
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
|
||||
@@ -127,7 +121,6 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
@@ -135,12 +128,12 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
|
||||
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
|
||||
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
|
||||
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
|
||||
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
|
||||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
@@ -151,16 +144,13 @@ golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
|
||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
@@ -169,11 +159,12 @@ golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuX
|
||||
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
|
||||
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
@@ -181,6 +172,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
|
||||
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
|
||||
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
|
||||
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
|
||||
golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
|
||||
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
|
||||
7
main.go
7
main.go
@@ -50,6 +50,11 @@ func main() {
|
||||
EnvVars: []string{"MODELS_PATH"},
|
||||
Value: path,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config-file",
|
||||
DefaultText: "Config file",
|
||||
EnvVars: []string{"CONFIG_FILE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "address",
|
||||
DefaultText: "Bind address for the API server.",
|
||||
@@ -80,7 +85,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
|
||||
UsageText: `local-ai [options]`,
|
||||
Copyright: "go-skynet authors",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
return api.App(model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -12,41 +12,43 @@ import (
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
rwkv "github.com/donomii/go-rwkv.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
)
|
||||
|
||||
type ModelLoader struct {
|
||||
modelPath string
|
||||
ModelPath string
|
||||
mu sync.Mutex
|
||||
|
||||
models map[string]*llama.LLama
|
||||
gptmodels map[string]*gptj.GPTJ
|
||||
gpt2models map[string]*gpt2.GPT2
|
||||
gptstablelmmodels map[string]*gpt2.StableLM
|
||||
|
||||
promptsTemplates map[string]*template.Template
|
||||
rwkv map[string]*rwkv.RwkvState
|
||||
promptsTemplates map[string]*template.Template
|
||||
}
|
||||
|
||||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
return &ModelLoader{
|
||||
modelPath: modelPath,
|
||||
ModelPath: modelPath,
|
||||
gpt2models: make(map[string]*gpt2.GPT2),
|
||||
gptmodels: make(map[string]*gptj.GPTJ),
|
||||
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
||||
models: make(map[string]*llama.LLama),
|
||||
rwkv: make(map[string]*rwkv.RwkvState),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
|
||||
_, err := os.Stat(filepath.Join(ml.modelPath, s))
|
||||
_, err := os.Stat(filepath.Join(ml.ModelPath, s))
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) ListModels() ([]string, error) {
|
||||
files, err := ioutil.ReadDir(ml.modelPath)
|
||||
files, err := ioutil.ReadDir(ml.ModelPath)
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
@@ -70,7 +72,19 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,
|
||||
|
||||
m, ok := ml.promptsTemplates[modelName]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("no prompt template available")
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
t, exists := ml.promptsTemplates[modelName]
|
||||
if exists {
|
||||
m = t
|
||||
}
|
||||
|
||||
}
|
||||
if m == nil {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
@@ -88,14 +102,14 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
|
||||
}
|
||||
|
||||
// Check if the model path exists
|
||||
// skip any error here - we run anyway if a template is not exist
|
||||
// skip any error here - we run anyway if a template does not exist
|
||||
modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)
|
||||
|
||||
if !ml.ExistsInModelPath(modelTemplateFile) {
|
||||
return nil
|
||||
}
|
||||
|
||||
dat, err := os.ReadFile(filepath.Join(ml.modelPath, modelTemplateFile))
|
||||
dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -125,7 +139,7 @@ func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, erro
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.NewStableLM(modelFile)
|
||||
@@ -156,15 +170,8 @@ func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// TODO: This needs refactoring, it's really bad to have it in here
|
||||
// Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
|
||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.New(modelFile)
|
||||
@@ -195,19 +202,8 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// TODO: This needs refactoring, it's really bad to have it in here
|
||||
// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
|
||||
if _, ok := ml.gpt2models[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPT2: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPT2 one")
|
||||
}
|
||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gptj.New(modelFile)
|
||||
@@ -224,6 +220,36 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
log.Debug().Msgf("Loading model name: %s", modelName)
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.rwkv[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
tokenPath := filepath.Join(ml.ModelPath, tokenFile)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model := rwkv.LoadFiles(modelFile, tokenPath, threads)
|
||||
if model == nil {
|
||||
return nil, fmt.Errorf("could not load model")
|
||||
}
|
||||
|
||||
ml.rwkv[modelName] = model
|
||||
return model, nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
@@ -240,23 +266,8 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// TODO: This needs refactoring, it's really bad to have it in here
|
||||
// Check if we have a GPTJ model loaded instead - if we do we return an error so the API tries with GPTJ
|
||||
if _, ok := ml.gptmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPTJ: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTJ one")
|
||||
}
|
||||
if _, ok := ml.gpt2models[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPT2: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPT2 one")
|
||||
}
|
||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := llama.New(modelFile, opts...)
|
||||
|
||||
3
prompt-templates/wizardlm.tmpl
Normal file
3
prompt-templates/wizardlm.tmpl
Normal file
@@ -0,0 +1,3 @@
|
||||
{{.Input}}
|
||||
|
||||
### Response:
|
||||
1
tests/fixtures/completion.tmpl
vendored
Normal file
1
tests/fixtures/completion.tmpl
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
28
tests/fixtures/config.yaml
vendored
Normal file
28
tests/fixtures/config.yaml
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
- name: list1
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
- name: list2
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
4
tests/fixtures/ggml-gpt4all-j.tmpl
vendored
Normal file
4
tests/fixtures/ggml-gpt4all-j.tmpl
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
14
tests/fixtures/gpt4.yaml
vendored
Normal file
14
tests/fixtures/gpt4.yaml
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
name: gpt4all
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
14
tests/fixtures/gpt4_2.yaml
vendored
Normal file
14
tests/fixtures/gpt4_2.yaml
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
name: gpt4all-2
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 1024
|
||||
threads: 5
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
Reference in New Issue
Block a user