Compare commits

...

46 Commits

Author SHA1 Message Date
Ettore Di Giacinto
714bfcd45b fix: missing returning error and free callback stream (#187) 2023-05-04 19:49:43 +02:00
renovate[bot]
77ce8b953e fix(deps): update github.com/donomii/go-rwkv.cpp digest to af62fcc (#171)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:48 +02:00
renovate[bot]
01ada95941 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 2e6ae12 (#172)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:11 +02:00
ci-robbot [bot]
eabdc5042a ⬆️ Update go-skynet/go-llama.cpp (#184)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-04 18:28:49 +02:00
Dhruv Gera
96267d9437 localai: Include the WebUI project example (#130)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-04 18:27:58 +02:00
Ettore Di Giacinto
9497a24127 fix: hardcode default number of cores to '4' (#186) 2023-05-04 18:14:58 +02:00
Ettore Di Giacinto
fdf75c6d0e rwkv fixes and examples (#185) 2023-05-04 17:32:23 +02:00
mudler
6352308882 ci: minor fixups 2023-05-04 15:08:20 +02:00
mudler
a8172a0f4e ci: fix typo 2023-05-04 15:04:41 +02:00
mudler
ebcd10d66f ci: manually update deps 2023-05-04 15:01:29 +02:00
mudler
885642915f ci: add renovate suffix 2023-05-04 12:26:59 +02:00
mudler
2e424491c0 ci: lookupNameTemplate -> depNameTemplate 2023-05-04 12:23:05 +02:00
mudler
aa6faef8f7 ci: versioning -> versioningTemplate 2023-05-04 12:07:29 +02:00
mudler
b3254baf60 ci: add versioning 2023-05-04 12:05:39 +02:00
mudler
0a43d27f0e ci: update renovate 2023-05-04 12:02:19 +02:00
Ettore Di Giacinto
3fe11fe24d ci: attempt to configure renovate with custom regexes (#178) 2023-05-04 11:55:14 +02:00
renovate[bot]
af18fdc749 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.3 (#174)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:44:02 +02:00
renovate[bot]
32b5eddd7d fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.4 (#173)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:41:51 +02:00
Dave
07c3aa1869 Dockerized Langchain / PY example (#175) 2023-05-04 08:41:13 +02:00
renovate[bot]
e59bad89e7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.2 (#164)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 23:05:50 +02:00
Jeremy Price
b971807980 Looks for models in $CWD/models/ dir by default (#169) 2023-05-03 23:03:31 +02:00
Ettore Di Giacinto
c974dad799 Return usage in the API responses (#166) 2023-05-03 17:29:18 +02:00
Ettore Di Giacinto
4eae570ef5 Update docs (#163) 2023-05-03 15:51:54 +02:00
Ettore Di Giacinto
67992a7d99 feat: support slices or strings in the prompt completion endpoint (#162)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 13:13:31 +02:00
renovate[bot]
0a4899f366 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 8ceb616 (#150)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:48:06 +02:00
renovate[bot]
1eb02f6c91 fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.3 (#161)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 11:47:54 +02:00
mudler
575874e4fb readme: minor update 2023-05-03 11:46:29 +02:00
Ettore Di Giacinto
751b7eca62 feat: add rwkv support (#158)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-03 11:45:22 +02:00
Ettore Di Giacinto
1ae7150810 feat: allow to specify default backend for model (#156)
Signed-off-by: mudler <mudler@c3os.io>
2023-05-03 00:31:28 +02:00
Ettore Di Giacinto
70caf9bf8c feat: support stopwords both string and arrays (#154) 2023-05-02 23:30:00 +02:00
Dave
0b226ac027 Stop parameter of OpenAIRequest changed to String Array (#153) 2023-05-02 22:02:45 +02:00
Ettore Di Giacinto
220d6fd59b feat: add stream events (#152) 2023-05-02 20:03:35 +02:00
antongisli
0a00a4b58e adding mac build and example (#151)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-02 19:24:45 +02:00
Ettore Di Giacinto
156e15a4fa Bump llama.cpp, downgrade gpt4all-j (#149) 2023-05-02 16:07:18 +02:00
renovate[bot]
271d3f6673 fix(deps): update module github.com/valyala/fasthttp to v1.47.0 (#143)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 23:36:58 +02:00
Ettore Di Giacinto
fec4ab93c5 docs: Add langchain to the example index (#147)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:21:07 +02:00
renovate[bot]
38a7a7a54d fix(deps): update github.com/go-skynet/go-gpt4all-j.cpp digest to 77bf8c1 (#141)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-01 23:18:41 +02:00
Ettore Di Giacinto
0db0704e2c docs: Add slack-bot example (#145)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 23:18:24 +02:00
Dave
88f472e5d2 Add LangchainJS Examples (#146) 2023-05-01 23:18:14 +02:00
Ettore Di Giacinto
92452d46da feat: add new gpt4all-j binding (#142) 2023-05-01 20:00:15 +02:00
Ettore Di Giacinto
ac70252d70 drop: remove helm charts, now in separate repo (#134)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-01 18:07:41 +02:00
renovate[bot]
f6451d2518 fix(deps): update module github.com/urfave/cli/v2 to v2.25.3 (#140)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 18:07:29 +02:00
Ettore Di Giacinto
2473f9d19b docs: add discord-bot preview (#137) 2023-05-01 11:03:34 +02:00
renovate[bot]
bc583385a9 fix(deps): update module github.com/urfave/cli/v2 to v2.25.2 (#136)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:53:48 +02:00
renovate[bot]
8286bfbab7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.1 (#135)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-01 07:52:20 +02:00
Ettore Di Giacinto
d129fabe3b docs: enhancements (#133) 2023-04-30 23:27:02 +02:00
60 changed files with 3791 additions and 560 deletions

9
.github/bump_deps.sh vendored Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/bash
set -xe
REPO=$1
BRANCH=$2
VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"

42
.github/workflows/bump_deps.yaml vendored Normal file
View File

@@ -0,0 +1,42 @@
name: Bump dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
strategy:
fail-fast: false
matrix:
include:
- repository: "go-skynet/go-gpt4all-j.cpp"
variable: "GOGPT4ALLJ_VERSION"
branch: "master"
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-gpt2.cpp"
variable: "GOGPT2_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Bump dependencies 🔧
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version
signoff: true

View File

@@ -54,8 +54,8 @@ jobs:
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
- name: Build
if: github.event_name != 'pull_request'
uses: docker/build-push-action@v4

View File

@@ -2,12 +2,12 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
GOLLAMA_VERSION?=llama.cpp-7f15c5c
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
GOLLAMA_VERSION?=2e6ae1269e035886fc64e268a6dda9d8c4ba8c75
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
@@ -15,8 +15,8 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
# Use this if you want to set the default behavior
ifndef BUILD_TYPE
@@ -33,20 +33,10 @@ endif
all: help
## Build:
build: prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
## GPT4ALL-J
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@@ -57,13 +47,21 @@ go-gpt4all-j:
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
## RWKV
go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
go-rwkv/librwkv.a: go-rwkv
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
# CEREBRAS GPT
go-gpt2:
## CEREBRAS GPT
go-gpt2:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@@ -74,10 +72,10 @@ go-gpt2:
go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
go-llama:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
@@ -86,26 +84,40 @@ replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
prepare-sources: go-llama go-gpt2 go-gpt4all-j
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv
$(GOCMD) mod download
rebuild:
## GENERIC
rebuild: ## Rebuilds the project
$(MAKE) -C go-llama clean
$(MAKE) -C go-gpt4all-j clean
$(MAKE) -C go-gpt2 clean
$(MAKE) -C go-rwkv clean
$(MAKE) build
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a replace ## Prepares for building
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./go-gpt2
rm -rf ./go-rwkv
rm -rf $(BINARY_NAME)
## Run:
run: prepare
## Build:
build: prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
## Run
run: prepare ## run local-ai
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
test-models/testmodel:

505
README.md
View File

@@ -9,17 +9,23 @@
[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy)
**LocalAI** is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is licensed under Apache 2.0.
**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is licensed under Apache 2.0.
- OpenAI compatible API
- Supports multiple-models
- Once loaded the first time, it keep models loaded in memory for faster inference
- Support for prompt templates
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
- Doesn't shell-out, but uses C bindings for a faster inference and better performance.
LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
### News
- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
- 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )
### Socials and community chatter
- Follow [@LocalAI_API](https://twitter.com/LocalAI_API) on twitter.
- [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
@@ -39,11 +45,45 @@ Tested with:
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
- Koala
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
- WizardLM
- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
### Vicuna, Alpaca, LLaMa...
[llama.cpp](https://github.com/ggerganov/llama.cpp) based models are compatible
### GPT4ALL
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
### GPT4ALL-J
No changes required to the model.
### RWKV
<details>
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
Note: rwkv models have an associated tokenizer along that needs to be provided with it:
```
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
```
</details>
### Others
It should also be compatible with StableLM and GPTNeoX ggml models (untested).
### Hardware requirements
Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources.
## Usage
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
@@ -120,184 +160,59 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
To build locally, run `make build` (see below).
## Other examples
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
### Other examples
To see other examples on how to integrate with other projects for instance chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
## Prompt templates
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
<details>
You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl` which will be used as a default prompt and can be used with alpaca:
```
The below instruction describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Input}}
### Response:
```
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models.
</details>
## Installation
Currently LocalAI comes as container images and can be used with docker or a containre engine of choice.
### Run LocalAI in Kubernetes
LocalAI can be installed inside Kubernetes with helm.
<details>
The local-ai Helm chart supports two options for the LocalAI server's models directory:
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
```bash
helm install local-ai charts/local-ai -n local-ai --create-namespace
```
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
```bash
helm upgrade local-ai -n local-ai charts/local-ai
```
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
</details>
## API
`LocalAI` provides an API for running text generation as a service, that follows the OpenAI reference and can be used as a drop-in. The models once loaded the first time will be kept in memory.
<details>
Example of starting the API with `docker`:
```bash
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
```
You should see:
```
┌───────────────────────────────────────────────────┐
│ Fiber v2.42.0 │
│ http://127.0.0.1:8080 │
│ (bound on host 0.0.0.0 and port 8080) │
│ │
│ Handlers ............. 1 Processes ........... 1 │
│ Prefork ....... Disabled PID ................. 1 │
└───────────────────────────────────────────────────┘
```
You can control the API server options with command line arguments:
```
local-api --models-path <model_path> [--address <address>] [--threads <num_threads>]
```
The API takes takes the following parameters:
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
| debug | DEBUG | false | Enable debug mode. |
| config-file | CONFIG_FILE | empty | Path to a LocalAI config file. |
Once the server is running, you can start making requests to it using HTTP, using the OpenAI API.
</details>
### Supported OpenAI API endpoints
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
Following the list of endpoints/parameters supported.
Note:
- You can also specify the model as part of the OpenAI token.
- If only one model is available, the API will use it for all the requests.
#### Chat completions
<details>
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
#### Completions
<details>
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
#### List models
<details>
You can list all the models available with:
```
curl http://localhost:8080/v1/models
```
</details>
## Advanced configuration
### Advanced configuration
LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
<details>
You can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
For instance, a configuration file (`gpt-3.5-turbo.yaml`) can be declaring the "gpt-3.5-turbo" model but backed by the "testmodel" model file:
You can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
Consider the following `models` folder in the `example/chatbot-ui`:
```
base ls -liah examples/chatbot-ui/models
36487587 drwxr-xr-x 2 mudler mudler 4.0K May 3 12:27 .
36487586 drwxr-xr-x 3 mudler mudler 4.0K May 3 10:42 ..
36465214 -rw-r--r-- 1 mudler mudler 10 Apr 27 07:46 completion.tmpl
36464855 -rw-r--r-- 1 mudler mudler 3.6G Apr 27 00:08 ggml-gpt4all-j
36464537 -rw-r--r-- 1 mudler mudler 245 May 3 10:42 gpt-3.5-turbo.yaml
36467388 -rw-r--r-- 1 mudler mudler 180 Apr 27 07:46 gpt4all.tmpl
```
In the `gpt-3.5-turbo.yaml` file it is defined the `gpt-3.5-turbo` model which is an alias to use `gpt4all-j` with pre-defined options.
For instance, consider the following that declares `gpt-3.5-turbo` backed by the `ggml-gpt4all-j` model:
```yaml
name: gpt-3.5-turbo
# Default model parameters
parameters:
model: testmodel
# Relative to the models path
model: ggml-gpt4all-j
# temperature
temperature: 0.3
# all the OpenAI request options here..
# Default context size
context_size: 512
threads: 10
# Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with.
backend: gptj # available: llama, stablelm, gpt2, gptj rwkv
# stopwords (if supported by the backend)
stopwords:
- "HUMAN:"
- "### Response:"
# define chat roles
roles:
user: "HUMAN:"
system: "GPT:"
template:
# template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
completion: completion
chat: ggml-gpt4all-j
```
@@ -332,20 +247,101 @@ Specifying a `config-file` via CLI allows to declare models in a single file as
system: "GPT:"
template:
completion: completion
chat: ggml-gpt4all-j
chat: ggml-gpt4all-j
```
See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
</details>
## Windows compatibility
### Prompt templates
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
## Build locally
<details>
You can use a default template for every model present in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl` which will be used as a default prompt and can be used with alpaca:
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
```
The below instruction describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Input}}
### Response:
```
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models.
For the edit endpoint, an example template for alpaca-based models can be:
```yaml
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{{.Instruction}}
### Input:
{{.Input}}
### Response:
```
</details>
### CLI
You can control LocalAI with command line arguments, to specify a binding address, or the number of threads.
<details>
Usage:
```
local-ai --models-path <model_path> [--address <address>] [--threads <num_threads>]
```
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
| debug | DEBUG | false | Enable debug mode. |
| config-file | CONFIG_FILE | empty | Path to a LocalAI config file. |
</details>
## Setup
Currently LocalAI comes as a container image and can be used with docker or a container engine of choice. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
### Docker
<details>
Example of starting the API with `docker`:
```bash
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
```
You should see:
```
┌───────────────────────────────────────────────────┐
│ Fiber v2.42.0 │
│ http://127.0.0.1:8080 │
│ (bound on host 0.0.0.0 and port 8080) │
│ │
│ Handlers ............. 1 Processes ........... 1 │
│ Prefork ....... Disabled PID ................. 1 │
└───────────────────────────────────────────────────┘
```
</details>
### Build locally
<details>
In order to build the `LocalAI` container image locally you can use `docker`:
@@ -355,12 +351,182 @@ docker build -t LocalAI .
docker run LocalAI
```
Or build the binary with `make`:
Or you can build the binary with `make`:
```
make build
```
</details>
### Build on mac
Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`.
<details>
The below has been tested by one mac user and found to work. Note that this doesn't use docker to run the server:
```
# install build dependencies
brew install cmake
brew install go
# clone the repo
git clone https://github.com/go-skynet/LocalAI.git
cd LocalAI
# build the binary
make build
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Use a template from the examples
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
# Run LocalAI
./local-ai --models-path ./models/ --debug
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-gpt4all-j",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9
}'
```
</details>
### Windows compatibility
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
### Run LocalAI in Kubernetes
LocalAI can be installed inside Kubernetes with helm.
<details>
1. Add the helm repo
```bash
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
```
1. Create a values files with your settings:
```bash
cat <<EOF > values.yaml
deployment:
image: quay.io/go-skynet/local-ai:latest
env:
threads: 4
contextSize: 1024
modelsPath: "/models"
# Optionally create a PVC, mount the PV to the LocalAI Deployment,
# and download a model to prepopulate the models directory
modelsVolume:
enabled: true
url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
pvc:
size: 6Gi
accessModes:
- ReadWriteOnce
auth:
# Optional value for HTTP basic access authentication header
basic: "" # 'username:password' base64 encoded
service:
type: ClusterIP
annotations: {}
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
EOF
```
3. Install the helm chart:
```bash
helm repo update
helm install local-ai go-skynet/local-ai -f values.yaml
```
Check out also the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts).
</details>
## Supported OpenAI API endpoints
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
Following the list of endpoints/parameters supported.
Note:
- You can also specify the model as part of the OpenAI token.
- If only one model is available, the API will use it for all the requests.
### Chat completions
<details>
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
### Edit completions
<details>
To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"instruction": "rephrase",
"input": "Black cat jumped out of the window",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`.
</details>
### Completions
<details>
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
### List models
<details>
You can list all the models available with:
```
curl http://localhost:8080/v1/models
```
</details>
## Frequently asked questions
Here are answers to some of the most common questions.
@@ -404,7 +570,7 @@ Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/ll
### Where is the webUI?
<details>
We are working on to have a good out of the box experience - however as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
There is the availability of localai-webui and chatbot-ui in the examples section and can be setup as per the instructions. However as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
</details>
@@ -448,6 +614,13 @@ LocalAI is a community-driven project. It was initially created by [mudler](http
MIT
## Golang bindings used
- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
## Acknowledgements
- [llama.cpp](https://github.com/ggerganov/llama.cpp)

View File

@@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 5 errors occurred:"))
})
})

View File

@@ -21,6 +21,7 @@ type Config struct {
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
}

View File

@@ -2,6 +2,7 @@ package api
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"os"
@@ -26,12 +27,19 @@ type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Usage OpenAIUsage `json:"usage"`
}
type Choice struct {
@@ -56,13 +64,13 @@ type OpenAIRequest struct {
Model string `json:"model" yaml:"model"`
// Prompt is read only by completion API calls
Prompt string `json:"prompt" yaml:"prompt"`
Prompt interface{} `json:"prompt" yaml:"prompt"`
// Edit endpoint
Instruction string `json:"instruction" yaml:"instruction"`
Input string `json:"input" yaml:"input"`
Stop string `json:"stop" yaml:"stop"`
Stop interface{} `json:"stop" yaml:"stop"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages" yaml:"messages"`
@@ -116,8 +124,17 @@ func updateConfig(config *Config, input *OpenAIRequest) {
config.Maxtokens = input.Maxtokens
}
if input.Stop != "" {
config.StopWords = append(config.StopWords, input.Stop)
switch stop := input.Stop.(type) {
case string:
if stop != "" {
config.StopWords = append(config.StopWords, stop)
}
case []interface{}:
for _, pp := range stop {
if s, ok := pp.(string); ok {
config.StopWords = append(config.StopWords, s)
}
}
}
if input.RepeatPenalty != 0 {
@@ -227,27 +244,44 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
log.Debug().Msgf("Parameter Config: %+v", config)
predInput := input.Prompt
predInput := []string{}
switch p := input.Prompt.(type) {
case string:
predInput = append(predInput, p)
case []interface{}:
for _, pp := range p {
if s, ok := pp.(string); ok {
predInput = append(predInput, s)
}
}
}
templateFile := config.Model
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: predInput})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
var result []Choice
for _, i := range predInput {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: i})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
})
if err != nil {
return err
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
return err
}
result = append(result, r...)
}
resp := &OpenAIResponse{
@@ -265,6 +299,21 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
}
func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
resp := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
Object: "chat.completion.chunk",
}
log.Debug().Msgf("Sending goroutine: %s", s)
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
@@ -290,8 +339,9 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
if input.Stream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
c.Set("Content-Type", "text/event-stream; charset=utf-8")
// c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
@@ -312,13 +362,40 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
if input.Stream {
responses := make(chan OpenAIResponse)
go process(predInput, input, config, loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
for ev := range responses {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
fmt.Fprintf(w, "event: data\n\n")
fmt.Fprintf(w, "data: %v\n\n", buf.String())
log.Debug().Msgf("Sending chunk: %s", buf.String())
w.Flush()
}
w.WriteString("event: data\n\n")
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{FinishReason: "stop"}},
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.Flush()
}))
return nil
}
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
if input.Stream {
*c = append(*c, Choice{Delta: &Message{Role: "assistant", Content: s}})
} else {
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
}
})
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
}, nil)
if err != nil {
return err
}
@@ -328,36 +405,8 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
Choices: result,
Object: "chat.completion",
}
if input.Stream {
resp.Object = "chat.completion.chunk"
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
log.Debug().Msgf("Handling stream request")
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
fmt.Fprintf(w, "event: data\n")
w.Flush()
fmt.Fprintf(w, "data: %s\n\n", jsonResult)
w.Flush()
fmt.Fprintf(w, "event: data\n")
w.Flush()
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{FinishReason: "stop"}},
}
respData, _ := json.Marshal(resp)
fmt.Fprintf(w, "data: %s\n\n", respData)
w.Flush()
// fmt.Fprintf(w, "data: [DONE]\n\n")
// w.Flush()
}))
return nil
}
respData, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", respData)
// Return the prediction in the response body
return c.JSON(resp)
@@ -392,7 +441,7 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
})
}, nil)
if err != nil {
return err
}

View File

@@ -6,26 +6,103 @@ import (
"strings"
"sync"
"github.com/donomii/go-rwkv.cpp"
model "github.com/go-skynet/LocalAI/pkg/model"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/hashicorp/go-multierror"
)
const tokenizerSuffix = ".tokenizer.json"
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
var mutexMap sync.Mutex
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (string, error), error) {
var model *llama.LLama
var gptModel *gptj.GPTJ
var gpt2Model *gpt2.GPT2
var stableLMModel *gpt2.StableLM
var loadedModels map[string]interface{} = map[string]interface{}{}
var muModels sync.Mutex
func backendLoader(backendString string, loader *model.ModelLoader, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
switch strings.ToLower(backendString) {
case "llama":
return loader.LoadLLaMAModel(modelFile, llamaOpts...)
case "stablelm":
return loader.LoadStableLMModel(modelFile)
case "gpt2":
return loader.LoadGPT2Model(modelFile)
case "gptj":
return loader.LoadGPTJModel(modelFile)
case "rwkv":
return loader.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
default:
return nil, fmt.Errorf("backend unsupported: %s", backendString)
}
}
func greedyLoader(loader *model.ModelLoader, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
updateModels := func(model interface{}) {
muModels.Lock()
defer muModels.Unlock()
loadedModels[modelFile] = model
}
muModels.Lock()
m, exists := loadedModels[modelFile]
if exists {
muModels.Unlock()
return m, nil
}
muModels.Unlock()
model, modelerr := loader.LoadLLaMAModel(modelFile, llamaOpts...)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = loader.LoadGPTJModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = loader.LoadGPT2Model(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = loader.LoadStableLMModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = loader.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
supportStreams := false
modelFile := c.Model
// Try to load the model
var llamaerr, gpt2err, gptjerr, stableerr error
llamaOpts := []llama.ModelOption{}
if c.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
@@ -34,25 +111,38 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
if llamaerr != nil {
gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
if gptjerr != nil {
gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
if gpt2err != nil {
stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
if stableerr != nil {
return nil, fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
}
}
}
var inferenceModel interface{}
var err error
if c.Backend == "" {
inferenceModel, err = greedyLoader(loader, modelFile, llamaOpts, uint32(c.Threads))
} else {
inferenceModel, err = backendLoader(c.Backend, loader, modelFile, llamaOpts, uint32(c.Threads))
}
if err != nil {
return nil, err
}
var fn func() (string, error)
switch {
case stableLMModel != nil:
switch model := inferenceModel.(type) {
case *rwkv.RwkvState:
supportStreams = true
fn = func() (string, error) {
stopWord := "\n"
if len(c.StopWords) > 0 {
stopWord = c.StopWords[0]
}
if err := model.ProcessInput(s); err != nil {
return "", err
}
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
return response, nil
}
case *gpt2.StableLM:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
@@ -71,12 +161,12 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return stableLMModel.Predict(
return model.Predict(
s,
predictOptions...,
)
}
case gpt2Model != nil:
case *gpt2.GPT2:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
@@ -95,12 +185,12 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return gpt2Model.Predict(
return model.Predict(
s,
predictOptions...,
)
}
case gptModel != nil:
case *gptj.GPTJ:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gptj.PredictOption{
@@ -119,13 +209,19 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
}
return gptModel.Predict(
return model.Predict(
s,
predictOptions...,
)
}
case model != nil:
case *llama.LLama:
supportStreams = true
fn = func() (string, error) {
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}
// Generate the prediction using the language model
predictOptions := []llama.PredictOption{
llama.SetTemperature(c.Temperature),
@@ -165,10 +261,15 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
}
return model.Predict(
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
}
@@ -185,11 +286,15 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
l.Lock()
defer l.Unlock()
return fn()
res, err := fn()
if tokenCallback != nil && !supportStreams {
tokenCallback(res)
}
return res, err
}, nil
}
func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice)) ([]Choice, error) {
func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
result := []Choice{}
n := input.N
@@ -199,7 +304,7 @@ func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, load
}
// get the model function to call for the result
predFunc, err := ModelInference(predInput, loader, *config)
predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
if err != nil {
return result, err
}

View File

@@ -1,6 +0,0 @@
apiVersion: v2
appVersion: 0.1.0
description: A Helm chart for LocalAI
name: local-ai
type: application
version: 1.0.0

View File

@@ -1,44 +0,0 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "local-ai.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "local-ai.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "local-ai.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "local-ai.labels" -}}
helm.sh/chart: {{ include "local-ai.chart" . }}
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
{{- end }}

View File

@@ -1,39 +0,0 @@
{{- if .Values.dataVolume.enabled }}
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
contentType: archive
source:
{{ .Values.dataVolume.source.type }}:
url: {{ .Values.dataVolume.source.url }}
secretRef: {{ template "local-ai.fullname" . }}
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
{{- end }}
{{- if .Values.dataVolume.source.caCertConfigMap }}
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
{{- end }}
pvc:
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
resources:
requests:
storage: {{ .Values.dataVolume.pvc.size }}
---
{{- if .Values.dataVolume.secret.enabled }}
apiVersion: v1
kind: Secret
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
data:
accessKeyId: {{ .Values.dataVolume.secret.username }}
secretKey: {{ .Values.dataVolume.secret.password }}
{{- end }}
{{- end }}

View File

@@ -1,39 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
replicas: 1
template:
metadata:
name: {{ template "local-ai.fullname" . }}
labels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
spec:
containers:
- name: {{ template "local-ai.fullname" . }}
image: {{ .Values.deployment.image }}
env:
- name: THREADS
value: {{ .Values.deployment.env.threads | quote }}
- name: CONTEXT_SIZE
value: {{ .Values.deployment.env.contextSize | quote }}
- name: MODELS_PATH
value: {{ .Values.deployment.env.modelsPath }}
{{- if .Values.deployment.volume.enabled }}
volumeMounts:
- mountPath: {{ .Values.deployment.env.modelsPath }}
name: models
volumes:
- name: models
persistentVolumeClaim:
claimName: {{ template "local-ai.fullname" . }}
{{- end }}

View File

@@ -1,19 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
{{- if .Values.service.annotations }}
annotations:
{{ toYaml .Values.service.annotations | indent 4 }}
{{- end }}
spec:
selector:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
type: "{{ .Values.service.type }}"
ports:
- protocol: TCP
port: 8080
targetPort: 8080

View File

@@ -1,38 +0,0 @@
deployment:
image: quay.io/go-skynet/local-ai:latest
env:
threads: 14
contextSize: 512
modelsPath: "/models"
volume:
enabled: false
service:
type: ClusterIP
annotations: {}
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
# (requires https://github.com/kubevirt/containerized-data-importer)
dataVolume:
enabled: false
source:
type: "http" # Source type. One of: [ http | s3 ]
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
# CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
# and a base64 encoded pem certificate
caCertConfigMap: ""
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
# that may include sensitive information. Only applicable for the http source type.
secretExtraHeaders: []
pvc:
accessModes:
- ReadWriteOnce
size: 5Gi
secret:
enabled: false
username: "" # base64 encoded
password: "" # base64 encoded

View File

@@ -6,6 +6,11 @@ Here is a list of projects that can easily be integrated with the LocalAI backen
- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mkellerman](https://github.com/mkellerman))
- [discord-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/) (by [@mudler](https://github.com/mudler))
- [langchain](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) (by [@dave-gray101](https://github.com/dave-gray101))
- [langchain-python](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) (by [@mudler](https://github.com/mudler))
- [localai-webui](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/) (by [@dhruvgera](https://github.com/dhruvgera))
- [rwkv](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) (by [@mudler](https://github.com/mudler))
- [slack-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) (by [@mudler](https://github.com/mudler))
## Want to contribute?

View File

@@ -22,5 +22,25 @@ wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
docker-compose up -d --build
```
## Pointing chatbot-ui to a separately managed LocalAI service
If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
```
version: '3.6'
services:
chatgpt:
image: ghcr.io/mckaywrigley/chatbot-ui:main
ports:
- 3000:3000
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
```
Once you've edited the Dockerfile, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
## Accessing chatbot-ui
Open http://localhost:3000 for the Web UI.

View File

@@ -1,5 +1,7 @@
# discord-bot
![Screenshot from 2023-05-01 07-58-19](https://user-images.githubusercontent.com/2420543/235413924-0cb2e75b-f2d6-4119-8610-44386e44afb8.png)
## Setup
```bash
@@ -8,15 +10,13 @@ git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/discord-bot
git clone https://github.com/go-skynet/gpt-discord-bot.git
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Set the discord bot options
# Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup)
cp -rfv .env.example .env
vim .env
@@ -24,5 +24,53 @@ vim .env
docker-compose up -d --build
```
Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup
Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..`
## Kubernetes
- install the local-ai chart first
- change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest:
```yaml
apiVersion: v1
kind: Namespace
metadata:
name: discord-bot
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: localai
namespace: discord-bot
labels:
app: localai
spec:
selector:
matchLabels:
app: localai
replicas: 1
template:
metadata:
labels:
app: localai
name: localai
spec:
containers:
- name: localai-discord
env:
- name: OPENAI_API_KEY
value: "x"
- name: DISCORD_BOT_TOKEN
value: ""
- name: DISCORD_CLIENT_ID
value: ""
- name: OPENAI_API_BASE
value: "http://local-ai.default.svc.cluster.local:8080"
- name: ALLOWED_SERVER_IDS
value: "xx"
- name: SERVER_TO_MODERATION_CHANNEL
value: "1:1"
image: quay.io/go-skynet/gpt-discord-bot:main
```

View File

@@ -16,8 +16,6 @@ services:
command: ["/usr/bin/local-ai" ]
bot:
build:
context: ./gpt-discord-bot
dockerfile: Dockerfile
image: quay.io/go-skynet/gpt-discord-bot:main
env_file:
- .env

View File

@@ -0,0 +1,33 @@
## Langchain-python
Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string.
See the example below:
```
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain-python
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
pip install langchain
pip install openai
export OPENAI_API_BASE=http://localhost:8080
export OPENAI_API_KEY=sk-
python test.py
# A good company name for a company that makes colorful socks would be "Colorsocks".
```

View File

@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

View File

@@ -0,0 +1 @@
../chatbot-ui/models

View File

@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

2
examples/langchain/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
models/ggml-koala-13B-4bit-128g
models/ggml-gpt4all-j

View File

@@ -0,0 +1,6 @@
FROM node:latest
COPY ./langchainjs-localai-example /app
WORKDIR /app
RUN npm install
RUN npm run build
ENTRYPOINT [ "npm", "run", "start" ]

View File

@@ -0,0 +1,5 @@
FROM python:3.10-bullseye
COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT [ "python", "./simple_demo.py" ];

View File

@@ -0,0 +1,30 @@
# langchain
Example of using langchain, with the standard OpenAI llm module, and LocalAI. Has docker compose profiles for both the Typescript and Python versions.
**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain
# (optional) - Edit the example code in typescript.
# vi ./langchainjs-localai-example/index.ts
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose for typescript!
docker-compose --profile ts up --build
# or start with docker-compose for python!
docker-compose --profile py up --build
```
## Copyright
Some of the example code in index.mts is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.

View File

@@ -0,0 +1,43 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
js:
build:
context: .
dockerfile: JS.Dockerfile
profiles:
- js
- ts
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
py:
build:
context: .
dockerfile: PY.Dockerfile
profiles:
- py
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'

View File

@@ -0,0 +1,2 @@
node_modules/
dist/

View File

@@ -0,0 +1,20 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
// "skipFiles": [
// "<node_internals>/**"
// ],
"program": "${workspaceFolder}\\dist\\index.mjs",
"outFiles": [
"${workspaceFolder}/**/*.js"
]
}
]
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
{
"name": "langchainjs-localai-example",
"version": "0.1.0",
"description": "Trivial Example of using langchain + the OpenAI API + LocalAI together",
"main": "index.mjs",
"scripts": {
"build": "tsc --build",
"clean": "tsc --build --clean",
"start": "node --trace-warnings dist/index.mjs"
},
"author": "dave@gray101.com",
"license": "MIT",
"devDependencies": {
"@types/node": "^18.16.3",
"typescript": "^5.0.4"
},
"dependencies": {
"langchain": "^0.0.67",
"typeorm": "^0.3.15"
}
}

View File

@@ -0,0 +1,79 @@
import { OpenAIChat } from "langchain/llms/openai";
import { loadQAStuffChain } from "langchain/chains";
import { Document } from "langchain/document";
import { initializeAgentExecutorWithOptions } from "langchain/agents";
import {Calculator} from "langchain/tools/calculator";
const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
function getModel(): OpenAIChat {
return new OpenAIChat({
prefixMessages: [
{
role: "system",
content: "You are a helpful assistant that answers in pirate language",
},
],
modelName: modelName,
maxTokens: 50,
openAIApiKey: fakeApiKey,
maxRetries: 2
}, {
basePath: pathToLocalAi,
apiKey: fakeApiKey,
});
}
// Minimal example.
export const run = async () => {
const model = getModel();
console.log(`about to model.call at ${new Date().toUTCString()}`);
const res = await model.call(
"What would be a good company name a company that makes colorful socks?"
);
console.log(`${new Date().toUTCString()}`);
console.log({ res });
};
await run();
// This example uses the `StuffDocumentsChain`
export const run2 = async () => {
const model = getModel();
const chainA = loadQAStuffChain(model);
const docs = [
new Document({ pageContent: "Harrison went to Harvard." }),
new Document({ pageContent: "Ankush went to Princeton." }),
];
const resA = await chainA.call({
input_documents: docs,
question: "Where did Harrison go to college?",
});
console.log({ resA });
};
await run2();
// Quickly thrown together example of using tools + agents.
// This seems like it should work, but it doesn't yet.
export const temporarilyBrokenToolTest = async () => {
const model = getModel();
const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
agentType: "zero-shot-react-description",
});
console.log("Loaded agent.");
const input = `What is the value of (500 *2) + 350 - 13?`;
console.log(`Executing with input "${input}"...`);
const result = await executor.call({ input });
console.log(`Got output ${result.output}`);
}
await temporarilyBrokenToolTest();

View File

@@ -0,0 +1,15 @@
{
"compilerOptions": {
"target": "es2022",
"lib": ["ES2022", "DOM"],
"module": "ES2022",
"moduleResolution": "node",
"strict": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"isolatedModules": true,
"outDir": "./dist"
},
"include": ["src", "test"],
"exclude": ["node_modules", "dist"]
}

View File

@@ -0,0 +1,24 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"redirectOutput": true,
"justMyCode": false
},
{
"name": "Python: Attach to Port 5678",
"type": "python",
"request": "attach",
"connect": {
"host": "localhost",
"port": 5678
},
"justMyCode": false
}
]
}

View File

@@ -0,0 +1,3 @@
{
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
}

View File

@@ -0,0 +1,39 @@
import os
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
print('Langchain + LocalAI PYTHON Tests')
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
key = os.environ.get('OPENAI_API_KEY', '-')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
print("Created ChatOpenAI for ", chat.model_name)
template = "You are a helpful assistant that translates {input_language} to {output_language}."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
print("ABOUT to execute")
# get a chat completion from the formatted messages
chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
print(".");

View File

@@ -0,0 +1,32 @@
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2022.12.7
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
idna==3.4
langchain==0.0.157
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
mypy-extensions==1.0.0
numexpr==2.8.4
numpy==1.24.3
openai==0.27.6
openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
requests==2.29.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
urllib3==1.26.15
yarl==1.9.2

View File

@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

View File

@@ -0,0 +1 @@
{{.Input}}

View File

@@ -0,0 +1,18 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 4
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
backend: "gptj"
template:
completion: completion
chat: completion # gpt4all

View File

@@ -0,0 +1,4 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -0,0 +1,26 @@
# localai-webui
Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/localai-webui
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download any desired models to models/ in the parent LocalAI project dir
# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin
# start with docker-compose
docker-compose up -d --build
```
Open http://localhost:3000 for the Web UI.

View File

@@ -0,0 +1,20 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]
frontend:
image: quay.io/go-skynet/localai-frontend:master
ports:
- 3000:3000

View File

@@ -0,0 +1,10 @@
FROM python
# convert the model (one-off)
RUN pip3 install torch numpy
WORKDIR /build
COPY ./scripts/ .
RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
ENTRYPOINT [ "/build/build.sh" ]

59
examples/rwkv/README.md Normal file
View File

@@ -0,0 +1,59 @@
# rwkv
Example of how to run rwkv models.
## Run models
Setup:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/rwkv
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# build the tooling image to convert an rwkv model locally:
docker build -t rwkv-converter -f Dockerfile.build .
# download and convert a model (one-off) - it's going to be fast on CPU too!
docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
# Get the tokenizer
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
# start with docker-compose
docker-compose up -d --build
```
Test it out:
```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"prompt": "A long time ago, in a galaxy far away",
"max_tokens": 100,
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```
### Fine tuning
See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).
## See also
- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)

View File

@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

View File

@@ -0,0 +1,19 @@
name: gpt-3.5-turbo
parameters:
model: rwkv
top_k: 80
temperature: 0.9
max_tokens: 100
top_p: 0.8
context_size: 1024
threads: 14
backend: "rwkv"
cutwords:
- "Bob:.*"
roles:
user: "Bob:"
system: "Alice:"
assistant: "Alice:"
template:
completion: rwkv_completion
chat: rwkv_chat

View File

@@ -0,0 +1,13 @@
The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
Bob: Hello Alice, how are you doing?
Alice: Hi Bob! Thanks, I'm fine. What about you?
Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
Alice: Not at all! I'm listening.
{{.Input}}
Alice:

View File

@@ -0,0 +1 @@
Complete the following sentence: {{.Input}}

View File

@@ -0,0 +1,11 @@
SLACK_APP_TOKEN=xapp-1-...
SLACK_BOT_TOKEN=xoxb-...
OPENAI_API_KEY=sk-...
OPENAI_API_BASE=http://api:8080
OPENAI_MODEL=gpt-3.5-turbo
OPENAI_TIMEOUT_SECONDS=60
#OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check
#for mistakes and make suggestion to improve the language of the given text"
USE_SLACK_LANGUAGE=true
SLACK_APP_LOG_LEVEL=INFO
TRANSLATE_MARKDOWN=true

View File

@@ -0,0 +1,27 @@
# Slack bot
Slackbot using: https://github.com/seratch/ChatGPT-in-Slack
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/slack-bot
git clone https://github.com/seratch/ChatGPT-in-Slack
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Set the discord bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
cp -rfv .env.example .env
vim .env
# start with docker-compose
docker-compose up -d --build
```

View File

@@ -0,0 +1,23 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
bot:
build:
context: ./ChatGPT-in-Slack
dockerfile: Dockerfile
env_file:
- .env

1
examples/slack-bot/models Symbolic link
View File

@@ -0,0 +1 @@
../chatbot-ui/models

21
go.mod
View File

@@ -3,18 +3,20 @@ module github.com/go-skynet/LocalAI
go 1.19
require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2
github.com/go-skynet/go-llama.cpp v0.0.0-20230503200855-2e6ae1269e03
github.com/gofiber/fiber/v2 v2.44.0
github.com/hashicorp/go-multierror v1.1.1
github.com/jaypipes/ghw v0.10.0
github.com/onsi/ginkgo/v2 v2.9.2
github.com/onsi/ginkgo/v2 v2.9.4
github.com/onsi/gomega v1.27.6
github.com/otiai10/openaigo v1.1.0
github.com/rs/zerolog v1.29.1
github.com/sashabaranov/go-openai v1.9.0
github.com/urfave/cli/v2 v2.25.1
github.com/valyala/fasthttp v1.46.0
github.com/sashabaranov/go-openai v1.9.3
github.com/urfave/cli/v2 v2.25.3
github.com/valyala/fasthttp v1.47.0
gopkg.in/yaml.v3 v3.0.1
)
@@ -23,12 +25,13 @@ require (
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/jaypipes/pcidb v1.0.0 // indirect
github.com/klauspost/compress v1.16.3 // indirect
github.com/kr/text v0.2.0 // indirect
@@ -46,10 +49,10 @@ require (
github.com/valyala/bytebufferpool v1.0.0 // indirect
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/net v0.8.0 // indirect
golang.org/x/net v0.9.0 // indirect
golang.org/x/sys v0.7.0 // indirect
golang.org/x/text v0.8.0 // indirect
golang.org/x/tools v0.7.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/tools v0.8.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
howett.net/plist v1.0.0 // indirect
)

46
go.sum
View File

@@ -12,10 +12,16 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d h1:lSHwlYf1H4WAWYgf7rjEVTGen1qmigUq2Egpu8mnQiY=
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d/go.mod h1:H6QBF7/Tz6DAEBDXQged4H1BvsmqY/K5FG9wQRGa01g=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
@@ -23,14 +29,12 @@ github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
github.com/go-skynet/go-llama.cpp v0.0.0-20230428071219-3d084e4299e9 h1:N/0SBefkMFao6GiGhIF7+5EdYOMHn4KnCG2AFcIXPt0=
github.com/go-skynet/go-llama.cpp v0.0.0-20230428071219-3d084e4299e9/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230429125915-9bf702fe56b9 h1:20/tdOA4+b7Y7lCob+q2sczfOSz0pp+14L32adYJ+uQ=
github.com/go-skynet/go-llama.cpp v0.0.0-20230429125915-9bf702fe56b9/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230429225431-361b9f87de6d h1:7KDq1Uylm1mXphQ+M2qztekXAvODtXvJDHrXQguRw9k=
github.com/go-skynet/go-llama.cpp v0.0.0-20230429225431-361b9f87de6d/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2 h1:CYQRCbOfYtC77OxweAyrdxSVwoLIM/EdZ6Ij+xBzta8=
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405 h1:pbIxJ/eiL1Irdprxk/mquaxjR1XDGCE+7CT9BGJNRaY=
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230503200855-2e6ae1269e03 h1:j9fhITFhkz4SczJU0jIaMYo5tdTVTrj+zdhEgWHEr40=
github.com/go-skynet/go-llama.cpp v0.0.0-20230503200855-2e6ae1269e03/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -43,6 +47,10 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
@@ -67,6 +75,10 @@ github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
github.com/onsi/ginkgo/v2 v2.9.3 h1:5X2vl/isiKqkrOYjiaGgp3JQOcLV59g5o5SuTMqCcxU=
github.com/onsi/ginkgo/v2 v2.9.3/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y=
@@ -86,8 +98,12 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.1 h1:3N52HkJKo9Zlo/oe1AVv5ZkCOny0ra58/ACvAxkN3MM=
github.com/sashabaranov/go-openai v1.9.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.2 h1:7//Glm9EiMBjelgmBb00yYzKYqm1jckHWWTDLahfeuQ=
github.com/sashabaranov/go-openai v1.9.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
@@ -99,12 +115,12 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
github.com/urfave/cli/v2 v2.25.1 h1:zw8dSP7ghX0Gmm8vugrs6q9Ku0wzweqPyshy+syu9Gw=
github.com/urfave/cli/v2 v2.25.1/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY=
github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.46.0 h1:6ZRhrFg8zBXTRYY6vdzbFhqsBd7FVv123pV2m9V87U4=
github.com/valyala/fasthttp v1.46.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c=
github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
@@ -126,6 +142,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -155,6 +173,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
@@ -162,6 +182,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

14
main.go
View File

@@ -1,11 +1,12 @@
package main
import (
"fmt"
"os"
"path/filepath"
api "github.com/go-skynet/LocalAI/api"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/jaypipes/ghw"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
@@ -20,12 +21,6 @@ func main() {
os.Exit(1)
}
threads := 4
cpu, err := ghw.CPU()
if err == nil {
threads = int(cpu.TotalCores)
}
app := &cli.App{
Name: "LocalAI",
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
@@ -42,13 +37,13 @@ func main() {
Name: "threads",
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
EnvVars: []string{"THREADS"},
Value: threads,
Value: 4,
},
&cli.StringFlag{
Name: "models-path",
DefaultText: "Path containing models used for inferencing",
EnvVars: []string{"MODELS_PATH"},
Value: path,
Value: filepath.Join(path, "models"),
},
&cli.StringFlag{
Name: "config-file",
@@ -85,6 +80,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
UsageText: `local-ai [options]`,
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
},
}

View File

@@ -12,6 +12,7 @@ import (
"github.com/rs/zerolog/log"
rwkv "github.com/donomii/go-rwkv.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
@@ -25,8 +26,8 @@ type ModelLoader struct {
gptmodels map[string]*gptj.GPTJ
gpt2models map[string]*gpt2.GPT2
gptstablelmmodels map[string]*gpt2.StableLM
promptsTemplates map[string]*template.Template
rwkv map[string]*rwkv.RwkvState
promptsTemplates map[string]*template.Template
}
func NewModelLoader(modelPath string) *ModelLoader {
@@ -36,6 +37,7 @@ func NewModelLoader(modelPath string) *ModelLoader {
gptmodels: make(map[string]*gptj.GPTJ),
gptstablelmmodels: make(map[string]*gpt2.StableLM),
models: make(map[string]*llama.LLama),
rwkv: make(map[string]*rwkv.RwkvState),
promptsTemplates: make(map[string]*template.Template),
}
}
@@ -79,10 +81,9 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,
if exists {
m = t
}
}
if m == nil {
return "", nil
return "", fmt.Errorf("failed loading any template")
}
var buf bytes.Buffer
@@ -168,13 +169,6 @@ func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
@@ -207,17 +201,6 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
if _, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model is GPT2: %s", modelName)
return nil, fmt.Errorf("this model is a GPT2 one")
}
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
@@ -236,6 +219,36 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
return model, err
}
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
log.Debug().Msgf("Loading model name: %s", modelName)
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.rwkv[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
tokenPath := filepath.Join(ml.ModelPath, tokenFile)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model := rwkv.LoadFiles(modelFile, tokenPath, threads)
if model == nil {
return nil, fmt.Errorf("could not load model")
}
ml.rwkv[modelName] = model
return model, nil
}
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
@@ -252,21 +265,6 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
return m, nil
}
// TODO: This needs refactoring, it's really bad to have it in here
// Check if we have a GPTJ model loaded instead - if we do we return an error so the API tries with GPTJ
if _, ok := ml.gptmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTJ: %s", modelName)
return nil, fmt.Errorf("this model is a GPTJ one")
}
if _, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model is GPT2: %s", modelName)
return nil, fmt.Errorf("this model is a GPT2 one")
}
if _, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
return nil, fmt.Errorf("this model is a GPTStableLM one")
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

View File

@@ -0,0 +1,3 @@
{{.Input}}
### Response:

View File

@@ -1,17 +1,4 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:base"
],
"regexManagers": [
{
"fileMatch": [
"^Makefile$"
],
"matchStrings": [
"#\\s*renovate:\\s*datasource=(?<datasource>.*?) depName=(?<depName>.*?)( datasourceTemplate=(?<datasourceTemplate>.*?))?( packageNameTemplate=(?<packageNameTemplate>.*?))?( depNameTemplate=(?<depNameTemplate>.*?))?( valueTemplate=(?<currentValueTemplate>.*?))?( versioning=(?<versioning>.*?))?\\s+.+_VERSION=(?<currentValue>.*?)\\s"
],
"versioningTemplate": "{{#if versioning}}{{versioning}}{{/if}}"
}
]
"extends": ["config:base"]
}