Compare commits


22 Commits

Author SHA1 Message Date
Ettore Di Giacinto
714bfcd45b fix: missing returning error and free callback stream (#187) 2023-05-04 19:49:43 +02:00
renovate[bot]
77ce8b953e fix(deps): update github.com/donomii/go-rwkv.cpp digest to af62fcc (#171)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:48 +02:00
renovate[bot]
01ada95941 fix(deps): update github.com/go-skynet/go-llama.cpp digest to 2e6ae12 (#172)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 18:30:11 +02:00
ci-robbot [bot]
eabdc5042a ⬆️ Update go-skynet/go-llama.cpp (#184)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-04 18:28:49 +02:00
Dhruv Gera
96267d9437 localai: Include the WebUI project example (#130)
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
2023-05-04 18:27:58 +02:00
Ettore Di Giacinto
9497a24127 fix: hardcode default number of cores to '4' (#186) 2023-05-04 18:14:58 +02:00
Ettore Di Giacinto
fdf75c6d0e rwkv fixes and examples (#185) 2023-05-04 17:32:23 +02:00
mudler
6352308882 ci: minor fixups 2023-05-04 15:08:20 +02:00
mudler
a8172a0f4e ci: fix typo 2023-05-04 15:04:41 +02:00
mudler
ebcd10d66f ci: manually update deps 2023-05-04 15:01:29 +02:00
mudler
885642915f ci: add renovate suffix 2023-05-04 12:26:59 +02:00
mudler
2e424491c0 ci: lookupNameTemplate -> depNameTemplate 2023-05-04 12:23:05 +02:00
mudler
aa6faef8f7 ci: versioning -> versioningTemplate 2023-05-04 12:07:29 +02:00
mudler
b3254baf60 ci: add versioning 2023-05-04 12:05:39 +02:00
mudler
0a43d27f0e ci: update renovate 2023-05-04 12:02:19 +02:00
Ettore Di Giacinto
3fe11fe24d ci: attempt to configure renovate with custom regexes (#178) 2023-05-04 11:55:14 +02:00
renovate[bot]
af18fdc749 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.3 (#174)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:44:02 +02:00
renovate[bot]
32b5eddd7d fix(deps): update module github.com/onsi/ginkgo/v2 to v2.9.4 (#173)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-04 08:41:51 +02:00
Dave
07c3aa1869 Dockerized Langchain / PY example (#175) 2023-05-04 08:41:13 +02:00
renovate[bot]
e59bad89e7 fix(deps): update module github.com/sashabaranov/go-openai to v1.9.2 (#164)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-03 23:05:50 +02:00
Jeremy Price
b971807980 Looks for models in $CWD/models/ dir by default (#169) 2023-05-03 23:03:31 +02:00
Ettore Di Giacinto
c974dad799 Return usage in the API responses (#166) 2023-05-03 17:29:18 +02:00
34 changed files with 500 additions and 72 deletions

.github/bump_deps.sh (vendored executable file, 9 lines)

@@ -0,0 +1,9 @@
#!/bin/bash
set -xe
REPO=$1
BRANCH=$2
VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
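The workflow below invokes this script once per dependency in its matrix; run by hand, the go-llama.cpp entry would look like this:

```bash
# Resolve the latest commit SHA on go-skynet/go-llama.cpp's master branch
# and pin GOLLAMA_VERSION in the Makefile to it
bash .github/bump_deps.sh go-skynet/go-llama.cpp master GOLLAMA_VERSION
```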

.github/workflows/bump_deps.yaml (vendored file, 42 lines)

@@ -0,0 +1,42 @@
name: Bump dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
strategy:
fail-fast: false
matrix:
include:
- repository: "go-skynet/go-gpt4all-j.cpp"
variable: "GOGPT4ALLJ_VERSION"
branch: "master"
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-gpt2.cpp"
variable: "GOGPT2_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Bump dependencies 🔧
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version
signoff: true
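Since the workflow also declares a `workflow_dispatch` trigger, it can be started manually in addition to the nightly cron, for example via the GitHub CLI (assuming `gh` is authenticated against the repository):

```bash
gh workflow run bump_deps.yaml
```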

Makefile

@@ -2,13 +2,10 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
GOLLAMA_VERSION?=llama.cpp-f4cef87
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
GOLLAMA_VERSION?=2e6ae1269e035886fc64e268a6dda9d8c4ba8c75
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
@@ -77,7 +74,8 @@ go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
go-llama:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a

README.md

@@ -45,26 +45,45 @@ Tested with:
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
- Koala
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
- WizardLM
- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
### Vicuna, Alpaca, LLaMa...
[llama.cpp](https://github.com/ggerganov/llama.cpp) based models are compatible
### GPT4ALL
Note: You might need to convert older models to the new format; see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all), for instance, to run `gpt4all`.
### GPT4ALL-J
No changes required to the model.
### RWKV
<details>
For `rwkv` models, you also need to place the associated tokenizer alongside the ggml model:
A full example of how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
Note: rwkv models have an associated tokenizer that needs to be provided along with the model:
```
ls models
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
```
</details>
### Others
It should also be compatible with StableLM and GPTNeoX ggml models (untested).
### Hardware requirements
Depending on the model you are attempting to run, you might need more RAM or CPU resources. See also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for the memory/disk requirements of `ggml`-based backends. `rwkv` is less demanding on resources.
## Usage
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
@@ -143,8 +162,6 @@ To build locally, run `make build` (see below).
### Other examples
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
To see other examples on how to integrate with other projects for instance chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
@@ -553,7 +570,7 @@ Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/ll
### Where is the webUI?
<details>
We are working on a good out-of-the-box experience; however, as LocalAI is an API, you can already plug it into existing projects that provide UI interfaces to OpenAI's APIs. There are several on GitHub already that should be compatible with LocalAI (as it mimics the OpenAI API)
localai-webui and chatbot-ui are available in the examples section and can be set up as per the instructions. However, as LocalAI is an API, you can already plug it into existing projects that provide UI interfaces to OpenAI's APIs. There are several on GitHub already that should be compatible with LocalAI (as it mimics the OpenAI API)
</details>

api/openai.go

@@ -27,12 +27,19 @@ type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Usage OpenAIUsage `json:"usage"`
}
type Choice struct {
@@ -292,6 +299,21 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
}
func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
resp := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
Object: "chat.completion.chunk",
}
log.Debug().Msgf("Sending goroutine: %s", s)
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
@@ -343,19 +365,7 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
if input.Stream {
responses := make(chan OpenAIResponse)
go func() {
ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
resp := OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
Object: "chat.completion.chunk",
}
responses <- resp
return true
})
close(responses)
}()
go process(predInput, input, config, loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
@@ -395,6 +405,8 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
Choices: result,
Object: "chat.completion",
}
respData, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", respData)
// Return the prediction in the response body
return c.JSON(resp)

api/prediction.go

@@ -129,12 +129,15 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
supportStreams = true
fn = func() (string, error) {
//model.ProcessInput("You are a chatbot that is very good at chatting. blah blah blah")
stopWord := "\n"
if len(c.StopWords) > 0 {
stopWord = c.StopWords[0]
}
if err := model.ProcessInput(s); err != nil {
return "", err
}
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
return response, nil
@@ -258,10 +261,15 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
}
return model.Predict(
str, err := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, err
}
}

examples/README.md

@@ -7,6 +7,9 @@ Here is a list of projects that can easily be integrated with the LocalAI backen
- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mkellerman](https://github.com/mkellerman))
- [discord-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/) (by [@mudler](https://github.com/mudler))
- [langchain](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) (by [@dave-gray101](https://github.com/dave-gray101))
- [langchain-python](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) (by [@mudler](https://github.com/mudler))
- [localai-webui](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/) (by [@dhruvgera](https://github.com/dhruvgera))
- [rwkv](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) (by [@mudler](https://github.com/mudler))
- [slack-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) (by [@mudler](https://github.com/mudler))
## Want to contribute?

examples/langchain-python/README.md

@@ -0,0 +1,33 @@
## Langchain-python
Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string.
See the example below:
```
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain-python
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
pip install langchain
pip install openai
export OPENAI_API_BASE=http://localhost:8080
export OPENAI_API_KEY=sk-
python test.py
# A good company name for a company that makes colorful socks would be "Colorsocks".
```

examples/langchain-python/docker-compose.yaml

@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

examples/langchain-python/models (symlink)

@@ -0,0 +1 @@
../chatbot-ui/models

examples/langchain-python/test.py

@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

examples/langchain/PY.Dockerfile

@@ -0,0 +1,5 @@
FROM python:3.10-bullseye
COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT [ "python", "./simple_demo.py" ];

examples/langchain/README.md

@@ -1,10 +1,6 @@
# langchain
Example of using langchain in TypeScript, with the standard OpenAI llm module, and LocalAI.
Example for python langchain to follow at a later date
Set up to make it easy to modify the `index.mts` file to look like any langchain example file.
Example of using langchain, with the standard OpenAI llm module, and LocalAI. Has docker compose profiles for both the Typescript and Python versions.
**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
@@ -22,8 +18,11 @@ cd LocalAI/examples/langchain
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up --build
# start with docker-compose for typescript!
docker-compose --profile ts up --build
# or start with docker-compose for python!
docker-compose --profile py up --build
```
## Copyright

examples/langchain/docker-compose.yaml

@@ -15,11 +15,29 @@ services:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
langchainjs:
js:
build:
context: .
dockerfile: JS.Dockerfile
profiles:
- js
- ts
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://api:8080/v1'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
py:
build:
context: .
dockerfile: PY.Dockerfile
profiles:
- py
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'

examples/langchain/langchainjs-localai-example/index.mts

@@ -4,7 +4,7 @@ import { Document } from "langchain/document";
import { initializeAgentExecutorWithOptions } from "langchain/agents";
import {Calculator} from "langchain/tools/calculator";
const pathToLocalAi = process.env['OPENAI_API_HOST'] || 'http://api:8080/v1';
const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';

examples/langchain/langchainpy-localai-example/.vscode/launch.json

@@ -0,0 +1,24 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"redirectOutput": true,
"justMyCode": false
},
{
"name": "Python: Attach to Port 5678",
"type": "python",
"request": "attach",
"connect": {
"host": "localhost",
"port": 5678
},
"justMyCode": false
}
]
}

examples/langchain/langchainpy-localai-example/.vscode/settings.json

@@ -0,0 +1,3 @@
{
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
}

examples/langchain/langchainpy-localai-example/simple_demo.py

@@ -0,0 +1,39 @@
import os
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
print('Langchain + LocalAI PYTHON Tests')
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
key = os.environ.get('OPENAI_API_KEY', '-')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
print("Created ChatOpenAI for ", chat.model_name)
template = "You are a helpful assistant that translates {input_language} to {output_language}."
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
print("ABOUT to execute")
# get a chat completion from the formatted messages
chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
print(".");

examples/langchain/langchainpy-localai-example/requirements.txt

@@ -0,0 +1,32 @@
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2022.12.7
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
idna==3.4
langchain==0.0.157
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
mypy-extensions==1.0.0
numexpr==2.8.4
numpy==1.24.3
openai==0.27.6
openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
requests==2.29.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
urllib3==1.26.15
yarl==1.9.2


@@ -0,0 +1,6 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))


@@ -12,6 +12,7 @@ stopwords:
roles:
user: " "
system: " "
backend: "gptj"
template:
completion: completion
chat: completion # gpt4all

examples/localai-webui/README.md

@@ -0,0 +1,26 @@
# localai-webui
Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/localai-webui
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download any desired models to models/ in the parent LocalAI project dir
# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin
# start with docker-compose
docker-compose up -d --build
```
Open http://localhost:3000 for the Web UI.

examples/localai-webui/docker-compose.yaml

@@ -0,0 +1,20 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]
frontend:
image: quay.io/go-skynet/localai-frontend:master
ports:
- 3000:3000

examples/rwkv/Dockerfile.build

@@ -0,0 +1,10 @@
FROM python
# convert the model (one-off)
RUN pip3 install torch numpy
WORKDIR /build
COPY ./scripts/ .
RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
ENTRYPOINT [ "/build/build.sh" ]

examples/rwkv/README.md (new file, 59 lines)

@@ -0,0 +1,59 @@
# rwkv
Example of how to run rwkv models.
## Run models
Setup:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/rwkv
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# build the tooling image to convert an rwkv model locally:
docker build -t rwkv-converter -f Dockerfile.build .
# download and convert a model (one-off) - it's going to be fast on CPU too!
docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
# Get the tokenizer
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
# start with docker-compose
docker-compose up -d --build
```
Test it out:
```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"prompt": "A long time ago, in a galaxy far away",
"max_tokens": 100,
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9, "top_p": 0.8, "top_k": 80
}'
# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```
### Fine tuning
See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).
## See also
- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)

examples/rwkv/docker-compose.yaml

@@ -0,0 +1,16 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile.dev
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

examples/rwkv/models/gpt-3.5-turbo.yaml

@@ -0,0 +1,19 @@
name: gpt-3.5-turbo
parameters:
model: rwkv
top_k: 80
temperature: 0.9
max_tokens: 100
top_p: 0.8
context_size: 1024
threads: 14
backend: "rwkv"
cutwords:
- "Bob:.*"
roles:
user: "Bob:"
system: "Alice:"
assistant: "Alice:"
template:
completion: rwkv_completion
chat: rwkv_chat

examples/rwkv/models/rwkv_chat.tmpl

@@ -0,0 +1,13 @@
The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
Bob: Hello Alice, how are you doing?
Alice: Hi Bob! Thanks, I'm fine. What about you?
Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
Alice: Not at all! I'm listening.
{{.Input}}
Alice:

examples/rwkv/models/rwkv_completion.tmpl

@@ -0,0 +1 @@
Complete the following sentence: {{.Input}}
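Putting the model config and templates together: a chat request's messages are prefixed with the configured roles and substituted into `{{.Input}}` of the chat template, so `{"role": "user", "content": "How are you?"}` renders roughly as below (a sketch; the exact whitespace depends on the prompt-assembly code), while the completion template simply wraps the prompt, e.g. `Complete the following sentence: A long time ago, in a galaxy far away`:

```
The following is a verbose detailed conversation between Bob and a woman, Alice. [...]
Bob: How are you?
Alice:
```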

go.mod (8 lines)

@@ -3,17 +3,18 @@ module github.com/go-skynet/LocalAI
go 1.19
require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405
github.com/go-skynet/go-llama.cpp v0.0.0-20230503200855-2e6ae1269e03
github.com/gofiber/fiber/v2 v2.44.0
github.com/hashicorp/go-multierror v1.1.1
github.com/jaypipes/ghw v0.10.0
github.com/onsi/ginkgo/v2 v2.9.3
github.com/onsi/ginkgo/v2 v2.9.4
github.com/onsi/gomega v1.27.6
github.com/otiai10/openaigo v1.1.0
github.com/rs/zerolog v1.29.1
github.com/sashabaranov/go-openai v1.9.1
github.com/sashabaranov/go-openai v1.9.3
github.com/urfave/cli/v2 v2.25.3
github.com/valyala/fasthttp v1.47.0
gopkg.in/yaml.v3 v3.0.1
@@ -23,7 +24,6 @@ require (
github.com/StackExchange/wmi v1.2.1 // indirect
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect

go.sum (10 lines)

@@ -14,6 +14,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d h1:lSHwlYf1H4WAWYgf7rjEVTGen1qmigUq2Egpu8mnQiY=
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d/go.mod h1:H6QBF7/Tz6DAEBDXQged4H1BvsmqY/K5FG9wQRGa01g=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
@@ -31,6 +33,8 @@ github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2 h1:CYQRCbOf
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405 h1:pbIxJ/eiL1Irdprxk/mquaxjR1XDGCE+7CT9BGJNRaY=
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230503200855-2e6ae1269e03 h1:j9fhITFhkz4SczJU0jIaMYo5tdTVTrj+zdhEgWHEr40=
github.com/go-skynet/go-llama.cpp v0.0.0-20230503200855-2e6ae1269e03/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
@@ -73,6 +77,8 @@ github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
github.com/onsi/ginkgo/v2 v2.9.3 h1:5X2vl/isiKqkrOYjiaGgp3JQOcLV59g5o5SuTMqCcxU=
github.com/onsi/ginkgo/v2 v2.9.3/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y=
@@ -94,6 +100,10 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.9.1 h1:3N52HkJKo9Zlo/oe1AVv5ZkCOny0ra58/ACvAxkN3MM=
github.com/sashabaranov/go-openai v1.9.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.2 h1:7//Glm9EiMBjelgmBb00yYzKYqm1jckHWWTDLahfeuQ=
github.com/sashabaranov/go-openai v1.9.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=

main.go (14 lines)

@@ -1,11 +1,12 @@
package main
import (
"fmt"
"os"
"path/filepath"
api "github.com/go-skynet/LocalAI/api"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/jaypipes/ghw"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
@@ -20,12 +21,6 @@ func main() {
os.Exit(1)
}
threads := 4
cpu, err := ghw.CPU()
if err == nil {
threads = int(cpu.TotalCores)
}
app := &cli.App{
Name: "LocalAI",
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
@@ -42,13 +37,13 @@ func main() {
Name: "threads",
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
EnvVars: []string{"THREADS"},
Value: threads,
Value: 4,
},
&cli.StringFlag{
Name: "models-path",
DefaultText: "Path containing models used for inferencing",
EnvVars: []string{"MODELS_PATH"},
Value: path,
Value: filepath.Join(path, "models"),
},
&cli.StringFlag{
Name: "config-file",
@@ -85,6 +80,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
UsageText: `local-ai [options]`,
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
},
}
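Net effect of the main.go changes: the thread count no longer comes from ghw CPU detection but defaults to a hardcoded 4 (#186), and the default models path moves from the working directory itself to `$CWD/models` (#169). Both remain overridable, as sketched here with the flag and environment variable names visible in the diff:

```bash
# equivalent to the new defaults
local-ai --threads 4 --models-path ./models

# or override via environment
THREADS=8 MODELS_PATH=/srv/models local-ai
```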

pkg/model/loader.go

@@ -81,10 +81,9 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,
if exists {
m = t
}
}
if m == nil {
return "", nil
return "", fmt.Errorf("failed loading any template")
}
var buf bytes.Buffer


@@ -1,17 +1,4 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
"config:base"
],
"regexManagers": [
{
"fileMatch": [
"^Makefile$"
],
"matchStrings": [
"#\\s*renovate:\\s*datasource=(?<datasource>.*?) depName=(?<depName>.*?)( datasourceTemplate=(?<datasourceTemplate>.*?))?( packageNameTemplate=(?<packageNameTemplate>.*?))?( depNameTemplate=(?<depNameTemplate>.*?))?( valueTemplate=(?<currentValueTemplate>.*?))?( versioning=(?<versioning>.*?))?\\s+.+_VERSION=(?<currentValue>.*?)\\s"
],
"versioningTemplate": "{{#if versioning}}{{versioning}}{{/if}}"
}
]
"extends": ["config:base"]
}