mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 19:22:39 -05:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9497a24127 | ||
|
|
fdf75c6d0e | ||
|
|
6352308882 | ||
|
|
a8172a0f4e | ||
|
|
ebcd10d66f | ||
|
|
885642915f | ||
|
|
2e424491c0 | ||
|
|
aa6faef8f7 | ||
|
|
b3254baf60 | ||
|
|
0a43d27f0e | ||
|
|
3fe11fe24d | ||
|
|
af18fdc749 | ||
|
|
32b5eddd7d | ||
|
|
07c3aa1869 | ||
|
|
e59bad89e7 | ||
|
|
b971807980 | ||
|
|
c974dad799 |
9
.github/bump_deps.sh
vendored
Executable file
9
.github/bump_deps.sh
vendored
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -xe
|
||||
REPO=$1
|
||||
BRANCH=$2
|
||||
VAR=$3
|
||||
|
||||
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
|
||||
|
||||
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
|
||||
42
.github/workflows/bump_deps.yaml
vendored
Normal file
42
.github/workflows/bump_deps.yaml
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
name: Bump dependencies
|
||||
on:
|
||||
schedule:
|
||||
- cron: 0 20 * * *
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
bump:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- repository: "go-skynet/go-gpt4all-j.cpp"
|
||||
variable: "GOGPT4ALLJ_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/go-llama.cpp"
|
||||
variable: "GOLLAMA_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/go-gpt2.cpp"
|
||||
variable: "GOGPT2_VERSION"
|
||||
branch: "master"
|
||||
- repository: "donomii/go-rwkv.cpp"
|
||||
variable: "RWKV_VERSION"
|
||||
branch: "main"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Bump dependencies 🔧
|
||||
run: |
|
||||
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
|
||||
- name: Create Pull Request
|
||||
uses: peter-evans/create-pull-request@v5
|
||||
with:
|
||||
token: ${{ secrets.UPDATE_BOT_TOKEN }}
|
||||
push-to-fork: ci-forks/LocalAI
|
||||
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
|
||||
title: ':arrow_up: Update ${{ matrix.repository }}'
|
||||
branch: "update/${{ matrix.variable }}"
|
||||
body: Bump of ${{ matrix.repository }} version
|
||||
signoff: true
|
||||
|
||||
|
||||
|
||||
12
Makefile
12
Makefile
@@ -2,13 +2,10 @@ GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=llama.cpp-f4cef87
|
||||
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
|
||||
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
|
||||
|
||||
GOLLAMA_VERSION?=llama.cpp-f4cef87
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
|
||||
|
||||
@@ -77,7 +74,8 @@ go-gpt2/libgpt2.a: go-gpt2
|
||||
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
|
||||
|
||||
go-llama:
|
||||
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
go-llama/libbinding.a: go-llama
|
||||
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
|
||||
|
||||
25
README.md
25
README.md
@@ -45,26 +45,45 @@ Tested with:
|
||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
|
||||
- Koala
|
||||
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
|
||||
- WizardLM
|
||||
- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
|
||||
|
||||
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
|
||||
### Vicuna, Alpaca, LLaMa...
|
||||
|
||||
[llama.cpp](https://github.com/ggerganov/llama.cpp) based models are compatible
|
||||
|
||||
### GPT4ALL
|
||||
|
||||
Note: You might need to convert older models to the new format; see, for instance, [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for how to run `gpt4all`.
|
||||
|
||||
### GPT4ALL-J
|
||||
|
||||
No changes required to the model.
|
||||
|
||||
### RWKV
|
||||
|
||||
<details>
|
||||
|
||||
For `rwkv` models, you also need to provide the associated tokenizer along with the ggml model:
|
||||
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
|
||||
|
||||
Note: rwkv models have an associated tokenizer that needs to be provided along with the model:
|
||||
|
||||
```
|
||||
ls models
|
||||
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
|
||||
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Others
|
||||
|
||||
It should also be compatible with StableLM and GPTNeoX ggml models (untested).
|
||||
|
||||
### Hardware requirements
|
||||
|
||||
Depending on the model you are attempting to run, you might need more RAM or CPU resources. See also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml`-based backends. `rwkv` is less expensive on resources.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
|
||||
@@ -27,12 +27,19 @@ type ErrorResponse struct {
|
||||
Error *APIError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
type OpenAIResponse struct {
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Choices []Choice `json:"choices,omitempty"`
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Choices []Choice `json:"choices,omitempty"`
|
||||
Usage OpenAIUsage `json:"usage"`
|
||||
}
|
||||
|
||||
type Choice struct {
|
||||
@@ -395,6 +402,8 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
|
||||
Choices: result,
|
||||
Object: "chat.completion",
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", respData)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
|
||||
@@ -129,12 +129,15 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
supportStreams = true
|
||||
|
||||
fn = func() (string, error) {
|
||||
//model.ProcessInput("You are a chatbot that is very good at chatting. blah blah blah")
|
||||
stopWord := "\n"
|
||||
if len(c.StopWords) > 0 {
|
||||
stopWord = c.StopWords[0]
|
||||
}
|
||||
|
||||
if err := model.ProcessInput(s); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
|
||||
|
||||
return response, nil
|
||||
|
||||
@@ -7,6 +7,8 @@ Here is a list of projects that can easily be integrated with the LocalAI backen
|
||||
- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mkellerman](https://github.com/mkellerman))
|
||||
- [discord-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/) (by [@mudler](https://github.com/mudler))
|
||||
- [langchain](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) (by [@dave-gray101](https://github.com/dave-gray101))
|
||||
- [langchain-python](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) (by [@mudler](https://github.com/mudler))
|
||||
- [rwkv](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) (by [@mudler](https://github.com/mudler))
|
||||
- [slack-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) (by [@mudler](https://github.com/mudler))
|
||||
|
||||
## Want to contribute?
|
||||
|
||||
33
examples/langchain-python/README.md
Normal file
33
examples/langchain-python/README.md
Normal file
@@ -0,0 +1,33 @@
|
||||
## Langchain-python
|
||||
|
||||
Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
|
||||
|
||||
To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide a token with a random string.
|
||||
|
||||
See the example below:
|
||||
|
||||
```
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/langchain-python
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
|
||||
|
||||
pip install langchain
|
||||
pip install openai
|
||||
|
||||
export OPENAI_API_BASE=http://localhost:8080
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python test.py
|
||||
# A good company name for a company that makes colorful socks would be "Colorsocks".
|
||||
```
|
||||
16
examples/langchain-python/docker-compose.yaml
Normal file
16
examples/langchain-python/docker-compose.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
1
examples/langchain-python/models
Symbolic link
1
examples/langchain-python/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../chatbot-ui/models
|
||||
6
examples/langchain-python/test.py
Normal file
6
examples/langchain-python/test.py
Normal file
@@ -0,0 +1,6 @@
|
||||
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
|
||||
text = "What would be a good company name for a company that makes colorful socks?"
|
||||
print(llm(text))
|
||||
5
examples/langchain/PY.Dockerfile
Normal file
5
examples/langchain/PY.Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
||||
FROM python:3.10-bullseye
|
||||
COPY ./langchainpy-localai-example /app
|
||||
WORKDIR /app
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
ENTRYPOINT [ "python", "./simple_demo.py" ];
|
||||
@@ -1,10 +1,6 @@
|
||||
# langchain
|
||||
|
||||
Example of using langchain in TypeScript, with the standard OpenAI llm module, and LocalAI.
|
||||
|
||||
Example for python langchain to follow at a later date
|
||||
|
||||
Set up to make it easy to modify the `index.mts` file to look like any langchain example file.
|
||||
Example of using langchain, with the standard OpenAI llm module, and LocalAI. It has docker compose profiles for both the TypeScript and Python versions.
|
||||
|
||||
**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
|
||||
|
||||
@@ -22,8 +18,11 @@ cd LocalAI/examples/langchain
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up --build
|
||||
# start with docker-compose for typescript!
|
||||
docker-compose --profile ts up --build
|
||||
|
||||
# or start with docker-compose for python!
|
||||
docker-compose --profile py up --build
|
||||
```
|
||||
|
||||
## Copyright
|
||||
|
||||
@@ -15,11 +15,29 @@ services:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
langchainjs:
|
||||
js:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: JS.Dockerfile
|
||||
profiles:
|
||||
- js
|
||||
- ts
|
||||
depends_on:
|
||||
- "api"
|
||||
environment:
|
||||
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
|
||||
- 'OPENAI_API_HOST=http://api:8080/v1'
|
||||
- 'OPENAI_API_BASE=http://api:8080/v1'
|
||||
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
|
||||
|
||||
py:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: PY.Dockerfile
|
||||
profiles:
|
||||
- py
|
||||
depends_on:
|
||||
- "api"
|
||||
environment:
|
||||
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
|
||||
- 'OPENAI_API_BASE=http://api:8080/v1'
|
||||
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
|
||||
@@ -4,7 +4,7 @@ import { Document } from "langchain/document";
|
||||
import { initializeAgentExecutorWithOptions } from "langchain/agents";
|
||||
import {Calculator} from "langchain/tools/calculator";
|
||||
|
||||
const pathToLocalAi = process.env['OPENAI_API_HOST'] || 'http://api:8080/v1';
|
||||
const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
|
||||
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
|
||||
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
|
||||
|
||||
|
||||
24
examples/langchain/langchainpy-localai-example/.vscode/launch.json
vendored
Normal file
24
examples/langchain/langchainpy-localai-example/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"redirectOutput": true,
|
||||
"justMyCode": false
|
||||
},
|
||||
{
|
||||
"name": "Python: Attach to Port 5678",
|
||||
"type": "python",
|
||||
"request": "attach",
|
||||
"connect": {
|
||||
"host": "localhost",
|
||||
"port": 5678
|
||||
},
|
||||
"justMyCode": false
|
||||
}
|
||||
]
|
||||
}
|
||||
3
examples/langchain/langchainpy-localai-example/.vscode/settings.json
vendored
Normal file
3
examples/langchain/langchainpy-localai-example/.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
|
||||
}
|
||||
39
examples/langchain/langchainpy-localai-example/full_demo.py
Normal file
39
examples/langchain/langchainpy-localai-example/full_demo.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import os
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain import PromptTemplate, LLMChain
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
AIMessagePromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import (
|
||||
AIMessage,
|
||||
HumanMessage,
|
||||
SystemMessage
|
||||
)
|
||||
|
||||
print('Langchain + LocalAI PYTHON Tests')
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
|
||||
key = os.environ.get('OPENAI_API_KEY', '-')
|
||||
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
|
||||
|
||||
|
||||
chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
|
||||
|
||||
print("Created ChatOpenAI for ", chat.model_name)
|
||||
|
||||
template = "You are a helpful assistant that translates {input_language} to {output_language}."
|
||||
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
|
||||
human_template = "{text}"
|
||||
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
|
||||
|
||||
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
|
||||
|
||||
print("ABOUT to execute")
|
||||
|
||||
# get a chat completion from the formatted messages
|
||||
chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
|
||||
|
||||
print(".");
|
||||
@@ -0,0 +1,32 @@
|
||||
aiohttp==3.8.4
|
||||
aiosignal==1.3.1
|
||||
async-timeout==4.0.2
|
||||
attrs==23.1.0
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==3.1.0
|
||||
colorama==0.4.6
|
||||
dataclasses-json==0.5.7
|
||||
debugpy==1.6.7
|
||||
frozenlist==1.3.3
|
||||
greenlet==2.0.2
|
||||
idna==3.4
|
||||
langchain==0.0.157
|
||||
marshmallow==3.19.0
|
||||
marshmallow-enum==1.5.1
|
||||
multidict==6.0.4
|
||||
mypy-extensions==1.0.0
|
||||
numexpr==2.8.4
|
||||
numpy==1.24.3
|
||||
openai==0.27.6
|
||||
openapi-schema-pydantic==1.2.4
|
||||
packaging==23.1
|
||||
pydantic==1.10.7
|
||||
PyYAML==6.0
|
||||
requests==2.29.0
|
||||
SQLAlchemy==2.0.12
|
||||
tenacity==8.2.2
|
||||
tqdm==4.65.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.5.0
|
||||
urllib3==1.26.15
|
||||
yarl==1.9.2
|
||||
@@ -0,0 +1,6 @@
|
||||
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
|
||||
text = "What would be a good company name for a company that makes colorful socks?"
|
||||
print(llm(text))
|
||||
@@ -12,6 +12,7 @@ stopwords:
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
backend: "gptj"
|
||||
template:
|
||||
completion: completion
|
||||
chat: completion # gpt4all
|
||||
10
examples/rwkv/Dockerfile.build
Normal file
10
examples/rwkv/Dockerfile.build
Normal file
@@ -0,0 +1,10 @@
|
||||
FROM python
|
||||
|
||||
# convert the model (one-off)
|
||||
RUN pip3 install torch numpy
|
||||
|
||||
WORKDIR /build
|
||||
COPY ./scripts/ .
|
||||
|
||||
RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
|
||||
ENTRYPOINT [ "/build/build.sh" ]
|
||||
59
examples/rwkv/README.md
Normal file
59
examples/rwkv/README.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# rwkv
|
||||
|
||||
Example of how to run rwkv models.
|
||||
|
||||
## Run models
|
||||
|
||||
Setup:
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/rwkv
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# build the tooling image to convert an rwkv model locally:
|
||||
docker build -t rwkv-converter -f Dockerfile.build .
|
||||
|
||||
# download and convert a model (one-off) - it's going to be fast on CPU too!
|
||||
docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv
|
||||
|
||||
# Get the tokenizer
|
||||
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
Test it out:
|
||||
|
||||
```bash
|
||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-3.5-turbo",
|
||||
"prompt": "A long time ago, in a galaxy far away",
|
||||
"max_tokens": 100,
|
||||
"temperature": 0.9, "top_p": 0.8, "top_k": 80
|
||||
}'
|
||||
|
||||
# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
|
||||
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "gpt-3.5-turbo",
|
||||
"messages": [{"role": "user", "content": "How are you?"}],
|
||||
"temperature": 0.9, "top_p": 0.8, "top_k": 80
|
||||
}'
|
||||
|
||||
# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
|
||||
```
|
||||
|
||||
### Fine tuning
|
||||
|
||||
See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).
|
||||
|
||||
## See also
|
||||
|
||||
- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
|
||||
- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
|
||||
16
examples/rwkv/docker-compose.yaml
Normal file
16
examples/rwkv/docker-compose.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
19
examples/rwkv/models/gpt-3.5-turbo.yaml
Normal file
19
examples/rwkv/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: rwkv
|
||||
top_k: 80
|
||||
temperature: 0.9
|
||||
max_tokens: 100
|
||||
top_p: 0.8
|
||||
context_size: 1024
|
||||
threads: 14
|
||||
backend: "rwkv"
|
||||
cutwords:
|
||||
- "Bob:.*"
|
||||
roles:
|
||||
user: "Bob:"
|
||||
system: "Alice:"
|
||||
assistant: "Alice:"
|
||||
template:
|
||||
completion: rwkv_completion
|
||||
chat: rwkv_chat
|
||||
13
examples/rwkv/models/rwkv_chat.tmpl
Normal file
13
examples/rwkv/models/rwkv_chat.tmpl
Normal file
@@ -0,0 +1,13 @@
|
||||
The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
|
||||
|
||||
Bob: Hello Alice, how are you doing?
|
||||
|
||||
Alice: Hi Bob! Thanks, I'm fine. What about you?
|
||||
|
||||
Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
|
||||
|
||||
Alice: Not at all! I'm listening.
|
||||
|
||||
{{.Input}}
|
||||
|
||||
Alice:
|
||||
1
examples/rwkv/models/rwkv_completion.tmpl
Normal file
1
examples/rwkv/models/rwkv_completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
Complete the following sentence: {{.Input}}
|
||||
6
go.mod
6
go.mod
@@ -3,17 +3,18 @@ module github.com/go-skynet/LocalAI
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405
|
||||
github.com/gofiber/fiber/v2 v2.44.0
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/jaypipes/ghw v0.10.0
|
||||
github.com/onsi/ginkgo/v2 v2.9.3
|
||||
github.com/onsi/ginkgo/v2 v2.9.4
|
||||
github.com/onsi/gomega v1.27.6
|
||||
github.com/otiai10/openaigo v1.1.0
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/sashabaranov/go-openai v1.9.1
|
||||
github.com/sashabaranov/go-openai v1.9.3
|
||||
github.com/urfave/cli/v2 v2.25.3
|
||||
github.com/valyala/fasthttp v1.47.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
@@ -23,7 +24,6 @@ require (
|
||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
||||
github.com/andybalholm/brotli v1.0.5 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
|
||||
6
go.sum
6
go.sum
@@ -73,6 +73,8 @@ github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
|
||||
github.com/onsi/ginkgo/v2 v2.9.3 h1:5X2vl/isiKqkrOYjiaGgp3JQOcLV59g5o5SuTMqCcxU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.3/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
|
||||
github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
|
||||
github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
|
||||
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
|
||||
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
|
||||
github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y=
|
||||
@@ -94,6 +96,10 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.9.1 h1:3N52HkJKo9Zlo/oe1AVv5ZkCOny0ra58/ACvAxkN3MM=
|
||||
github.com/sashabaranov/go-openai v1.9.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.9.2 h1:7//Glm9EiMBjelgmBb00yYzKYqm1jckHWWTDLahfeuQ=
|
||||
github.com/sashabaranov/go-openai v1.9.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
|
||||
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
|
||||
|
||||
14
main.go
14
main.go
@@ -1,11 +1,12 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
api "github.com/go-skynet/LocalAI/api"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/jaypipes/ghw"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/urfave/cli/v2"
|
||||
@@ -20,12 +21,6 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
threads := 4
|
||||
cpu, err := ghw.CPU()
|
||||
if err == nil {
|
||||
threads = int(cpu.TotalCores)
|
||||
}
|
||||
|
||||
app := &cli.App{
|
||||
Name: "LocalAI",
|
||||
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
|
||||
@@ -42,13 +37,13 @@ func main() {
|
||||
Name: "threads",
|
||||
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
|
||||
EnvVars: []string{"THREADS"},
|
||||
Value: threads,
|
||||
Value: 4,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "models-path",
|
||||
DefaultText: "Path containing models used for inferencing",
|
||||
EnvVars: []string{"MODELS_PATH"},
|
||||
Value: path,
|
||||
Value: filepath.Join(path, "models"),
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config-file",
|
||||
@@ -85,6 +80,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
|
||||
UsageText: `local-ai [options]`,
|
||||
Copyright: "go-skynet authors",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
|
||||
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,17 +1,4 @@
|
||||
{
|
||||
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||
"extends": [
|
||||
"config:base"
|
||||
],
|
||||
"regexManagers": [
|
||||
{
|
||||
"fileMatch": [
|
||||
"^Makefile$"
|
||||
],
|
||||
"matchStrings": [
|
||||
"#\\s*renovate:\\s*datasource=(?<datasource>.*?) depName=(?<depName>.*?)( datasourceTemplate=(?<datasourceTemplate>.*?))?( packageNameTemplate=(?<packageNameTemplate>.*?))?( depNameTemplate=(?<depNameTemplate>.*?))?( valueTemplate=(?<currentValueTemplate>.*?))?( versioning=(?<versioning>.*?))?\\s+.+_VERSION=(?<currentValue>.*?)\\s"
|
||||
],
|
||||
"versioningTemplate": "{{#if versioning}}{{versioning}}{{/if}}"
|
||||
}
|
||||
]
|
||||
"extends": ["config:base"]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user