Compare commits

..

9 Commits

Author SHA1 Message Date
mudler
3daff6f1aa doc: update README with embeddings docs 2023-05-08 20:02:59 +02:00
Ettore Di Giacinto
89dfa0f5fc feat: add experimental support for embeddings as arrays (#207) 2023-05-08 19:31:18 +02:00
renovate[bot]
bc03c492a0 fix(deps): update module github.com/gofiber/fiber/v2 to v2.45.0 (#206)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-07 17:46:31 +02:00
Fabian Hachenberg
f50a4c1454 Added runpod.io template for LocalAI to examples (#203) 2023-05-07 10:58:15 +02:00
mudler
d13d4d95ce examples: fix default parameter 2023-05-07 10:13:57 +02:00
renovate[bot]
428790ec06 fix(deps): update github.com/go-skynet/go-llama.cpp digest to cf9b522 (#202)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-07 09:14:00 +02:00
mudler
4f551ce414 examples: add update index example, update README 2023-05-07 09:05:24 +02:00
mudler
6ed7b10273 examples: add langchain agent example 2023-05-07 08:14:01 +02:00
mudler
02979566ee examples: better defaults 2023-05-07 00:58:30 +02:00
14 changed files with 138 additions and 27 deletions

View File

@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=cf9b522db63898dcc5eb86e37c979ab85cbd583e
GOLLAMA_VERSION?=b4e97a42d0c10ada6b529b0ec17b05c72435aeab
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp

View File

@@ -529,6 +529,25 @@ curl http://localhost:8080/v1/models
</details>
### Embeddings
<details>
The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:
```yaml
name: text-embedding-ada-002
parameters:
model: wizardLM-7B.ggml.q5_1.bin
embeddings: true
```
There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
Note: embeddings are supported only with `llama.cpp`-compatible models (they don't work with gpt4-all-j yet).
</details>
## Frequently asked questions
Here are answers to some of the most common questions.

View File

@@ -33,6 +33,7 @@ type Config struct {
Mirostat int `yaml:"mirostat"`
PromptStrings, InputStrings []string
InputToken [][]int
}
type TemplateConfig struct {
@@ -186,8 +187,15 @@ func updateConfig(config *Config, input *OpenAIRequest) {
}
case []interface{}:
for _, pp := range inputs {
if s, ok := pp.(string); ok {
config.InputStrings = append(config.InputStrings, s)
switch i := pp.(type) {
case string:
config.InputStrings = append(config.InputStrings, i)
case []interface{}:
tokens := []int{}
for _, ii := range i {
tokens = append(tokens, int(ii.(float64)))
}
config.InputToken = append(config.InputToken, tokens)
}
}
}

View File

@@ -177,10 +177,23 @@ func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
log.Debug().Msgf("Parameter Config: %+v", config)
items := []Item{}
for i, s := range config.InputStrings {
for i, s := range config.InputToken {
// get the model function to call for the result
embedFn, err := ModelEmbedding(s, loader, *config)
embedFn, err := ModelEmbedding("", s, loader, *config)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
for i, s := range config.InputStrings {
// get the model function to call for the result
embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
if err != nil {
return err
}

View File

@@ -32,7 +32,7 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
return llamaOpts
}
func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
if !c.Embeddings {
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
}
@@ -57,6 +57,9 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
case *llama.LLama:
fn = func() ([]float32, error) {
predictOptions := buildLLamaPredictOptions(c)
if len(tokens) > 0 {
return model.TokenEmbeddings(tokens, predictOptions...)
}
return model.Embeddings(s, predictOptions...)
}
default:

View File

@@ -73,6 +73,14 @@ Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/st
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
### Template for Runpod.io
_by [@fHachenberg](https://github.com/fHachenberg)_
Allows you to run any LocalAI-compatible model as a backend on the servers of https://runpod.io
[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)
## Want to contribute?
Create an issue, and put `Example: <description>` in the title! We will post your examples here.

View File

@@ -30,4 +30,6 @@ export OPENAI_API_KEY=sk-
python test.py
# A good company name for a company that makes colorful socks would be "Colorsocks".
python agent.py
```

View File

@@ -1,4 +1,4 @@
## Loosely based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
from io import StringIO
import sys
@@ -10,7 +10,7 @@ from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
class PythonREPL:
@@ -21,7 +21,6 @@ class PythonREPL:
def run(self, command: str) -> str:
"""Run command and returns anything printed."""
# sys.stderr.write("EXECUTING PYTHON CODE:\n---\n" + command + "\n---\n")
old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()
try:
@@ -31,9 +30,8 @@ class PythonREPL:
except Exception as e:
sys.stdout = old_stdout
output = str(e)
# sys.stderr.write("PYTHON OUTPUT: \"" + output + "\"\n")
return output
llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
python_repl = Tool(
"Python REPL",
@@ -43,5 +41,4 @@ python_repl = Tool(
)
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
agent.run("What is the 10th fibonacci number?")
agent.run("What is the 10th fibonacci number?")

View File

@@ -4,11 +4,17 @@ This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stab
It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
Summary of the steps:
- prepare the dataset (and store it into `data`)
- prepare a vector index database to run queries on
- run queries
## Requirements
For this in order to work, you will need a model compatible with the `llama.cpp` backend. This is will not work with gpt4all.
For this to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This will not work with gpt4all; however, you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it).
The example uses `WizardLM`. Edit the config files in `models/` accordingly to specify the model you use (change `HERE`).
The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files).
You will also need a training data set. Copy that over `data`.
@@ -28,7 +34,9 @@ cd LocalAI/examples/query_data
docker-compose up -d --build
```
### Create a storage:
### Create a storage
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
@@ -41,9 +49,22 @@ After it finishes, a directory "storage" will be created with the vector index d
## Query
We can now query the dataset.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python query.py
```
## Update
To update our vector database, run `update.py`
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python update.py
```

View File

@@ -10,7 +10,7 @@ from llama_index import StorageContext, load_index_from_storage
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# This example uses text-davinci-003 by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base=base_path))
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 1024
@@ -29,5 +29,7 @@ storage_context = StorageContext.from_defaults(persist_dir='./storage')
index = load_index_from_storage(storage_context, service_context=service_context, )
query_engine = index.as_query_engine()
response = query_engine.query("XXXXXX your question here XXXXX")
print(response)
data = input("Question: ")
response = query_engine.query(data)
print(response)

View File

@@ -13,15 +13,15 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 256
num_output = 256
max_chunk_overlap = 10
max_input_size = 512
num_output = 512
max_chunk_overlap = 30
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
documents = SimpleDirectoryReader('data').load_data()
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 257)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(persist_dir="./storage")

View File

@@ -0,0 +1,32 @@
# Example script: reload the vector index persisted in ./storage, refresh it with
# the documents currently found in ./data, and persist the result back to ./storage.
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
# Base URL of the OpenAI-compatible API; taken from OPENAI_API_BASE, falling back to a local endpoint
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# The LLM is requested under the model name "gpt-3.5-turbo"; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 512
num_output = 256
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Bundle the LLM predictor and prompt helper into the service context used when loading the index
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
# Rebuild the storage context from the previously persisted './storage' directory
storage_context = StorageContext.from_defaults(persist_dir='./storage')
# Load the existing index from that storage
index = load_index_from_storage(storage_context, service_context=service_context, )
# Load documents from the 'data' directory
documents = SimpleDirectoryReader('data').load_data()
# NOTE(review): refresh() presumably re-indexes only new or changed documents - confirm against the llama_index docs
index.refresh(documents)
# Write the updated index back to './storage'
index.storage_context.persist(persist_dir="./storage")

6
go.mod
View File

@@ -6,8 +6,8 @@ require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675
github.com/gofiber/fiber/v2 v2.44.0
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638
github.com/gofiber/fiber/v2 v2.45.0
github.com/hashicorp/go-multierror v1.1.1
github.com/onsi/ginkgo/v2 v2.9.4
github.com/onsi/gomega v1.27.6
@@ -52,7 +52,7 @@ require (
github.com/valyala/tcplisten v1.0.0 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/net v0.9.0 // indirect
golang.org/x/sys v0.7.0 // indirect
golang.org/x/sys v0.8.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/tools v0.8.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect

6
go.sum
View File

@@ -37,11 +37,15 @@ github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod
github.com/go-skynet/go-llama.cpp v0.0.0-20230504223241-67ff6a4db244/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675 h1:plXywr95RghidIHPHl+O/zpcNXenEeS6w/6WftFNr9E=
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 h1:+7UXkGG+LeqJ5oPBEJo5D73Y2drKOVzrlB8D+iG2PHw=
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -163,6 +167,8 @@ golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=