mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3daff6f1aa | ||
|
|
89dfa0f5fc | ||
|
|
bc03c492a0 | ||
|
|
f50a4c1454 | ||
|
|
d13d4d95ce | ||
|
|
428790ec06 | ||
|
|
4f551ce414 | ||
|
|
6ed7b10273 | ||
|
|
02979566ee |
2
Makefile
2
Makefile
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
|
||||
GOLLAMA_VERSION?=cf9b522db63898dcc5eb86e37c979ab85cbd583e
|
||||
GOLLAMA_VERSION?=b4e97a42d0c10ada6b529b0ec17b05c72435aeab
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
|
||||
19
README.md
19
README.md
@@ -529,6 +529,25 @@ curl http://localhost:8080/v1/models
|
||||
|
||||
</details>
|
||||
|
||||
### Embeddings
|
||||
|
||||
<details>
|
||||
|
||||
The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:
|
||||
|
||||
```yaml
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: wizardLM-7B.ggml.q5_1.bin
|
||||
embeddings: true
|
||||
```
|
||||
|
||||
There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
|
||||
|
||||
Note: embeddings are supported only with `llama.cpp`-compatible models (they do not work with gpt4-all-j yet).
|
||||
|
||||
</details>
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
Here are answers to some of the most common questions.
|
||||
|
||||
@@ -33,6 +33,7 @@ type Config struct {
|
||||
Mirostat int `yaml:"mirostat"`
|
||||
|
||||
PromptStrings, InputStrings []string
|
||||
InputToken [][]int
|
||||
}
|
||||
|
||||
type TemplateConfig struct {
|
||||
@@ -186,8 +187,15 @@ func updateConfig(config *Config, input *OpenAIRequest) {
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.InputStrings = append(config.InputStrings, s)
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,10 +177,23 @@ func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
items := []Item{}
|
||||
|
||||
for i, s := range config.InputStrings {
|
||||
|
||||
for i, s := range config.InputToken {
|
||||
// get the model function to call for the result
|
||||
embedFn, err := ModelEmbedding(s, loader, *config)
|
||||
embedFn, err := ModelEmbedding("", s, loader, *config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
embeddings, err := embedFn()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
||||
}
|
||||
|
||||
for i, s := range config.InputStrings {
|
||||
// get the model function to call for the result
|
||||
embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
|
||||
return llamaOpts
|
||||
}
|
||||
|
||||
func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
|
||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
|
||||
if !c.Embeddings {
|
||||
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
|
||||
}
|
||||
@@ -57,6 +57,9 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
|
||||
case *llama.LLama:
|
||||
fn = func() ([]float32, error) {
|
||||
predictOptions := buildLLamaPredictOptions(c)
|
||||
if len(tokens) > 0 {
|
||||
return model.TokenEmbeddings(tokens, predictOptions...)
|
||||
}
|
||||
return model.Embeddings(s, predictOptions...)
|
||||
}
|
||||
default:
|
||||
|
||||
@@ -73,6 +73,14 @@ Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/st
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
|
||||
|
||||
### Template for Runpod.io
|
||||
|
||||
_by [@fHachenberg](https://github.com/fHachenberg)_
|
||||
|
||||
Allows running any LocalAI-compatible model as a backend on the servers of https://runpod.io
|
||||
|
||||
[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)
|
||||
|
||||
## Want to contribute?
|
||||
|
||||
Create an issue, and put `Example: <description>` in the title! We will post your examples here.
|
||||
|
||||
@@ -30,4 +30,6 @@ export OPENAI_API_KEY=sk-
|
||||
|
||||
python test.py
|
||||
# A good company name for a company that makes colorful socks would be "Colorsocks".
|
||||
|
||||
python agent.py
|
||||
```
|
||||
44
examples/langchain-python/agent.py
Normal file
44
examples/langchain-python/agent.py
Normal file
@@ -0,0 +1,44 @@
|
||||
## This is based on (forked from) https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
|
||||
|
||||
from io import StringIO
|
||||
import sys
|
||||
import os
|
||||
from typing import Dict, Optional
|
||||
|
||||
from langchain.agents import load_tools
|
||||
from langchain.agents import initialize_agent
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
|
||||
|
||||
class PythonREPL:
    """Simulates a standalone Python REPL.

    Each command is executed with ``exec`` against this module's globals, so
    names defined by one command remain visible to later commands.
    """

    def __init__(self):
        pass

    def run(self, command: str) -> str:
        """Run command and return anything printed.

        Args:
            command: Python source code to execute.

        Returns:
            The text the command wrote to ``sys.stdout``. If the command
            raised an ``Exception``, the exception message (``str(e)``) is
            returned instead of the captured output.
        """
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            # NOTE(security): this executes arbitrary code with the full
            # privileges of the current process. Only feed it input you trust.
            exec(command, globals())
            output = mystdout.getvalue()
        except Exception as e:
            output = str(e)
        finally:
            # Always restore stdout, including when the command raises a
            # BaseException (SystemExit, KeyboardInterrupt) that is not caught
            # above; otherwise all later prints would vanish into the buffer.
            sys.stdout = old_stdout
        return output
|
||||
|
||||
llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
|
||||
python_repl = Tool(
|
||||
"Python REPL",
|
||||
PythonREPL().run,
|
||||
"""A Python shell. Use this to execute python commands. Input should be a valid python command.
|
||||
If you expect output it should be printed out.""",
|
||||
)
|
||||
tools = [python_repl]
|
||||
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
|
||||
agent.run("What is the 10th fibonacci number?")
|
||||
@@ -4,11 +4,17 @@ This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stab
|
||||
|
||||
It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
|
||||
|
||||
Summary of the steps:
|
||||
|
||||
- prepare the dataset (and store it into `data`)
|
||||
- prepare a vector index database to run queries on
|
||||
- run queries
|
||||
|
||||
## Requirements
|
||||
|
||||
For this in order to work, you will need a model compatible with the `llama.cpp` backend. This is will not work with gpt4all.
|
||||
For this to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This will not work with gpt4all; however, you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it).
|
||||
|
||||
The example uses `WizardLM`. Edit the config files in `models/` accordingly to specify the model you use (change `HERE`).
|
||||
The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files).
|
||||
|
||||
You will also need a training data set. Copy that over `data`.
|
||||
|
||||
@@ -28,7 +34,9 @@ cd LocalAI/examples/query_data
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
### Create a storage:
|
||||
### Create a storage
|
||||
|
||||
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
@@ -41,9 +49,22 @@ After it finishes, a directory "storage" will be created with the vector index d
|
||||
|
||||
## Query
|
||||
|
||||
We can now query the dataset.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python query.py
|
||||
```
|
||||
|
||||
## Update
|
||||
|
||||
To update our vector database, run `update.py`
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python update.py
|
||||
```
|
||||
@@ -10,7 +10,7 @@ from llama_index import StorageContext, load_index_from_storage
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses gpt-3.5-turbo by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base=base_path))
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 1024
|
||||
@@ -29,5 +29,7 @@ storage_context = StorageContext.from_defaults(persist_dir='./storage')
|
||||
index = load_index_from_storage(storage_context, service_context=service_context, )
|
||||
|
||||
query_engine = index.as_query_engine()
|
||||
response = query_engine.query("XXXXXX your question here XXXXX")
|
||||
print(response)
|
||||
|
||||
data = input("Question: ")
|
||||
response = query_engine.query(data)
|
||||
print(response)
|
||||
|
||||
@@ -13,15 +13,15 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 256
|
||||
num_output = 256
|
||||
max_chunk_overlap = 10
|
||||
max_input_size = 512
|
||||
num_output = 512
|
||||
max_chunk_overlap = 30
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
documents = SimpleDirectoryReader('data').load_data()
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 257)
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512)
|
||||
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
|
||||
index.storage_context.persist(persist_dir="./storage")
|
||||
|
||||
|
||||
32
examples/query_data/update.py
Normal file
32
examples/query_data/update.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import os
|
||||
|
||||
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
|
||||
# os.environ['OPENAI_API_KEY']= ""
|
||||
|
||||
from llama_index import LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext
|
||||
from langchain.llms.openai import OpenAI
|
||||
from llama_index import StorageContext, load_index_from_storage
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses gpt-3.5-turbo by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 512
|
||||
num_output = 256
|
||||
max_chunk_overlap = 20
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Build the service context from the LLM predictor and prompt helper
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
|
||||
|
||||
# rebuild storage context
|
||||
storage_context = StorageContext.from_defaults(persist_dir='./storage')
|
||||
|
||||
# load index
|
||||
index = load_index_from_storage(storage_context, service_context=service_context, )
|
||||
documents = SimpleDirectoryReader('data').load_data()
|
||||
index.refresh(documents)
|
||||
index.storage_context.persist(persist_dir="./storage")
|
||||
6
go.mod
6
go.mod
@@ -6,8 +6,8 @@ require (
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675
|
||||
github.com/gofiber/fiber/v2 v2.44.0
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638
|
||||
github.com/gofiber/fiber/v2 v2.45.0
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/onsi/ginkgo/v2 v2.9.4
|
||||
github.com/onsi/gomega v1.27.6
|
||||
@@ -52,7 +52,7 @@ require (
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
golang.org/x/net v0.9.0 // indirect
|
||||
golang.org/x/sys v0.7.0 // indirect
|
||||
golang.org/x/sys v0.8.0 // indirect
|
||||
golang.org/x/text v0.9.0 // indirect
|
||||
golang.org/x/tools v0.8.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
|
||||
6
go.sum
6
go.sum
@@ -37,11 +37,15 @@ github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230504223241-67ff6a4db244/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675 h1:plXywr95RghidIHPHl+O/zpcNXenEeS6w/6WftFNr9E=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 h1:+7UXkGG+LeqJ5oPBEJo5D73Y2drKOVzrlB8D+iG2PHw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
|
||||
github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
|
||||
github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
|
||||
github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
@@ -163,6 +167,8 @@ golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
|
||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
|
||||
|
||||
Reference in New Issue
Block a user