doc: update README with embeddings docs

feat: add experimental support for embeddings as arrays (#207 )
fix(deps): update module github.com/gofiber/fiber/v2 to v2.45.0 (#206 )
2026-02-03 03:02:38 -05:00 · 2023-05-08 20:02:59 +02:00 · 2023-05-08 19:31:18 +02:00 · 2023-05-07 17:46:31 +02:00 · 2023-05-07 10:58:15 +02:00 · 2023-05-07 10:13:57 +02:00
14 changed files with 138 additions and 27 deletions
--- a/2
+++ b/2
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai

-GOLLAMA_VERSION?=cf9b522db63898dcc5eb86e37c979ab85cbd583e
+GOLLAMA_VERSION?=b4e97a42d0c10ada6b529b0ec17b05c72435aeab
 GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
 GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
--- a/README.md
+++ b/README.md
@@ -529,6 +529,25 @@ curl http://localhost:8080/v1/models

 </details>

+### Embeddings
+
+<details>
+
+The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:
+
+```yaml
+name: text-embedding-ada-002
+parameters:
+  model: wizardLM-7B.ggml.q5_1.bin
+embeddings: true
+```
+
+There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
+
+Note: embeddings are supported only with `llama.cpp`-compatible models (they do not work with gpt4all-j yet).
+
+</details>
+
 ## Frequently asked questions

 Here are answers to some of the most common questions.
--- a/api/config.go
+++ b/api/config.go
@@ -33,6 +33,7 @@ type Config struct {
 	Mirostat       int               `yaml:"mirostat"`

 	PromptStrings, InputStrings []string
+	InputToken                  [][]int
 }

 type TemplateConfig struct {
@@ -186,8 +187,15 @@ func updateConfig(config *Config, input *OpenAIRequest) {
 		}
 	case []interface{}:
 		for _, pp := range inputs {
-			if s, ok := pp.(string); ok {
-				config.InputStrings = append(config.InputStrings, s)
+			switch i := pp.(type) {
+			case string:
+				config.InputStrings = append(config.InputStrings, i)
+			case []interface{}:
+				tokens := []int{}
+				for _, ii := range i {
+					tokens = append(tokens, int(ii.(float64)))
+				}
+				config.InputToken = append(config.InputToken, tokens)
 			}
 		}
 	}
--- a/api/openai.go
+++ b/api/openai.go
@@ -177,10 +177,23 @@ func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		items := []Item{}

-		for i, s := range config.InputStrings {
-
+		for i, s := range config.InputToken {
 			// get the model function to call for the result
-			embedFn, err := ModelEmbedding(s, loader, *config)
+			embedFn, err := ModelEmbedding("", s, loader, *config)
+			if err != nil {
+				return err
+			}
+
+			embeddings, err := embedFn()
+			if err != nil {
+				return err
+			}
+			items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
+		}
+
+		for i, s := range config.InputStrings {
+			// get the model function to call for the result
+			embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
 			if err != nil {
 				return err
 			}
--- a/api/prediction.go
+++ b/api/prediction.go
@@ -32,7 +32,7 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
 	return llamaOpts
 }

-func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
+func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
 	if !c.Embeddings {
 		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
 	}
@@ -57,6 +57,9 @@ func ModelEmbedding(s string, loader *model.ModelLoader, c Config) (func() ([]fl
 	case *llama.LLama:
 		fn = func() ([]float32, error) {
 			predictOptions := buildLLamaPredictOptions(c)
+			if len(tokens) > 0 {
+				return model.TokenEmbeddings(tokens, predictOptions...)
+			}
 			return model.Embeddings(s, predictOptions...)
 		}
 	default:
--- a/examples/README.md
+++ b/examples/README.md
@@ -73,6 +73,14 @@ Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/st

 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)

+### Template for Runpod.io
+
+_by [@fHachenberg](https://github.com/fHachenberg)_
+
+Allows you to run any LocalAI-compatible model as a backend on the servers of https://runpod.io
+
+[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)
+
 ## Want to contribute?

 Create an issue, and put `Example: <description>` in the title! We will post your examples here.
--- a/examples/langchain-python/README.md
+++ b/examples/langchain-python/README.md
@@ -30,4 +30,6 @@ export OPENAI_API_KEY=sk-

 python test.py
 # A good company name for a company that makes colorful socks would be "Colorsocks".
+
+python agent.py
 ```
--- a/examples/langchain-python/agent.py
+++ b/examples/langchain-python/agent.py
@@ -1,4 +1,4 @@
-## Loosely based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
+## Forked from / based on https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6

 from io import StringIO
 import sys
@@ -10,7 +10,7 @@ from langchain.agents import initialize_agent
 from langchain.agents.tools import Tool
 from langchain.llms import OpenAI

-base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
 model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')

 class PythonREPL:
@@ -21,7 +21,6 @@ class PythonREPL:

    def run(self, command: str) -> str:
        """Run command and returns anything printed."""
-        # sys.stderr.write("EXECUTING PYTHON CODE:\n---\n" + command + "\n---\n")
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
@@ -31,9 +30,8 @@ class PythonREPL:
        except Exception as e:
            sys.stdout = old_stdout
            output = str(e)
-        # sys.stderr.write("PYTHON OUTPUT: \"" + output + "\"\n")
        return output
-      
+
 llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
 python_repl = Tool(
        "Python REPL",
@@ -43,5 +41,4 @@ python_repl = Tool(
    )
 tools = [python_repl]
 agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
-agent.run("What is the 10th fibonacci number?")
-
+agent.run("What is the 10th fibonacci number?")
--- a/examples/query_data/README.md
+++ b/examples/query_data/README.md
@@ -4,11 +4,17 @@ This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stab

 It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).

+Summary of the steps:
+
+- prepare the dataset (and store it into `data`)
+- prepare a vector index database to run queries on
+- run queries
+
 ## Requirements

-For this in order to work, you will need a model compatible with the `llama.cpp` backend. This is will not work with gpt4all.
+For this to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This will not work with gpt4all; however, you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it).

-The example uses `WizardLM`. Edit the config files in `models/` accordingly to specify the model you use (change `HERE`).
+The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files).

 You will also need a training data set. Copy that over `data`.

@@ -28,7 +34,9 @@ cd LocalAI/examples/query_data
 docker-compose up -d --build
 ```

-### Create a storage:
+### Create a storage
+
+In this step we will create a local vector database from our document set, so that we can later ask the LLM questions about it.

 ```bash
 export OPENAI_API_BASE=http://localhost:8080/v1
@@ -41,9 +49,22 @@ After it finishes, a directory "storage" will be created with the vector index d

 ## Query

+We can now query the dataset. 
+
 ```bash
 export OPENAI_API_BASE=http://localhost:8080/v1
 export OPENAI_API_KEY=sk-

 python query.py
+```
+
+## Update
+
+To update our vector database, run `update.py`
+
+```bash
+export OPENAI_API_BASE=http://localhost:8080/v1
+export OPENAI_API_KEY=sk-
+
+python update.py
 ```
--- a/examples/query_data/query.py
+++ b/examples/query_data/query.py
@@ -10,7 +10,7 @@ from llama_index import StorageContext, load_index_from_storage
 base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')

 # This example uses text-davinci-003 by default; feel free to change if desired
-llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo",openai_api_base=base_path))
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))

 # Configure prompt parameters and initialise helper
 max_input_size = 1024
@@ -29,5 +29,7 @@ storage_context = StorageContext.from_defaults(persist_dir='./storage')
 index = load_index_from_storage(storage_context,     service_context=service_context,    )

 query_engine = index.as_query_engine()
-response = query_engine.query("XXXXXX your question here XXXXX")
-print(response)
+
+data = input("Question: ")
+response = query_engine.query(data)
+print(response)
--- a/examples/query_data/store.py
+++ b/examples/query_data/store.py
@@ -13,15 +13,15 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
 llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))

 # Configure prompt parameters and initialise helper
-max_input_size = 256
-num_output = 256
-max_chunk_overlap = 10
+max_input_size = 512
+num_output = 512
+max_chunk_overlap = 30

 prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

 # Load documents from the 'data' directory
 documents = SimpleDirectoryReader('data').load_data()
-service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 257)
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512)
 index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
 index.storage_context.persist(persist_dir="./storage")

--- a/examples/query_data/update.py
+++ b/examples/query_data/update.py
@@ -0,0 +1,32 @@
+import os
+
+# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
+# os.environ['OPENAI_API_KEY']= ""
+
+from llama_index import   LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext
+from langchain.llms.openai import OpenAI
+from llama_index import StorageContext, load_index_from_storage
+
+base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
+
+# This example uses gpt-3.5-turbo by default; feel free to change if desired
+llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
+
+# Configure prompt parameters and initialise helper
+max_input_size = 512
+num_output = 256
+max_chunk_overlap = 20
+
+prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
+# Load documents from the 'data' directory
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+
+# rebuild storage context
+storage_context = StorageContext.from_defaults(persist_dir='./storage')
+
+# load index
+index = load_index_from_storage(storage_context,     service_context=service_context,    )
+documents = SimpleDirectoryReader('data').load_data()
+index.refresh(documents)
+index.storage_context.persist(persist_dir="./storage")
--- a/go.mod
+++ b/go.mod
@@ -6,8 +6,8 @@ require (
 	github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675
-	github.com/gofiber/fiber/v2 v2.44.0
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638
+	github.com/gofiber/fiber/v2 v2.45.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/onsi/ginkgo/v2 v2.9.4
 	github.com/onsi/gomega v1.27.6
@@ -52,7 +52,7 @@ require (
 	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
 	golang.org/x/net v0.9.0 // indirect
-	golang.org/x/sys v0.7.0 // indirect
+	golang.org/x/sys v0.8.0 // indirect
 	golang.org/x/text v0.9.0 // indirect
 	golang.org/x/tools v0.8.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -37,11 +37,15 @@ github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod
 github.com/go-skynet/go-llama.cpp v0.0.0-20230504223241-67ff6a4db244/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675 h1:plXywr95RghidIHPHl+O/zpcNXenEeS6w/6WftFNr9E=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230505100647-691d479d3675/go.mod h1:LvSQx5QAYBAMpWkbyVFFDiM1Tzj8LP55DvmUM3hbRMY=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638 h1:+7UXkGG+LeqJ5oPBEJo5D73Y2drKOVzrlB8D+iG2PHw=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230506193017-cf9b522db638/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
 github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
+github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
+github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -163,6 +167,8 @@ golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
 golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
Author	SHA1	Message	Date
mudler	3daff6f1aa	doc: update README with embeddings docs	2023-05-08 20:02:59 +02:00
Ettore Di Giacinto	89dfa0f5fc	feat: add experimental support for embeddings as arrays (#207 )	2023-05-08 19:31:18 +02:00
renovate[bot]	bc03c492a0	fix(deps): update module github.com/gofiber/fiber/v2 to v2.45.0 (#206 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-05-07 17:46:31 +02:00
Fabian Hachenberg	f50a4c1454	Added runpod.io template for LocalAI to examples (#203 )	2023-05-07 10:58:15 +02:00
mudler	d13d4d95ce	examples: fix default parameter	2023-05-07 10:13:57 +02:00
renovate[bot]	428790ec06	fix(deps): update github.com/go-skynet/go-llama.cpp digest to cf9b522 (#202 ) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>	2023-05-07 09:14:00 +02:00
mudler	4f551ce414	examples: add update index example, update README	2023-05-07 09:05:24 +02:00
mudler	6ed7b10273	examples: add langchain agent example	2023-05-07 08:14:01 +02:00
mudler	02979566ee	examples: better defaults	2023-05-07 00:58:30 +02:00