Compare commits

...

14 Commits

Author SHA1 Message Date
Ettore Di Giacinto
2488c445b6 feat: bert.cpp token embeddings (#241) 2023-05-12 17:16:49 +02:00
Ettore Di Giacinto
b4241d0a0d tests: enable whisper (#239) 2023-05-12 14:10:18 +02:00
Ettore Di Giacinto
8250391e49 Add support for gptneox/replit (#238) 2023-05-12 11:36:35 +02:00
Ettore Di Giacinto
fd1df4e971 whisper: add tests and allow to set upload size (#237) 2023-05-12 10:04:20 +02:00
ci-robbot [bot]
5115b2faa3 ⬆️ Update go-skynet/go-llama.cpp (#219)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 23:43:55 +02:00
ci-robbot [bot]
93e82a8bf4 ⬆️ Update go-skynet/go-gpt2.cpp (#220)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-11 23:43:44 +02:00
Ettore Di Giacinto
4413defca5 feat: add starcoder (#236) 2023-05-11 20:20:07 +02:00
Ettore Di Giacinto
f359e1c6c4 fix: dolly/rp (#235) 2023-05-11 19:38:27 +02:00
renovate[bot]
1bc87d582d fix(deps): update module github.com/sashabaranov/go-openai to v1.9.4 (#230)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:02:19 +02:00
renovate[bot]
a86a383357 fix(deps): update github.com/donomii/go-rwkv.cpp digest to 07166da (#224)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:01:52 +02:00
renovate[bot]
16f02c7b30 fix(deps): update github.com/go-skynet/go-bert.cpp digest to ec771ec (#223)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-11 18:01:35 +02:00
Ettore Di Giacinto
fe2706890c Update README.md 2023-05-11 17:32:13 +02:00
Ettore Di Giacinto
85f0f8227d refactor: drop code dups (#234) 2023-05-11 16:34:16 +02:00
Ettore Di Giacinto
59e3c02002 make use of new bindings for gpt4all (#232) 2023-05-11 14:31:19 +02:00
17 changed files with 468 additions and 521 deletions

View File

@@ -30,6 +30,9 @@ jobs:
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "go-skynet/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

View File

@@ -21,7 +21,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential
sudo apt-get install build-essential ffmpeg
- name: Test
run: |
make test
@@ -38,7 +38,7 @@ jobs:
- name: Dependencies
run: |
brew update
brew install sdl2
brew install sdl2 ffmpeg
- name: Test
run: |
make test

View File

@@ -3,13 +3,14 @@ GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d
GOLLAMA_VERSION?=70593fccbe4b01dedaab805b0f25cb58192c7b38
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914
GOGPT2_VERSION?=92421a8cf61ed6e03babd9067af292b094cb1307
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
BERT_VERSION?=ec771ec715576ac050263bb7bb74bfd616a5ba13
BERT_VERSION?=ac22f8f74aec5e31bc46242c17e7d511f127856b
BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
@@ -19,8 +20,8 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
# Use this if you want to set the default behavior
ifndef BUILD_TYPE
@@ -37,19 +38,26 @@ endif
all: help
## GPT4ALL-J
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
## GPT4ALL
gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
## BERT embeddings
go-bert:
@@ -85,8 +93,8 @@ bloomz/libbloomz.a: bloomz
go-bert/libgobert.a: go-bert
$(MAKE) -C go-bert libgobert.a
go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a
## CEREBRAS GPT
go-gpt2:
@@ -96,8 +104,12 @@ go-gpt2:
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2/libgpt2.a: go-gpt2
@@ -119,20 +131,20 @@ go-llama/libbinding.a: go-llama
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
$(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert bloomz
prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace
$(GOCMD) mod download
## GENERIC
rebuild: ## Rebuilds the project
$(MAKE) -C go-llama clean
$(MAKE) -C go-gpt4all-j clean
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C go-gpt2 clean
$(MAKE) -C go-rwkv clean
$(MAKE) -C whisper.cpp clean
@@ -140,11 +152,11 @@ rebuild: ## Rebuilds the project
$(MAKE) -C bloomz clean
$(MAKE) build
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a replace ## Prepares for building
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./gpt4all
rm -rf ./go-gpt2
rm -rf ./go-rwkv
rm -rf ./go-bert
@@ -156,7 +168,7 @@ clean: ## Remove build related file
build: prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
@@ -167,12 +179,16 @@ run: prepare ## run local-ai
test-models/testmodel:
mkdir test-models
mkdir test-dir
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
cp tests/fixtures/* test-models
test: prepare test-models/testmodel
cp tests/fixtures/* test-models
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
## Help:
help: ## Show this help.

View File

@@ -25,6 +25,7 @@ See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/
## News
- 11-05-2023: __1.9.0__ released! 🔥 Important whisper updates ( https://github.com/go-skynet/LocalAI/pull/233 https://github.com/go-skynet/LocalAI/pull/229 ) and extended gpt4all model families support ( https://github.com/go-skynet/LocalAI/pull/232 ). Redpajama/dolly experimental ( https://github.com/go-skynet/LocalAI/pull/214 )
- 10-05-2023: __1.8.0__ released! 🔥 Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
- 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
@@ -73,7 +74,7 @@ Note: You might need to convert older models to the new format, see [here](https
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
Note: rwkv models have an associated tokenizer along that needs to be provided with it:
Note: rwkv models needs to specify the backend `rwkv` in the YAML config files and have an associated tokenizer along that needs to be provided with it:
```
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
@@ -91,6 +92,30 @@ It should also be compatible with StableLM and GPTNeoX ggml models (untested).
Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources.
### Model compatibility table
<details>
| Backend | Compatible models | Completion/Chat endpoint | Audio transcription | Embeddings support | Token stream support | Github | Bindings |
|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
| llama | Vicuna, Alpaca, LLaMa | yes | no | yes (doesn't seem to be accurate) | yes | https://github.com/ggerganov/llama.cpp | https://github.com/go-skynet/go-llama.cpp |
| gpt4all-llama | Vicuna, Alpaca, LLaMa | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
| gpt4all-mpt | MPT | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
| gpt4all-j | GPT4ALL-J | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
| gpt2 | GPT/NeoX, Cerebras | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| dolly | Dolly | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| redpajama | RedPajama | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| stableLM | StableLM GPT/NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| replit | Replit | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| gptneox | GPT NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| starcoder | Starcoder | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
| bloomz | Bloom | yes | no | no | no | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp |
| rwkv | RWKV | yes | no | no | yes | https://github.com/saharNooby/rwkv.cpp | https://github.com/donomii/go-rwkv.cpp |
| bert-embeddings | bert | no | no | yes | no | https://github.com/skeskinen/bert.cpp | https://github.com/go-skynet/go-bert.cpp |
| whisper | whisper | no | yes | no | no | https://github.com/ggerganov/whisper.cpp | https://github.com/ggerganov/whisper.cpp |
</details>
## Usage
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
@@ -545,6 +570,7 @@ name: text-embedding-ada-002
parameters:
model: bert
embeddings: true
backend: "bert-embeddings"
```
There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
@@ -563,6 +589,7 @@ Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tre
```yaml
name: whisper-1
backend: whisper
parameters:
model: whisper-en
```

View File

@@ -12,7 +12,7 @@ import (
"github.com/rs/zerolog/log"
)
func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
@@ -20,6 +20,7 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
// Return errors as JSON responses
app := fiber.New(fiber.Config{
BodyLimit: uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: disableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {

View File

@@ -3,6 +3,8 @@ package api_test
import (
"context"
"os"
"path/filepath"
"runtime"
. "github.com/go-skynet/LocalAI/api"
"github.com/go-skynet/LocalAI/pkg/model"
@@ -23,7 +25,7 @@ var _ = Describe("API test", func() {
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App("", modelLoader, 1, 512, false, true, true)
app = App("", modelLoader, 15, 1, 512, false, true, true)
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
@@ -45,8 +47,7 @@ var _ = Describe("API test", func() {
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(3))
Expect(models.Models[0].ID).To(Equal("testmodel"))
Expect(len(models.Models)).To(Equal(7))
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
@@ -79,15 +80,55 @@ var _ = Describe("API test", func() {
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
})
It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateTranscription(
context.Background(),
openai.AudioRequest{
Model: openai.Whisper1,
FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
})
It("calculate embeddings", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun", "cat"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
})
})
Context("Config file", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true)
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
@@ -108,8 +149,7 @@ var _ = Describe("API test", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(5))
Expect(models.Models[0].ID).To(Equal("testmodel"))
Expect(len(models.Models)).To(Equal(9))
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
@@ -134,5 +174,6 @@ var _ = Describe("API test", func() {
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
})
})

View File

@@ -285,5 +285,10 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
}
}
// Enforce debug flag if passed from CLI
if debug {
config.Debug = true
}
return config, input, nil
}

View File

@@ -12,8 +12,10 @@ import (
"path/filepath"
"strings"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/whisper"
whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
@@ -407,14 +409,13 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
return err
}
f, err := file.Open()
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
return err
}
defer f.Close()
log.Debug().Msgf("Audio file: %+v", file)
dir, err := os.MkdirTemp("", "whisper")
@@ -426,24 +427,33 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
dst := filepath.Join(dir, path.Base(file.Filename))
dstFile, err := os.Create(dst)
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
return err
}
if _, err := io.Copy(dstFile, f); err != nil {
log.Debug().Msgf("Audio file %+v - %+v - err %+v", file.Filename, dst, err)
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
return err
}
log.Debug().Msgf("Audio file copied to: %+v", dst)
whisperModel, err := loader.WhisperLoader("whisper", config.Model)
whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
return err
}
tr, err := whisper.Transcript(whisperModel, dst, input.Language)
if whisperModel == nil {
return fmt.Errorf("could not load whisper model")
}
w, ok := whisperModel.(whisper.Model)
if !ok {
return fmt.Errorf("loader returned non-whisper object")
}
tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
return err
}
log.Debug().Msgf("Trascribed: %+v", tr)

View File

@@ -11,8 +11,8 @@ import (
"github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
)
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
@@ -68,7 +68,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
case *bert.Bert:
fn = func() ([]float32, error) {
if len(tokens) > 0 {
return nil, fmt.Errorf("embeddings endpoint for this model supports only string")
return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
}
return model.Embeddings(s, bert.SetThreads(c.Threads))
}
@@ -199,6 +199,78 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
return response, nil
}
case *gpt2.GPTNeoX:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.Replit:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.Starcoder:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt2.RedPajama:
fn = func() (string, error) {
// Generate the prediction using the language model
@@ -315,29 +387,35 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gptj.GPTJ:
case *gpt4all.Model:
supportStreams = true
fn = func() (string, error) {
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}
// Generate the prediction using the language model
predictOptions := []gptj.PredictOption{
gptj.SetTemperature(c.Temperature),
gptj.SetTopP(c.TopP),
gptj.SetTopK(c.TopK),
gptj.SetTokens(c.Maxtokens),
gptj.SetThreads(c.Threads),
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(c.Temperature),
gpt4all.SetTopP(c.TopP),
gpt4all.SetTopK(c.TopK),
gpt4all.SetTokens(c.Maxtokens),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
}
return model.Predict(
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
case *llama.LLama:
supportStreams = true

7
go.mod
View File

@@ -3,11 +3,11 @@ module github.com/go-skynet/LocalAI
go 1.19
require (
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a
github.com/go-audio/wav v1.1.0
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b
@@ -18,7 +18,7 @@ require (
github.com/otiai10/copy v1.11.0
github.com/otiai10/openaigo v1.1.0
github.com/rs/zerolog v1.29.1
github.com/sashabaranov/go-openai v1.9.3
github.com/sashabaranov/go-openai v1.9.4
github.com/swaggo/swag v1.16.1
github.com/urfave/cli/v2 v2.25.3
github.com/valyala/fasthttp v1.47.0
@@ -49,6 +49,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect

6
go.sum
View File

@@ -18,6 +18,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
@@ -44,6 +46,8 @@ github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVe
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
@@ -115,6 +119,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=

View File

@@ -62,6 +62,12 @@ func main() {
EnvVars: []string{"CONTEXT_SIZE"},
Value: 512,
},
&cli.IntFlag{
Name: "upload-limit",
DefaultText: "Default upload-limit. MB",
EnvVars: []string{"UPLOAD_LIMIT"},
Value: 15,
},
},
Description: `
LocalAI is a drop-in replacement OpenAI API which runs inference locally.
@@ -81,7 +87,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
},
}

182
pkg/model/initializers.go Normal file
View File

@@ -0,0 +1,182 @@
package model
import (
"fmt"
"strings"
rwkv "github.com/donomii/go-rwkv.cpp"
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
bloomz "github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/hashicorp/go-multierror"
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
"github.com/rs/zerolog/log"
)
const tokenizerSuffix = ".tokenizer.json"
const (
LlamaBackend = "llama"
BloomzBackend = "bloomz"
StarcoderBackend = "starcoder"
StableLMBackend = "stablelm"
DollyBackend = "dolly"
RedPajamaBackend = "redpajama"
GPTNeoXBackend = "gptneox"
ReplitBackend = "replit"
Gpt2Backend = "gpt2"
Gpt4AllLlamaBackend = "gpt4all-llama"
Gpt4AllMptBackend = "gpt4all-mpt"
Gpt4AllJBackend = "gpt4all-j"
BertEmbeddingsBackend = "bert-embeddings"
RwkvBackend = "rwkv"
WhisperBackend = "whisper"
)
var backends []string = []string{
LlamaBackend,
Gpt4AllLlamaBackend,
Gpt4AllMptBackend,
Gpt4AllJBackend,
Gpt2Backend,
WhisperBackend,
RwkvBackend,
BloomzBackend,
StableLMBackend,
DollyBackend,
RedPajamaBackend,
GPTNeoXBackend,
ReplitBackend,
BertEmbeddingsBackend,
StarcoderBackend,
}
var starCoder = func(modelFile string) (interface{}, error) {
return gpt2.NewStarcoder(modelFile)
}
var redPajama = func(modelFile string) (interface{}, error) {
return gpt2.NewRedPajama(modelFile)
}
var dolly = func(modelFile string) (interface{}, error) {
return gpt2.NewDolly(modelFile)
}
var gptNeoX = func(modelFile string) (interface{}, error) {
return gpt2.NewGPTNeoX(modelFile)
}
var replit = func(modelFile string) (interface{}, error) {
return gpt2.NewReplit(modelFile)
}
var stableLM = func(modelFile string) (interface{}, error) {
return gpt2.NewStableLM(modelFile)
}
var bertEmbeddings = func(modelFile string) (interface{}, error) {
return bert.New(modelFile)
}
var bloomzLM = func(modelFile string) (interface{}, error) {
return bloomz.New(modelFile)
}
var gpt2LM = func(modelFile string) (interface{}, error) {
return gpt2.New(modelFile)
}
var whisperModel = func(modelFile string) (interface{}, error) {
return whisper.New(modelFile)
}
func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
return llama.New(s, opts...)
}
}
func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
return gpt4all.New(s, opts...)
}
}
func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
return func(s string) (interface{}, error) {
model := rwkv.LoadFiles(s, tokenFile, threads)
if model == nil {
return nil, fmt.Errorf("could not load model")
}
return model, nil
}
}
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
switch strings.ToLower(backendString) {
case LlamaBackend:
return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
case BloomzBackend:
return ml.LoadModel(modelFile, bloomzLM)
case StableLMBackend:
return ml.LoadModel(modelFile, stableLM)
case DollyBackend:
return ml.LoadModel(modelFile, dolly)
case RedPajamaBackend:
return ml.LoadModel(modelFile, redPajama)
case Gpt2Backend:
return ml.LoadModel(modelFile, gpt2LM)
case GPTNeoXBackend:
return ml.LoadModel(modelFile, gptNeoX)
case ReplitBackend:
return ml.LoadModel(modelFile, replit)
case StarcoderBackend:
return ml.LoadModel(modelFile, starCoder)
case Gpt4AllLlamaBackend:
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
case Gpt4AllMptBackend:
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
case Gpt4AllJBackend:
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
case BertEmbeddingsBackend:
return ml.LoadModel(modelFile, bertEmbeddings)
case RwkvBackend:
return ml.LoadModel(modelFile, rwkvLM(modelFile+tokenizerSuffix, threads))
case WhisperBackend:
return ml.LoadModel(modelFile, whisperModel)
default:
return nil, fmt.Errorf("backend unsupported: %s", backendString)
}
}
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
log.Debug().Msgf("Loading models greedly")
ml.mu.Lock()
m, exists := ml.models[modelFile]
if exists {
ml.mu.Unlock()
return m, nil
}
ml.mu.Unlock()
var err error
for _, b := range backends {
if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
continue
}
log.Debug().Msgf("[%s] Attempting to load", b)
model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
if modelerr == nil && model != nil {
log.Debug().Msgf("[%s] Loads OK", b)
return model, nil
} else if modelerr != nil {
err = multierror.Append(err, modelerr)
log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
}
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}

View File

@@ -10,14 +10,6 @@ import (
"sync"
"text/template"
rwkv "github.com/donomii/go-rwkv.cpp"
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
bloomz "github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/hashicorp/go-multierror"
"github.com/rs/zerolog/log"
)
@@ -25,33 +17,15 @@ type ModelLoader struct {
ModelPath string
mu sync.Mutex
// TODO: this needs generics
models map[string]*llama.LLama
gptmodels map[string]*gptj.GPTJ
gpt2models map[string]*gpt2.GPT2
gptstablelmmodels map[string]*gpt2.StableLM
dollymodels map[string]*gpt2.Dolly
redpajama map[string]*gpt2.RedPajama
rwkv map[string]*rwkv.RwkvState
bloomz map[string]*bloomz.Bloomz
bert map[string]*bert.Bert
promptsTemplates map[string]*template.Template
whisperModels map[string]whisper.Model
models map[string]interface{}
promptsTemplates map[string]*template.Template
}
func NewModelLoader(modelPath string) *ModelLoader {
return &ModelLoader{
ModelPath: modelPath,
gpt2models: make(map[string]*gpt2.GPT2),
gptmodels: make(map[string]*gptj.GPTJ),
gptstablelmmodels: make(map[string]*gpt2.StableLM),
dollymodels: make(map[string]*gpt2.Dolly),
redpajama: make(map[string]*gpt2.RedPajama),
models: make(map[string]*llama.LLama),
rwkv: make(map[string]*rwkv.RwkvState),
bloomz: make(map[string]*bloomz.Bloomz),
bert: make(map[string]*bert.Bert),
promptsTemplates: make(map[string]*template.Template),
whisperModels: make(map[string]whisper.Model),
ModelPath: modelPath,
models: make(map[string]interface{}),
promptsTemplates: make(map[string]*template.Template),
}
}
@@ -136,271 +110,11 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
return nil
}
func (ml *ModelLoader) LoadRedPajama(modelName string) (*gpt2.RedPajama, error) {
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.redpajama[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.NewRedPajama(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.redpajama[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadDollyModel(modelName string) (*gpt2.Dolly, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.dollymodels[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.NewDolly(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.dollymodels[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.gptstablelmmodels[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.NewStableLM(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.gptstablelmmodels[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.bert[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := bert.New(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.bert[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadBloomz(modelName string) (*bloomz.Bloomz, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.bloomz[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := bloomz.New(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.bloomz[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.gpt2models[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gpt2.New(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.gpt2models[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.gptmodels[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := gptj.New(modelFile)
if err != nil {
return nil, err
}
// If there is a prompt template, load it
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
return nil, err
}
ml.gptmodels[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
log.Debug().Msgf("Loading model name: %s", modelName)
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.rwkv[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
tokenPath := filepath.Join(ml.ModelPath, tokenFile)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model := rwkv.LoadFiles(modelFile, tokenPath, threads)
if model == nil {
return nil, fmt.Errorf("could not load model")
}
ml.rwkv[modelName] = model
return model, nil
}
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
log.Debug().Msgf("Loading model name: %s", modelName)
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist")
}
if m, ok := ml.models[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
@@ -410,7 +124,7 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := llama.New(modelFile, opts...)
model, err := loader(modelFile)
if err != nil {
return nil, err
}
@@ -421,162 +135,5 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
}
ml.models[modelName] = model
return model, err
}
func (ml *ModelLoader) LoadWhisperModel(modelName string) (whisper.Model, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model
if !ml.ExistsInModelPath(modelName) {
return nil, fmt.Errorf("model does not exist -- %s", modelName)
}
if m, ok := ml.whisperModels[modelName]; ok {
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
return m, nil
}
// Load the model and keep it in memory for later use
modelFile := filepath.Join(ml.ModelPath, modelName)
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
model, err := whisper.New(modelFile)
if err != nil {
return nil, err
}
ml.whisperModels[modelName] = model
return model, err
}
const tokenizerSuffix = ".tokenizer.json"
var loadedModels map[string]interface{} = map[string]interface{}{}
var muModels sync.Mutex
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
switch strings.ToLower(backendString) {
case "llama":
return ml.LoadLLaMAModel(modelFile, llamaOpts...)
case "bloomz":
return ml.LoadBloomz(modelFile)
case "stablelm":
return ml.LoadStableLMModel(modelFile)
case "dolly":
return ml.LoadDollyModel(modelFile)
case "redpajama":
return ml.LoadRedPajama(modelFile)
case "gpt2":
return ml.LoadGPT2Model(modelFile)
case "gptj":
return ml.LoadGPTJModel(modelFile)
case "bert-embeddings":
return ml.LoadBERT(modelFile)
case "rwkv":
return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
default:
return nil, fmt.Errorf("backend unsupported: %s", backendString)
}
}
func (ml *ModelLoader) WhisperLoader(backendString string, modelFile string) (model whisper.Model, err error) {
//TODO expose more whisper options in next PR
switch strings.ToLower(backendString) {
case "whisper":
return ml.LoadWhisperModel(modelFile)
default:
return nil, fmt.Errorf("whisper backend unsupported: %s", backendString)
}
}
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
updateModels := func(model interface{}) {
muModels.Lock()
defer muModels.Unlock()
loadedModels[modelFile] = model
}
muModels.Lock()
m, exists := loadedModels[modelFile]
if exists {
muModels.Unlock()
return m, nil
}
muModels.Unlock()
model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadGPTJModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadGPT2Model(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadStableLMModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadDollyModel(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadRedPajama(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadBloomz(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
model, modelerr = ml.LoadBERT(modelFile)
if modelerr == nil {
updateModels(model)
return model, nil
} else {
err = multierror.Append(err, modelerr)
}
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
return model, nil
}

View File

@@ -28,7 +28,7 @@ func audioToWav(src, dst string) error {
return nil
}
func Transcript(model whisper.Model, audiopath, language string) (string, error) {
func Transcript(model whisper.Model, audiopath, language string, threads uint) (string, error) {
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
@@ -65,8 +65,12 @@ func Transcript(model whisper.Model, audiopath, language string) (string, error)
}
context.SetThreads(threads)
if language != "" {
context.SetLanguage(language)
} else {
context.SetLanguage("auto")
}
if err := context.Process(data, nil); err != nil {

6
tests/fixtures/embeddings.yaml vendored Normal file
View File

@@ -0,0 +1,6 @@
name: text-embedding-ada-002
parameters:
model: bert
threads: 14
backend: bert-embeddings
embeddings: true

4
tests/fixtures/whisper.yaml vendored Normal file
View File

@@ -0,0 +1,4 @@
name: whisper-1
backend: whisper
parameters:
model: whisper-en