mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
de36a48861 | ||
|
|
961ca93219 | ||
|
|
557ccc5ad8 | ||
|
|
2488c445b6 | ||
|
|
b4241d0a0d | ||
|
|
8250391e49 | ||
|
|
fd1df4e971 | ||
|
|
5115b2faa3 | ||
|
|
93e82a8bf4 | ||
|
|
4413defca5 | ||
|
|
f359e1c6c4 | ||
|
|
1bc87d582d | ||
|
|
a86a383357 | ||
|
|
16f02c7b30 | ||
|
|
fe2706890c | ||
|
|
85f0f8227d | ||
|
|
59e3c02002 |
3
.github/workflows/bump_deps.yaml
vendored
3
.github/workflows/bump_deps.yaml
vendored
@@ -30,6 +30,9 @@ jobs:
|
||||
- repository: "go-skynet/bloomz.cpp"
|
||||
variable: "BLOOMZ_VERSION"
|
||||
branch: "main"
|
||||
- repository: "go-skynet/gpt4all"
|
||||
variable: "GPT4ALL_VERSION"
|
||||
branch: "main"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew update
|
||||
brew install sdl2
|
||||
brew install sdl2 ffmpeg
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
74
Makefile
74
Makefile
@@ -3,13 +3,14 @@ GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
|
||||
GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d
|
||||
GOLLAMA_VERSION?=70593fccbe4b01dedaab805b0f25cb58192c7b38
|
||||
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
|
||||
GPT4ALL_VERSION?=a330bfe26e9e35ca402e16df18973a3b162fb4db
|
||||
GOGPT2_VERSION?=92421a8cf61ed6e03babd9067af292b094cb1307
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
|
||||
WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
|
||||
BERT_VERSION?=ec771ec715576ac050263bb7bb74bfd616a5ba13
|
||||
BERT_VERSION?=ac22f8f74aec5e31bc46242c17e7d511f127856b
|
||||
BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
|
||||
|
||||
|
||||
@@ -19,8 +20,8 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
|
||||
# Use this if you want to set the default behavior
|
||||
ifndef BUILD_TYPE
|
||||
@@ -37,19 +38,26 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## GPT4ALL-J
|
||||
go-gpt4all-j:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
|
||||
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
|
||||
## GPT4ALL
|
||||
gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
||||
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
|
||||
|
||||
## BERT embeddings
|
||||
go-bert:
|
||||
@@ -85,8 +93,8 @@ bloomz/libbloomz.a: bloomz
|
||||
go-bert/libgobert.a: go-bert
|
||||
$(MAKE) -C go-bert libgobert.a
|
||||
|
||||
go-gpt4all-j/libgptj.a: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
|
||||
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a
|
||||
|
||||
## CEREBRAS GPT
|
||||
go-gpt2:
|
||||
@@ -96,8 +104,12 @@ go-gpt2:
|
||||
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
|
||||
|
||||
go-gpt2/libgpt2.a: go-gpt2
|
||||
@@ -119,20 +131,20 @@ go-llama/libbinding.a: go-llama
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
|
||||
$(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
|
||||
|
||||
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert bloomz
|
||||
prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace
|
||||
$(GOCMD) mod download
|
||||
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C go-llama clean
|
||||
$(MAKE) -C go-gpt4all-j clean
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C go-gpt2 clean
|
||||
$(MAKE) -C go-rwkv clean
|
||||
$(MAKE) -C whisper.cpp clean
|
||||
@@ -140,11 +152,11 @@ rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C bloomz clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a replace ## Prepares for building
|
||||
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
|
||||
|
||||
clean: ## Remove build related file
|
||||
rm -fr ./go-llama
|
||||
rm -rf ./go-gpt4all-j
|
||||
rm -rf ./gpt4all
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf ./go-rwkv
|
||||
rm -rf ./go-bert
|
||||
@@ -156,7 +168,7 @@ clean: ## Remove build related file
|
||||
build: prepare ## Build the project
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./
|
||||
|
||||
generic-build: ## Build the project using generic
|
||||
BUILD_TYPE="generic" $(MAKE) build
|
||||
@@ -167,12 +179,16 @@ run: prepare ## run local-ai
|
||||
|
||||
test-models/testmodel:
|
||||
mkdir test-models
|
||||
mkdir test-dir
|
||||
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
|
||||
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
cp tests/fixtures/* test-models
|
||||
|
||||
test: prepare test-models/testmodel
|
||||
cp tests/fixtures/* test-models
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
|
||||
|
||||
## Help:
|
||||
help: ## Show this help.
|
||||
|
||||
35
README.md
35
README.md
@@ -25,7 +25,9 @@ See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/
|
||||
|
||||
## News
|
||||
|
||||
- 10-05-2023: __1.8.0__ released! 🔥 Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
|
||||
- 12-05-2023: __v1.10.0__ released! 🔥🔥 Updated `gpt4all` bindings. Added support for GPTNeox (experimental), RedPajama (experimental), Starcoder (experimental), Replit (experimental), MosaicML MPT. Also now `embeddings` endpoint supports tokens arrays. See the [langchain-chroma](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma) example! Note - this update does NOT include https://github.com/ggerganov/llama.cpp/pull/1405 which makes models incompatible.
|
||||
- 11-05-2023: __v1.9.0__ released! 🔥 Important whisper updates ( https://github.com/go-skynet/LocalAI/pull/233 https://github.com/go-skynet/LocalAI/pull/229 ) and extended gpt4all model families support ( https://github.com/go-skynet/LocalAI/pull/232 ). Redpajama/dolly experimental ( https://github.com/go-skynet/LocalAI/pull/214 )
|
||||
- 10-05-2023: __v1.8.0__ released! 🔥 Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
|
||||
- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
|
||||
- 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
|
||||
- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
|
||||
@@ -35,7 +37,8 @@ Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https:
|
||||
|
||||
### Blogs and articles
|
||||
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters.
|
||||
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/) by Ettore Di Giacinto
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent use case for LocalAI, using AI to analyse Kubernetes clusters, by Tyler Gillson
|
||||
|
||||
## Contribute and help
|
||||
|
||||
@@ -73,7 +76,7 @@ Note: You might need to convert older models to the new format, see [here](https
|
||||
|
||||
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
|
||||
|
||||
Note: rwkv models have an associated tokenizer along that needs to be provided with it:
|
||||
Note: rwkv models need to specify the backend `rwkv` in the YAML config files, and have an associated tokenizer that needs to be provided along with them:
|
||||
|
||||
```
|
||||
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
|
||||
@@ -91,6 +94,30 @@ It should also be compatible with StableLM and GPTNeoX ggml models (untested).
|
||||
Depending on the model you are attempting to run, you might need more RAM or CPU resources. See also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml`-based backends. `rwkv` is less expensive on resources.
|
||||
|
||||
|
||||
### Model compatibility table
|
||||
|
||||
<details>
|
||||
|
||||
| Backend | Compatible models | Completion/Chat endpoint | Audio transcription | Embeddings support | Token stream support | Github | Bindings |
|
||||
|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
|
||||
| llama | Vicuna, Alpaca, LLaMa | yes | no | yes (doesn't seem to be accurate) | yes | https://github.com/ggerganov/llama.cpp | https://github.com/go-skynet/go-llama.cpp |
|
||||
| gpt4all-llama | Vicuna, Alpaca, LLaMa | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt4all-mpt | MPT | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt4all-j | GPT4ALL-J | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt2 | GPT/NeoX, Cerebras | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| dolly | Dolly | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| redpajama | RedPajama | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| stableLM | StableLM GPT/NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| replit | Replit | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| gptneox | GPT NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| starcoder | Starcoder | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| bloomz | Bloom | yes | no | no | no | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp |
|
||||
| rwkv | RWKV | yes | no | no | yes | https://github.com/saharNooby/rwkv.cpp | https://github.com/donomii/go-rwkv.cpp |
|
||||
| bert-embeddings | bert | no | no | yes | no | https://github.com/skeskinen/bert.cpp | https://github.com/go-skynet/go-bert.cpp |
|
||||
| whisper | whisper | no | yes | no | no | https://github.com/ggerganov/whisper.cpp | https://github.com/ggerganov/whisper.cpp |
|
||||
|
||||
</details>
|
||||
|
||||
## Usage
|
||||
|
||||
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
@@ -545,6 +572,7 @@ name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
embeddings: true
|
||||
backend: "bert-embeddings"
|
||||
```
|
||||
|
||||
There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
|
||||
@@ -563,6 +591,7 @@ Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tre
|
||||
|
||||
```yaml
|
||||
name: whisper-1
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: whisper-en
|
||||
```
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
@@ -20,6 +20,7 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
|
||||
|
||||
// Return errors as JSON responses
|
||||
app := fiber.New(fiber.Config{
|
||||
BodyLimit: uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
||||
DisableStartupMessage: disableMessage,
|
||||
// Override default error handler
|
||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
||||
|
||||
@@ -3,6 +3,8 @@ package api_test
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
. "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
@@ -23,7 +25,7 @@ var _ = Describe("API test", func() {
|
||||
Context("API query", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App("", modelLoader, 1, 512, false, true, true)
|
||||
app = App("", modelLoader, 15, 1, 512, false, true, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
@@ -45,8 +47,7 @@ var _ = Describe("API test", func() {
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(3))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
Expect(len(models.Models)).To(Equal(7))
|
||||
})
|
||||
It("can generate completions", func() {
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
|
||||
@@ -79,15 +80,55 @@ var _ = Describe("API test", func() {
|
||||
It("returns errors", func() {
|
||||
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
|
||||
})
|
||||
It("transcribes audio", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
resp, err := client.CreateTranscription(
|
||||
context.Background(),
|
||||
openai.AudioRequest{
|
||||
Model: openai.Whisper1,
|
||||
FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
|
||||
})
|
||||
|
||||
It("calculate embeddings", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
resp, err := client.CreateEmbeddings(
|
||||
context.Background(),
|
||||
openai.EmbeddingRequest{
|
||||
Model: openai.AdaEmbeddingV2,
|
||||
Input: []string{"sun", "cat"},
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
||||
|
||||
sunEmbedding := resp.Data[0].Embedding
|
||||
resp2, err := client.CreateEmbeddings(
|
||||
context.Background(),
|
||||
openai.EmbeddingRequest{
|
||||
Model: openai.AdaEmbeddingV2,
|
||||
Input: []string{"sun"},
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
|
||||
})
|
||||
})
|
||||
|
||||
Context("Config file", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
|
||||
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
@@ -108,8 +149,7 @@ var _ = Describe("API test", func() {
|
||||
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(5))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
Expect(len(models.Models)).To(Equal(9))
|
||||
})
|
||||
It("can generate chat completions from config file", func() {
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
||||
@@ -134,5 +174,6 @@ var _ = Describe("API test", func() {
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
})
|
||||
})
|
||||
|
||||
@@ -285,5 +285,10 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
|
||||
}
|
||||
}
|
||||
|
||||
// Enforce debug flag if passed from CLI
|
||||
if debug {
|
||||
config.Debug = true
|
||||
}
|
||||
|
||||
return config, input, nil
|
||||
}
|
||||
|
||||
@@ -12,8 +12,10 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/whisper"
|
||||
whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
@@ -407,14 +409,13 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
|
||||
// retrieve the file data from the request
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
|
||||
return err
|
||||
}
|
||||
f, err := file.Open()
|
||||
if err != nil {
|
||||
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
log.Debug().Msgf("Audio file: %+v", file)
|
||||
|
||||
dir, err := os.MkdirTemp("", "whisper")
|
||||
|
||||
@@ -426,24 +427,33 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
|
||||
dst := filepath.Join(dir, path.Base(file.Filename))
|
||||
dstFile, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := io.Copy(dstFile, f); err != nil {
|
||||
log.Debug().Msgf("Audio file %+v - %+v - err %+v", file.Filename, dst, err)
|
||||
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Audio file copied to: %+v", dst)
|
||||
|
||||
whisperModel, err := loader.WhisperLoader("whisper", config.Model)
|
||||
whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
|
||||
if err != nil {
|
||||
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
|
||||
return err
|
||||
}
|
||||
|
||||
tr, err := whisper.Transcript(whisperModel, dst, input.Language)
|
||||
if whisperModel == nil {
|
||||
return fmt.Errorf("could not load whisper model")
|
||||
}
|
||||
|
||||
w, ok := whisperModel.(whisper.Model)
|
||||
if !ok {
|
||||
return fmt.Errorf("loader returned non-whisper object")
|
||||
}
|
||||
|
||||
tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
|
||||
if err != nil {
|
||||
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Trascribed: %+v", tr)
|
||||
|
||||
@@ -11,8 +11,8 @@ import (
|
||||
"github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
@@ -68,7 +68,7 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
|
||||
case *bert.Bert:
|
||||
fn = func() ([]float32, error) {
|
||||
if len(tokens) > 0 {
|
||||
return nil, fmt.Errorf("embeddings endpoint for this model supports only string")
|
||||
return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
|
||||
}
|
||||
return model.Embeddings(s, bert.SetThreads(c.Threads))
|
||||
}
|
||||
@@ -199,6 +199,78 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
|
||||
return response, nil
|
||||
}
|
||||
case *gpt2.GPTNeoX:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.Replit:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.Starcoder:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.RedPajama:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
@@ -315,29 +387,35 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gptj.GPTJ:
|
||||
case *gpt4all.Model:
|
||||
supportStreams = true
|
||||
|
||||
fn = func() (string, error) {
|
||||
if tokenCallback != nil {
|
||||
model.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gptj.PredictOption{
|
||||
gptj.SetTemperature(c.Temperature),
|
||||
gptj.SetTopP(c.TopP),
|
||||
gptj.SetTopK(c.TopK),
|
||||
gptj.SetTokens(c.Maxtokens),
|
||||
gptj.SetThreads(c.Threads),
|
||||
predictOptions := []gpt4all.PredictOption{
|
||||
gpt4all.SetTemperature(c.Temperature),
|
||||
gpt4all.SetTopP(c.TopP),
|
||||
gpt4all.SetTopK(c.TopK),
|
||||
gpt4all.SetTokens(c.Maxtokens),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
|
||||
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
str, er := model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
model.SetTokenCallback(nil)
|
||||
return str, er
|
||||
}
|
||||
case *llama.LLama:
|
||||
supportStreams = true
|
||||
|
||||
@@ -65,7 +65,7 @@ Run a slack bot which lets you talk directly with a model
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)
|
||||
|
||||
### Question answering on documents
|
||||
### Question answering on documents with llama-index
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
@@ -73,6 +73,14 @@ Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/st
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
|
||||
|
||||
### Question answering on documents with langchain and chroma
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
Shows how to integrate with `Langchain` and `Chroma` to enable question answering on a set of documents.
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma/)
|
||||
|
||||
### Template for Runpod.io
|
||||
|
||||
_by [@fHachenberg](https://github.com/fHachenberg)_
|
||||
|
||||
54
examples/langchain-chroma/README.md
Normal file
54
examples/langchain-chroma/README.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# Data query example
|
||||
|
||||
This example makes use of [langchain and chroma](https://blog.langchain.dev/langchain-chroma/) to enable question answering on a set of documents.
|
||||
|
||||
## Setup
|
||||
|
||||
Download the models and start the API:
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/langchain-chroma
|
||||
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
### Python requirements
|
||||
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Create a storage
|
||||
|
||||
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt
|
||||
python store.py
|
||||
```
|
||||
|
||||
After it finishes, a directory "db" will be created with the vector index database.
|
||||
|
||||
## Query
|
||||
|
||||
We can now query the dataset.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python query.py
|
||||
# President Trump recently stated during a press conference regarding tax reform legislation that "we're getting rid of all these loopholes." He also mentioned that he wants to simplify the system further through changes such as increasing the standard deduction amount and making other adjustments aimed at reducing taxpayers' overall burden.
|
||||
```
|
||||
|
||||
Keep in mind that, for now, results are hit or miss!
|
||||
1
examples/langchain-chroma/models/completion.tmpl
Normal file
1
examples/langchain-chroma/models/completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
5
examples/langchain-chroma/models/embeddings.yaml
Normal file
5
examples/langchain-chroma/models/embeddings.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
16
examples/langchain-chroma/models/gpt-3.5-turbo.yaml
Normal file
16
examples/langchain-chroma/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
4
examples/langchain-chroma/models/gpt4all.tmpl
Normal file
4
examples/langchain-chroma/models/gpt4all.tmpl
Normal file
@@ -0,0 +1,4 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
31
examples/langchain-chroma/query.py
Normal file
31
examples/langchain-chroma/query.py
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
import os
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter,CharacterTextSplitter
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import VectorDBQA
|
||||
from langchain.document_loaders import TextLoader
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# Load and process the text
|
||||
loader = TextLoader('state_of_the_union.txt')
|
||||
documents = loader.load()
|
||||
|
||||
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
|
||||
texts = text_splitter.split_documents(documents)
|
||||
|
||||
# Embed and store the texts
|
||||
# Supplying a persist_directory will store the embeddings on disk
|
||||
persist_directory = 'db'
|
||||
|
||||
embedding = OpenAIEmbeddings()
|
||||
|
||||
# Now we can load the persisted database from disk, and use it as normal.
|
||||
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
|
||||
qa = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path), chain_type="stuff", vectorstore=vectordb)
|
||||
|
||||
query = "What the president said about taxes ?"
|
||||
print(qa.run(query))
|
||||
|
||||
4
examples/langchain-chroma/requirements.txt
Normal file
4
examples/langchain-chroma/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
langchain==0.0.160
|
||||
openai==0.27.6
|
||||
chromadb==0.3.21
|
||||
llama-index==0.6.2
|
||||
28
examples/langchain-chroma/store.py
Executable file
28
examples/langchain-chroma/store.py
Executable file
@@ -0,0 +1,28 @@
|
||||
|
||||
import os
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import VectorDBQA
|
||||
from langchain.document_loaders import TextLoader
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# Load and process the text
|
||||
loader = TextLoader('state_of_the_union.txt')
|
||||
documents = loader.load()
|
||||
|
||||
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
|
||||
#text_splitter = TokenTextSplitter()
|
||||
texts = text_splitter.split_documents(documents)
|
||||
|
||||
# Embed and store the texts
|
||||
# Supplying a persist_directory will store the embeddings on disk
|
||||
persist_directory = 'db'
|
||||
|
||||
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
|
||||
vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
|
||||
|
||||
vectordb.persist()
|
||||
vectordb = None
|
||||
7
go.mod
7
go.mod
@@ -3,11 +3,11 @@ module github.com/go-skynet/LocalAI
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a
|
||||
github.com/go-audio/wav v1.1.0
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b
|
||||
@@ -18,7 +18,7 @@ require (
|
||||
github.com/otiai10/copy v1.11.0
|
||||
github.com/otiai10/openaigo v1.1.0
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/sashabaranov/go-openai v1.9.3
|
||||
github.com/sashabaranov/go-openai v1.9.4
|
||||
github.com/swaggo/swag v1.16.1
|
||||
github.com/urfave/cli/v2 v2.25.3
|
||||
github.com/valyala/fasthttp v1.47.0
|
||||
@@ -49,6 +49,7 @@ require (
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.18 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect
|
||||
github.com/philhofer/fwd v1.1.2 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
|
||||
6
go.sum
6
go.sum
@@ -18,6 +18,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
|
||||
@@ -44,6 +46,8 @@ github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVe
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
|
||||
@@ -115,6 +119,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
|
||||
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
|
||||
github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
|
||||
|
||||
8
main.go
8
main.go
@@ -62,6 +62,12 @@ func main() {
|
||||
EnvVars: []string{"CONTEXT_SIZE"},
|
||||
Value: 512,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "upload-limit",
|
||||
DefaultText: "Default upload-limit. MB",
|
||||
EnvVars: []string{"UPLOAD_LIMIT"},
|
||||
Value: 15,
|
||||
},
|
||||
},
|
||||
Description: `
|
||||
LocalAI is a drop-in replacement OpenAI API which runs inference locally.
|
||||
@@ -81,7 +87,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
|
||||
Copyright: "go-skynet authors",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
|
||||
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
182
pkg/model/initializers.go
Normal file
182
pkg/model/initializers.go
Normal file
@@ -0,0 +1,182 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
rwkv "github.com/donomii/go-rwkv.cpp"
|
||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
bloomz "github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
const (
|
||||
LlamaBackend = "llama"
|
||||
BloomzBackend = "bloomz"
|
||||
StarcoderBackend = "starcoder"
|
||||
StableLMBackend = "stablelm"
|
||||
DollyBackend = "dolly"
|
||||
RedPajamaBackend = "redpajama"
|
||||
GPTNeoXBackend = "gptneox"
|
||||
ReplitBackend = "replit"
|
||||
Gpt2Backend = "gpt2"
|
||||
Gpt4AllLlamaBackend = "gpt4all-llama"
|
||||
Gpt4AllMptBackend = "gpt4all-mpt"
|
||||
Gpt4AllJBackend = "gpt4all-j"
|
||||
BertEmbeddingsBackend = "bert-embeddings"
|
||||
RwkvBackend = "rwkv"
|
||||
WhisperBackend = "whisper"
|
||||
)
|
||||
|
||||
var backends []string = []string{
|
||||
LlamaBackend,
|
||||
Gpt4AllLlamaBackend,
|
||||
Gpt4AllMptBackend,
|
||||
Gpt4AllJBackend,
|
||||
Gpt2Backend,
|
||||
WhisperBackend,
|
||||
RwkvBackend,
|
||||
BloomzBackend,
|
||||
StableLMBackend,
|
||||
DollyBackend,
|
||||
RedPajamaBackend,
|
||||
GPTNeoXBackend,
|
||||
ReplitBackend,
|
||||
BertEmbeddingsBackend,
|
||||
StarcoderBackend,
|
||||
}
|
||||
|
||||
var starCoder = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewStarcoder(modelFile)
|
||||
}
|
||||
|
||||
var redPajama = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewRedPajama(modelFile)
|
||||
}
|
||||
|
||||
var dolly = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewDolly(modelFile)
|
||||
}
|
||||
|
||||
var gptNeoX = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewGPTNeoX(modelFile)
|
||||
}
|
||||
|
||||
var replit = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewReplit(modelFile)
|
||||
}
|
||||
|
||||
var stableLM = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewStableLM(modelFile)
|
||||
}
|
||||
|
||||
var bertEmbeddings = func(modelFile string) (interface{}, error) {
|
||||
return bert.New(modelFile)
|
||||
}
|
||||
|
||||
var bloomzLM = func(modelFile string) (interface{}, error) {
|
||||
return bloomz.New(modelFile)
|
||||
}
|
||||
var gpt2LM = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.New(modelFile)
|
||||
}
|
||||
|
||||
var whisperModel = func(modelFile string) (interface{}, error) {
|
||||
return whisper.New(modelFile)
|
||||
}
|
||||
|
||||
func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
return llama.New(s, opts...)
|
||||
}
|
||||
}
|
||||
|
||||
func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
return gpt4all.New(s, opts...)
|
||||
}
|
||||
}
|
||||
|
||||
func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
model := rwkv.LoadFiles(s, tokenFile, threads)
|
||||
if model == nil {
|
||||
return nil, fmt.Errorf("could not load model")
|
||||
}
|
||||
return model, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
switch strings.ToLower(backendString) {
|
||||
case LlamaBackend:
|
||||
return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
|
||||
case BloomzBackend:
|
||||
return ml.LoadModel(modelFile, bloomzLM)
|
||||
case StableLMBackend:
|
||||
return ml.LoadModel(modelFile, stableLM)
|
||||
case DollyBackend:
|
||||
return ml.LoadModel(modelFile, dolly)
|
||||
case RedPajamaBackend:
|
||||
return ml.LoadModel(modelFile, redPajama)
|
||||
case Gpt2Backend:
|
||||
return ml.LoadModel(modelFile, gpt2LM)
|
||||
case GPTNeoXBackend:
|
||||
return ml.LoadModel(modelFile, gptNeoX)
|
||||
case ReplitBackend:
|
||||
return ml.LoadModel(modelFile, replit)
|
||||
case StarcoderBackend:
|
||||
return ml.LoadModel(modelFile, starCoder)
|
||||
case Gpt4AllLlamaBackend:
|
||||
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
|
||||
case Gpt4AllMptBackend:
|
||||
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
|
||||
case Gpt4AllJBackend:
|
||||
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
|
||||
case BertEmbeddingsBackend:
|
||||
return ml.LoadModel(modelFile, bertEmbeddings)
|
||||
case RwkvBackend:
|
||||
return ml.LoadModel(modelFile, rwkvLM(modelFile+tokenizerSuffix, threads))
|
||||
case WhisperBackend:
|
||||
return ml.LoadModel(modelFile, whisperModel)
|
||||
default:
|
||||
return nil, fmt.Errorf("backend unsupported: %s", backendString)
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
|
||||
log.Debug().Msgf("Loading models greedly")
|
||||
|
||||
ml.mu.Lock()
|
||||
m, exists := ml.models[modelFile]
|
||||
if exists {
|
||||
ml.mu.Unlock()
|
||||
return m, nil
|
||||
}
|
||||
ml.mu.Unlock()
|
||||
var err error
|
||||
|
||||
for _, b := range backends {
|
||||
if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
|
||||
continue
|
||||
}
|
||||
log.Debug().Msgf("[%s] Attempting to load", b)
|
||||
model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
|
||||
if modelerr == nil && model != nil {
|
||||
log.Debug().Msgf("[%s] Loads OK", b)
|
||||
return model, nil
|
||||
} else if modelerr != nil {
|
||||
err = multierror.Append(err, modelerr)
|
||||
log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
}
|
||||
@@ -10,14 +10,6 @@ import (
|
||||
"sync"
|
||||
"text/template"
|
||||
|
||||
rwkv "github.com/donomii/go-rwkv.cpp"
|
||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
bloomz "github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@@ -25,33 +17,15 @@ type ModelLoader struct {
|
||||
ModelPath string
|
||||
mu sync.Mutex
|
||||
// TODO: this needs generics
|
||||
models map[string]*llama.LLama
|
||||
gptmodels map[string]*gptj.GPTJ
|
||||
gpt2models map[string]*gpt2.GPT2
|
||||
gptstablelmmodels map[string]*gpt2.StableLM
|
||||
dollymodels map[string]*gpt2.Dolly
|
||||
redpajama map[string]*gpt2.RedPajama
|
||||
rwkv map[string]*rwkv.RwkvState
|
||||
bloomz map[string]*bloomz.Bloomz
|
||||
bert map[string]*bert.Bert
|
||||
promptsTemplates map[string]*template.Template
|
||||
whisperModels map[string]whisper.Model
|
||||
models map[string]interface{}
|
||||
promptsTemplates map[string]*template.Template
|
||||
}
|
||||
|
||||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
return &ModelLoader{
|
||||
ModelPath: modelPath,
|
||||
gpt2models: make(map[string]*gpt2.GPT2),
|
||||
gptmodels: make(map[string]*gptj.GPTJ),
|
||||
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
||||
dollymodels: make(map[string]*gpt2.Dolly),
|
||||
redpajama: make(map[string]*gpt2.RedPajama),
|
||||
models: make(map[string]*llama.LLama),
|
||||
rwkv: make(map[string]*rwkv.RwkvState),
|
||||
bloomz: make(map[string]*bloomz.Bloomz),
|
||||
bert: make(map[string]*bert.Bert),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
whisperModels: make(map[string]whisper.Model),
|
||||
ModelPath: modelPath,
|
||||
models: make(map[string]interface{}),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -136,271 +110,11 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadRedPajama(modelName string) (*gpt2.RedPajama, error) {
|
||||
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.redpajama[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.NewRedPajama(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.redpajama[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadDollyModel(modelName string) (*gpt2.Dolly, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.dollymodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.NewDolly(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.dollymodels[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.NewStableLM(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gptstablelmmodels[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.bert[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := bert.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.bert[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadBloomz(modelName string) (*bloomz.Bloomz, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.bloomz[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := bloomz.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.bloomz[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gpt2models[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gpt2models[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gptmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gptj.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gptmodels[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
log.Debug().Msgf("Loading model name: %s", modelName)
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.rwkv[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
tokenPath := filepath.Join(ml.ModelPath, tokenFile)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model := rwkv.LoadFiles(modelFile, tokenPath, threads)
|
||||
if model == nil {
|
||||
return nil, fmt.Errorf("could not load model")
|
||||
}
|
||||
|
||||
ml.rwkv[modelName] = model
|
||||
return model, nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
log.Debug().Msgf("Loading model name: %s", modelName)
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.models[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
@@ -410,7 +124,7 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := llama.New(modelFile, opts...)
|
||||
model, err := loader(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -421,162 +135,5 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
|
||||
}
|
||||
|
||||
ml.models[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadWhisperModel(modelName string) (whisper.Model, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist -- %s", modelName)
|
||||
}
|
||||
|
||||
if m, ok := ml.whisperModels[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := whisper.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.whisperModels[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
// tokenizerSuffix is appended to a model file name to obtain the name of the
// tokenizer file passed to the RWKV loader.
const tokenizerSuffix = ".tokenizer.json"

var (
	// loadedModels caches the models resolved by GreedyLoader, keyed by
	// model file name.
	loadedModels = make(map[string]interface{})

	// muModels guards loadedModels.
	muModels sync.Mutex
)
|
||||
|
||||
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
switch strings.ToLower(backendString) {
|
||||
case "llama":
|
||||
return ml.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
case "bloomz":
|
||||
return ml.LoadBloomz(modelFile)
|
||||
case "stablelm":
|
||||
return ml.LoadStableLMModel(modelFile)
|
||||
case "dolly":
|
||||
return ml.LoadDollyModel(modelFile)
|
||||
case "redpajama":
|
||||
return ml.LoadRedPajama(modelFile)
|
||||
case "gpt2":
|
||||
return ml.LoadGPT2Model(modelFile)
|
||||
case "gptj":
|
||||
return ml.LoadGPTJModel(modelFile)
|
||||
case "bert-embeddings":
|
||||
return ml.LoadBERT(modelFile)
|
||||
case "rwkv":
|
||||
return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
default:
|
||||
return nil, fmt.Errorf("backend unsupported: %s", backendString)
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) WhisperLoader(backendString string, modelFile string) (model whisper.Model, err error) {
|
||||
//TODO expose more whisper options in next PR
|
||||
switch strings.ToLower(backendString) {
|
||||
case "whisper":
|
||||
return ml.LoadWhisperModel(modelFile)
|
||||
default:
|
||||
return nil, fmt.Errorf("whisper backend unsupported: %s", backendString)
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
updateModels := func(model interface{}) {
|
||||
muModels.Lock()
|
||||
defer muModels.Unlock()
|
||||
loadedModels[modelFile] = model
|
||||
}
|
||||
|
||||
muModels.Lock()
|
||||
m, exists := loadedModels[modelFile]
|
||||
if exists {
|
||||
muModels.Unlock()
|
||||
return m, nil
|
||||
}
|
||||
muModels.Unlock()
|
||||
|
||||
model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadGPTJModel(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadGPT2Model(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadStableLMModel(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadDollyModel(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadRedPajama(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadBloomz(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = ml.LoadBERT(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
return model, nil
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ func audioToWav(src, dst string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func Transcript(model whisper.Model, audiopath, language string) (string, error) {
|
||||
func Transcript(model whisper.Model, audiopath, language string, threads uint) (string, error) {
|
||||
|
||||
dir, err := os.MkdirTemp("", "whisper")
|
||||
if err != nil {
|
||||
@@ -65,8 +65,12 @@ func Transcript(model whisper.Model, audiopath, language string) (string, error)
|
||||
|
||||
}
|
||||
|
||||
context.SetThreads(threads)
|
||||
|
||||
if language != "" {
|
||||
context.SetLanguage(language)
|
||||
} else {
|
||||
context.SetLanguage("auto")
|
||||
}
|
||||
|
||||
if err := context.Process(data, nil); err != nil {
|
||||
|
||||
6
tests/fixtures/embeddings.yaml
vendored
Normal file
6
tests/fixtures/embeddings.yaml
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
threads: 14
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
4
tests/fixtures/whisper.yaml
vendored
Normal file
4
tests/fixtures/whisper.yaml
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
name: whisper-1
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: whisper-en
|
||||
Reference in New Issue
Block a user