mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 03:02:38 -05:00
Compare commits
70 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
de36a48861 | ||
|
|
961ca93219 | ||
|
|
557ccc5ad8 | ||
|
|
2488c445b6 | ||
|
|
b4241d0a0d | ||
|
|
8250391e49 | ||
|
|
fd1df4e971 | ||
|
|
5115b2faa3 | ||
|
|
93e82a8bf4 | ||
|
|
4413defca5 | ||
|
|
f359e1c6c4 | ||
|
|
1bc87d582d | ||
|
|
a86a383357 | ||
|
|
16f02c7b30 | ||
|
|
fe2706890c | ||
|
|
85f0f8227d | ||
|
|
59e3c02002 | ||
|
|
032dee256f | ||
|
|
6b5e2b2bf5 | ||
|
|
6fc303de87 | ||
|
|
6ad6e4873d | ||
|
|
d6d7391da8 | ||
|
|
11675932ac | ||
|
|
f02202e1e1 | ||
|
|
f8ee20991c | ||
|
|
e6db14e2f1 | ||
|
|
d00886abea | ||
|
|
4873d2bfa1 | ||
|
|
9f426578cf | ||
|
|
9d01b695a8 | ||
|
|
93829ab228 | ||
|
|
dd234f86d5 | ||
|
|
3daff6f1aa | ||
|
|
89dfa0f5fc | ||
|
|
bc03c492a0 | ||
|
|
f50a4c1454 | ||
|
|
d13d4d95ce | ||
|
|
428790ec06 | ||
|
|
4f551ce414 | ||
|
|
6ed7b10273 | ||
|
|
02979566ee | ||
|
|
cbdcc839f3 | ||
|
|
e1c8f087f4 | ||
|
|
3a90ea44a5 | ||
|
|
e55492475d | ||
|
|
07ec2e441d | ||
|
|
38d7e0b43c | ||
|
|
3411bfd00d | ||
|
|
7e5fe35ae4 | ||
|
|
8c8cf38d4d | ||
|
|
75b25297fd | ||
|
|
009ee47fe2 | ||
|
|
ec2adc2c03 | ||
|
|
ad301e6ed7 | ||
|
|
d094381e5d | ||
|
|
3ff9bbd217 | ||
|
|
e62ee2bc06 | ||
|
|
b49721cdd1 | ||
|
|
64c0a7967f | ||
|
|
e96eadab40 | ||
|
|
e73283121b | ||
|
|
857d13e8d6 | ||
|
|
91db3d4d5c | ||
|
|
961cf29217 | ||
|
|
c839b334eb | ||
|
|
714bfcd45b | ||
|
|
77ce8b953e | ||
|
|
01ada95941 | ||
|
|
eabdc5042a | ||
|
|
96267d9437 |
12
.github/workflows/bump_deps.yaml
vendored
12
.github/workflows/bump_deps.yaml
vendored
@@ -21,6 +21,18 @@ jobs:
|
||||
- repository: "donomii/go-rwkv.cpp"
|
||||
variable: "RWKV_VERSION"
|
||||
branch: "main"
|
||||
- repository: "ggerganov/whisper.cpp"
|
||||
variable: "WHISPER_CPP_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/go-bert.cpp"
|
||||
variable: "BERT_VERSION"
|
||||
branch: "master"
|
||||
- repository: "go-skynet/bloomz.cpp"
|
||||
variable: "BLOOMZ_VERSION"
|
||||
branch: "main"
|
||||
- repository: "go-skynet/gpt4all"
|
||||
variable: "GPT4ALL_VERSION"
|
||||
branch: "main"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew update
|
||||
brew install sdl2
|
||||
brew install sdl2 ffmpeg
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -2,6 +2,8 @@
|
||||
go-llama
|
||||
go-gpt4all-j
|
||||
go-gpt2
|
||||
go-rwkv
|
||||
whisper.cpp
|
||||
|
||||
# LocalAI build binary
|
||||
LocalAI
|
||||
@@ -11,4 +13,7 @@ local-ai
|
||||
|
||||
# Ignore models
|
||||
models/*
|
||||
test-models/
|
||||
test-models/
|
||||
|
||||
# just in case
|
||||
.DS_Store
|
||||
|
||||
119
Makefile
119
Makefile
@@ -3,11 +3,16 @@ GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
|
||||
GOLLAMA_VERSION?=llama.cpp-f4cef87
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
|
||||
GOLLAMA_VERSION?=70593fccbe4b01dedaab805b0f25cb58192c7b38
|
||||
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
|
||||
GPT4ALL_VERSION?=a330bfe26e9e35ca402e16df18973a3b162fb4db
|
||||
GOGPT2_VERSION?=92421a8cf61ed6e03babd9067af292b094cb1307
|
||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||
RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
|
||||
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
|
||||
WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
|
||||
BERT_VERSION?=ac22f8f74aec5e31bc46242c17e7d511f127856b
|
||||
BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
|
||||
|
||||
|
||||
GREEN := $(shell tput -Txterm setaf 2)
|
||||
YELLOW := $(shell tput -Txterm setaf 3)
|
||||
@@ -15,8 +20,8 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
|
||||
|
||||
# Use this if you want to set the default behavior
|
||||
ifndef BUILD_TYPE
|
||||
@@ -33,30 +38,63 @@ endif
|
||||
|
||||
all: help
|
||||
|
||||
## GPT4ALL-J
|
||||
go-gpt4all-j:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
|
||||
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION) && git submodule update --init --recursive --depth 1
|
||||
## GPT4ALL
|
||||
gpt4all:
|
||||
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
|
||||
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
|
||||
|
||||
## BERT embeddings
|
||||
go-bert:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
|
||||
cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
|
||||
@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
|
||||
@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
|
||||
@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
|
||||
|
||||
## RWKV
|
||||
go-rwkv:
|
||||
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
|
||||
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
|
||||
@find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
|
||||
@find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
|
||||
@find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
|
||||
|
||||
go-rwkv/librwkv.a: go-rwkv
|
||||
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
|
||||
|
||||
go-gpt4all-j/libgptj.a: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
|
||||
## bloomz
|
||||
bloomz:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
|
||||
@find ./bloomz -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
|
||||
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
|
||||
@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
|
||||
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
|
||||
@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
|
||||
|
||||
bloomz/libbloomz.a: bloomz
|
||||
cd bloomz && make libbloomz.a
|
||||
|
||||
go-bert/libgobert.a: go-bert
|
||||
$(MAKE) -C go-bert libgobert.a
|
||||
|
||||
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a
|
||||
|
||||
## CEREBRAS GPT
|
||||
go-gpt2:
|
||||
@@ -66,13 +104,24 @@ go-gpt2:
|
||||
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
|
||||
|
||||
go-gpt2/libgpt2.a: go-gpt2
|
||||
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
|
||||
|
||||
whisper.cpp:
|
||||
git clone https://github.com/ggerganov/whisper.cpp.git
|
||||
cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
|
||||
|
||||
whisper.cpp/libwhisper.a: whisper.cpp
|
||||
cd whisper.cpp && make libwhisper.a
|
||||
|
||||
go-llama:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
|
||||
@@ -82,28 +131,36 @@ go-llama/libbinding.a: go-llama
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
|
||||
$(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
|
||||
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
|
||||
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
|
||||
|
||||
prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv
|
||||
prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace
|
||||
$(GOCMD) mod download
|
||||
|
||||
## GENERIC
|
||||
rebuild: ## Rebuilds the project
|
||||
$(MAKE) -C go-llama clean
|
||||
$(MAKE) -C go-gpt4all-j clean
|
||||
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
|
||||
$(MAKE) -C go-gpt2 clean
|
||||
$(MAKE) -C go-rwkv clean
|
||||
$(MAKE) -C whisper.cpp clean
|
||||
$(MAKE) -C go-bert clean
|
||||
$(MAKE) -C bloomz clean
|
||||
$(MAKE) build
|
||||
|
||||
prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a replace ## Prepares for building
|
||||
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
|
||||
|
||||
clean: ## Remove build related file
|
||||
rm -fr ./go-llama
|
||||
rm -rf ./go-gpt4all-j
|
||||
rm -rf ./gpt4all
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf ./go-rwkv
|
||||
rm -rf ./go-bert
|
||||
rm -rf ./bloomz
|
||||
rm -rf $(BINARY_NAME)
|
||||
|
||||
## Build:
|
||||
@@ -111,7 +168,7 @@ clean: ## Remove build related file
|
||||
build: prepare ## Build the project
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./
|
||||
|
||||
generic-build: ## Build the project using generic
|
||||
BUILD_TYPE="generic" $(MAKE) build
|
||||
@@ -122,12 +179,16 @@ run: prepare ## run local-ai
|
||||
|
||||
test-models/testmodel:
|
||||
mkdir test-models
|
||||
mkdir test-dir
|
||||
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
|
||||
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
|
||||
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
|
||||
cp tests/fixtures/* test-models
|
||||
|
||||
test: prepare test-models/testmodel
|
||||
cp tests/fixtures/* test-models
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v -timeout 30m ./...
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
|
||||
|
||||
## Help:
|
||||
help: ## Show this help.
|
||||
|
||||
126
README.md
126
README.md
@@ -9,30 +9,48 @@
|
||||
|
||||
[](https://discord.gg/uJAeKSAGDy)
|
||||
|
||||
**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is licensed under Apache 2.0.
|
||||
**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware, supporting multiple models families. Supports also GPT4ALL-J which is licensed under Apache 2.0.
|
||||
|
||||
- OpenAI compatible API
|
||||
- Supports multiple-models
|
||||
- Supports multiple models
|
||||
- Once loaded the first time, it keep models loaded in memory for faster inference
|
||||
- Support for prompt templates
|
||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance.
|
||||
|
||||
LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
|
||||
|
||||
### News
|
||||
LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, and [bert.cpp](https://github.com/skeskinen/bert.cpp) for embedding.
|
||||
|
||||
See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
|
||||
## News
|
||||
|
||||
- 12-05-2023: __v1.10.0__ released! 🔥🔥 Updated `gpt4all` bindings. Added support for GPTNeox (experimental), RedPajama (experimental), Starcoder (experimental), Replit (experimental), MosaicML MPT. Also now `embeddings` endpoint supports tokens arrays. See the [langchain-chroma](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma) example! Note - this update does NOT include https://github.com/ggerganov/llama.cpp/pull/1405 which makes models incompatible.
|
||||
- 11-05-2023: __v1.9.0__ released! 🔥 Important whisper updates ( https://github.com/go-skynet/LocalAI/pull/233 https://github.com/go-skynet/LocalAI/pull/229 ) and extended gpt4all model families support ( https://github.com/go-skynet/LocalAI/pull/232 ). Redpajama/dolly experimental ( https://github.com/go-skynet/LocalAI/pull/214 )
|
||||
- 10-05-2023: __v1.8.0__ released! 🔥 Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
|
||||
- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
|
||||
- 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
|
||||
- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
|
||||
- 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )
|
||||
|
||||
### Socials and community chatter
|
||||
Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it)
|
||||
|
||||
- Follow [@LocalAI_API](https://twitter.com/LocalAI_API) on twitter.
|
||||
### Blogs and articles
|
||||
|
||||
- [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
|
||||
- [Question Answering on Documents locally with LangChain, LocalAI, Chroma, and GPT4All](https://mudler.pm/posts/localai-question-answering/) by Ettore Di Giacinto
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters. by Tyller Gillson
|
||||
|
||||
## Contribute and help
|
||||
|
||||
To help the project you can:
|
||||
|
||||
- Upvote the [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
|
||||
|
||||
- [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project.
|
||||
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - excellent usecase for localAI, using AI to analyse Kubernetes clusters.
|
||||
- If you have technological skills and want to contribute to development, have a look at the open issues. If you are new you can have a look at the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels.
|
||||
|
||||
- If you don't have technological skills you can still help improving documentation or add examples or share your user-stories with our community, any help and contribution is welcome!
|
||||
|
||||
## Model compatibility
|
||||
|
||||
@@ -41,32 +59,24 @@ It is compatible with the models supported by [llama.cpp](https://github.com/gge
|
||||
Tested with:
|
||||
- Vicuna
|
||||
- Alpaca
|
||||
- [GPT4ALL](https://github.com/nomic-ai/gpt4all)
|
||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
|
||||
- [GPT4ALL](https://github.com/nomic-ai/gpt4all) (changes required, see below)
|
||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin) (no changes required)
|
||||
- Koala
|
||||
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
|
||||
- WizardLM
|
||||
- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
|
||||
|
||||
### Vicuna, Alpaca, LLaMa...
|
||||
|
||||
[llama.cpp](https://github.com/ggerganov/llama.cpp) based models are compatible
|
||||
|
||||
### GPT4ALL
|
||||
|
||||
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
|
||||
|
||||
### GPT4ALL-J
|
||||
|
||||
No changes required to the model.
|
||||
|
||||
### RWKV
|
||||
|
||||
<details>
|
||||
|
||||
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
|
||||
|
||||
Note: rwkv models have an associated tokenizer along that needs to be provided with it:
|
||||
Note: rwkv models needs to specify the backend `rwkv` in the YAML config files and have an associated tokenizer along that needs to be provided with it:
|
||||
|
||||
```
|
||||
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
|
||||
@@ -84,6 +94,30 @@ It should also be compatible with StableLM and GPTNeoX ggml models (untested).
|
||||
Depending on the model you are attempting to run might need more RAM or CPU resources. Check out also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources.
|
||||
|
||||
|
||||
### Model compatibility table
|
||||
|
||||
<details>
|
||||
|
||||
| Backend | Compatible models | Completion/Chat endpoint | Audio transcription | Embeddings support | Token stream support | Github | Bindings |
|
||||
|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
|
||||
| llama | Vicuna, Alpaca, LLaMa | yes | no | yes (doesn't seem to be accurate) | yes | https://github.com/ggerganov/llama.cpp | https://github.com/go-skynet/go-llama.cpp |
|
||||
| gpt4all-llama | Vicuna, Alpaca, LLaMa | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt4all-mpt | MPT | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt4all-j | GPT4ALL-J | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt2 | GPT/NeoX, Cerebras | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| dolly | Dolly | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| redpajama | RedPajama | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| stableLM | StableLM GPT/NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| replit | Replit | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| gptneox | GPT NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| starcoder | Starcoder | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| bloomz | Bloom | yes | no | no | no | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp |
|
||||
| rwkv | RWKV | yes | no | no | yes | https://github.com/saharNooby/rwkv.cpp | https://github.com/donomii/go-rwkv.cpp |
|
||||
| bert-embeddings | bert | no | no | yes | no | https://github.com/skeskinen/bert.cpp | https://github.com/go-skynet/go-bert.cpp |
|
||||
| whisper | whisper | no | yes | no | no | https://github.com/ggerganov/whisper.cpp | https://github.com/ggerganov/whisper.cpp |
|
||||
|
||||
</details>
|
||||
|
||||
## Usage
|
||||
|
||||
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
@@ -162,9 +196,7 @@ To build locally, run `make build` (see below).
|
||||
|
||||
### Other examples
|
||||
|
||||

|
||||
|
||||
To see other examples on how to integrate with other projects for instance chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
To see other examples on how to integrate with other projects for instance for question answering or for using it with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
|
||||
|
||||
### Advanced configuration
|
||||
@@ -529,6 +561,52 @@ curl http://localhost:8080/v1/models
|
||||
|
||||
</details>
|
||||
|
||||
### Embeddings
|
||||
|
||||
<details>
|
||||
|
||||
The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:
|
||||
|
||||
```yaml
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
embeddings: true
|
||||
backend: "bert-embeddings"
|
||||
```
|
||||
|
||||
There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
|
||||
|
||||
Note: embeddings is supported only with `llama.cpp` compatible models and `bert` models. bert is more performant and available independently of the LLM model.
|
||||
|
||||
</details>
|
||||
|
||||
### Transcriptions endpoint
|
||||
|
||||
<details>
|
||||
|
||||
Note: requires ffmpeg in the container image, which is currently not shipped due to licensing issues. We will prepare separated images with ffmpeg. (stay tuned!)
|
||||
|
||||
Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tree/main in the `models` folder, and create a YAML file for your model:
|
||||
|
||||
```yaml
|
||||
name: whisper-1
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: whisper-en
|
||||
```
|
||||
|
||||
The transcriptions endpoint then can be tested like so:
|
||||
```
|
||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||
|
||||
curl http://localhost:8080/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@$PWD/gb1.ogg" -F model="whisper-1"
|
||||
|
||||
{"text":"My fellow Americans, this day has brought terrible news and great sadness to our country.At nine o'clock this morning, Mission Control in Houston lost contact with our Space ShuttleColumbia.A short time later, debris was seen falling from the skies above Texas.The Columbia's lost.There are no survivors.One board was a crew of seven.Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain DavidBrown, Commander William McCool, Dr. Kultna Shavla, and Elon Ramon, a colonel in the IsraeliAir Force.These men and women assumed great risk in the service to all humanity.In an age when spaceflight has come to seem almost routine, it is easy to overlook thedangers of travel by rocket and the difficulties of navigating the fierce outer atmosphere ofthe Earth.These astronauts knew the dangers, and they faced them willingly, knowing they had a highand noble purpose in life.Because of their courage and daring and idealism, we will miss them all the more.All Americans today are thinking as well of the families of these men and women who havebeen given this sudden shock and grief.You're not alone.Our entire nation agrees with you, and those you loved will always have the respect andgratitude of this country.The cause in which they died will continue.Mankind has led into the darkness beyond our world by the inspiration of discovery andthe longing to understand.Our journey into space will go on.In the skies today, we saw destruction and tragedy.As farther than we can see, there is comfort and hope.In the words of the prophet Isaiah, \"Lift your eyes and look to the heavens who createdall these, he who brings out the starry hosts one by one and calls them each by name.\"Because of his great power and mighty strength, not one of them is missing.The same creator who names the stars also knows the names of the seven souls we mourntoday.The crew of the shuttle Columbia did not return safely to Earth yet we can pray that all aresafely home.May God bless the grieving families and may God continue to bless America.[BLANK_AUDIO]"}
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
Here are answers to some of the most common questions.
|
||||
@@ -572,7 +650,7 @@ Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/ll
|
||||
### Where is the webUI?
|
||||
|
||||
<details>
|
||||
We are working on to have a good out of the box experience - however as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
|
||||
There is the availability of localai-webui and chatbot-ui in the examples section and can be setup as per the instructions. However as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
|
||||
|
||||
</details>
|
||||
|
||||
@@ -590,6 +668,7 @@ Feel free to open up a PR to get your project listed!
|
||||
|
||||
- [Kairos](https://github.com/kairos-io/kairos)
|
||||
- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)
|
||||
- [Spark](https://github.com/cedriking/spark)
|
||||
|
||||
## Blog posts and other articles
|
||||
|
||||
@@ -612,7 +691,7 @@ Feel free to open up a PR to get your project listed!
|
||||
|
||||
## License
|
||||
|
||||
LocalAI is a community-driven project. It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
|
||||
LocalAI is a community-driven project. It was initially created by [Ettore Di Giacinto](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
|
||||
|
||||
MIT
|
||||
|
||||
@@ -621,6 +700,7 @@ MIT
|
||||
- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
|
||||
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
|
||||
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
|
||||
- [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp)
|
||||
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
19
api/api.go
19
api/api.go
@@ -6,12 +6,13 @@ import (
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
"github.com/gofiber/fiber/v2/middleware/logger"
|
||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
func App(configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
@@ -19,6 +20,7 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
|
||||
|
||||
// Return errors as JSON responses
|
||||
app := fiber.New(fiber.Config{
|
||||
BodyLimit: uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
|
||||
DisableStartupMessage: disableMessage,
|
||||
// Override default error handler
|
||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
||||
@@ -40,6 +42,12 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
|
||||
},
|
||||
})
|
||||
|
||||
if debug {
|
||||
app.Use(logger.New(logger.Config{
|
||||
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
|
||||
}))
|
||||
}
|
||||
|
||||
cm := make(ConfigMerger)
|
||||
if err := cm.LoadConfigs(loader.ModelPath); err != nil {
|
||||
log.Error().Msgf("error loading config files: %s", err.Error())
|
||||
@@ -70,6 +78,15 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
|
||||
app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
|
||||
app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
|
||||
// /v1/engines/{engine_id}/embeddings
|
||||
|
||||
app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
|
||||
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16))
|
||||
|
||||
app.Get("/v1/models", listModels(loader, cm))
|
||||
app.Get("/models", listModels(loader, cm))
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@ package api_test
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
. "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
@@ -23,7 +25,7 @@ var _ = Describe("API test", func() {
|
||||
Context("API query", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App("", modelLoader, 1, 512, false, true, true)
|
||||
app = App("", modelLoader, 15, 1, 512, false, true, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
@@ -45,8 +47,7 @@ var _ = Describe("API test", func() {
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(3))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
Expect(len(models.Models)).To(Equal(7))
|
||||
})
|
||||
It("can generate completions", func() {
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
|
||||
@@ -79,15 +80,55 @@ var _ = Describe("API test", func() {
|
||||
It("returns errors", func() {
|
||||
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 5 errors occurred:"))
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
|
||||
})
|
||||
It("transcribes audio", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
resp, err := client.CreateTranscription(
|
||||
context.Background(),
|
||||
openai.AudioRequest{
|
||||
Model: openai.Whisper1,
|
||||
FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
|
||||
})
|
||||
|
||||
It("calculate embeddings", func() {
|
||||
if runtime.GOOS != "linux" {
|
||||
Skip("test supported only on linux")
|
||||
}
|
||||
resp, err := client.CreateEmbeddings(
|
||||
context.Background(),
|
||||
openai.EmbeddingRequest{
|
||||
Model: openai.AdaEmbeddingV2,
|
||||
Input: []string{"sun", "cat"},
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
|
||||
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
|
||||
|
||||
sunEmbedding := resp.Data[0].Embedding
|
||||
resp2, err := client.CreateEmbeddings(
|
||||
context.Background(),
|
||||
openai.EmbeddingRequest{
|
||||
Model: openai.AdaEmbeddingV2,
|
||||
Input: []string{"sun"},
|
||||
},
|
||||
)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
|
||||
})
|
||||
})
|
||||
|
||||
Context("Config file", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 1, 512, false, true, true)
|
||||
app = App(os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
@@ -108,8 +149,7 @@ var _ = Describe("API test", func() {
|
||||
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(5))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
Expect(len(models.Models)).To(Equal(9))
|
||||
})
|
||||
It("can generate chat completions from config file", func() {
|
||||
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
|
||||
@@ -134,5 +174,6 @@ var _ = Describe("API test", func() {
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||
})
|
||||
|
||||
})
|
||||
})
|
||||
|
||||
192
api/config.go
192
api/config.go
@@ -1,12 +1,16 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
@@ -21,8 +25,15 @@ type Config struct {
|
||||
Threads int `yaml:"threads"`
|
||||
Debug bool `yaml:"debug"`
|
||||
Roles map[string]string `yaml:"roles"`
|
||||
Embeddings bool `yaml:"embeddings"`
|
||||
Backend string `yaml:"backend"`
|
||||
TemplateConfig TemplateConfig `yaml:"template"`
|
||||
MirostatETA float64 `yaml:"mirostat_eta"`
|
||||
MirostatTAU float64 `yaml:"mirostat_tau"`
|
||||
Mirostat int `yaml:"mirostat"`
|
||||
|
||||
PromptStrings, InputStrings []string
|
||||
InputToken [][]int
|
||||
}
|
||||
|
||||
type TemplateConfig struct {
|
||||
@@ -100,3 +111,184 @@ func (cm ConfigMerger) LoadConfigs(path string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateConfig(config *Config, input *OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != 0 {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != 0 {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Temperature != 0 {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != 0 {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.F16 {
|
||||
config.F16 = input.F16
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
|
||||
if input.Mirostat != 0 {
|
||||
config.Mirostat = input.Mirostat
|
||||
}
|
||||
|
||||
if input.MirostatETA != 0 {
|
||||
config.MirostatETA = input.MirostatETA
|
||||
}
|
||||
|
||||
if input.MirostatTAU != 0 {
|
||||
config.MirostatTAU = input.MirostatTAU
|
||||
}
|
||||
|
||||
switch inputs := input.Input.(type) {
|
||||
case string:
|
||||
if inputs != "" {
|
||||
config.InputStrings = append(config.InputStrings, inputs)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
config.PromptStrings = append(config.PromptStrings, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.PromptStrings = append(config.PromptStrings, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
|
||||
input := new(OpenAIRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
modelFile := input.Model
|
||||
|
||||
if c.Params("model") != "" {
|
||||
modelFile = c.Params("model")
|
||||
}
|
||||
|
||||
received, _ := json.Marshal(input)
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelFile == "" && !bearerExists {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelFile = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
||||
} else {
|
||||
log.Debug().Msgf("No model specified, returning error")
|
||||
return nil, nil, fmt.Errorf("no model specified")
|
||||
}
|
||||
}
|
||||
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelFile = bearer
|
||||
}
|
||||
|
||||
// Load a config file if present after the model name
|
||||
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
|
||||
if _, err := os.Stat(modelConfig); err == nil {
|
||||
if err := cm.LoadConfig(modelConfig); err != nil {
|
||||
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
var config *Config
|
||||
cfg, exists := cm[modelFile]
|
||||
if !exists {
|
||||
config = &Config{
|
||||
OpenAIRequest: defaultRequest(modelFile),
|
||||
ContextSize: ctx,
|
||||
Threads: threads,
|
||||
F16: f16,
|
||||
Debug: debug,
|
||||
}
|
||||
} else {
|
||||
config = &cfg
|
||||
}
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
updateConfig(config, input)
|
||||
|
||||
// Don't allow 0 as setting
|
||||
if config.Threads == 0 {
|
||||
if threads != 0 {
|
||||
config.Threads = threads
|
||||
} else {
|
||||
config.Threads = 4
|
||||
}
|
||||
}
|
||||
|
||||
// Enforce debug flag if passed from CLI
|
||||
if debug {
|
||||
config.Debug = true
|
||||
}
|
||||
|
||||
return config, input, nil
|
||||
}
|
||||
|
||||
363
api/openai.go
363
api/openai.go
@@ -5,11 +5,17 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
@@ -33,13 +39,21 @@ type OpenAIUsage struct {
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
type Item struct {
|
||||
Embedding []float32 `json:"embedding"`
|
||||
Index int `json:"index"`
|
||||
Object string `json:"object,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIResponse struct {
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Choices []Choice `json:"choices,omitempty"`
|
||||
Usage OpenAIUsage `json:"usage"`
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Choices []Choice `json:"choices,omitempty"`
|
||||
Data []Item `json:"data,omitempty"`
|
||||
|
||||
Usage OpenAIUsage `json:"usage"`
|
||||
}
|
||||
|
||||
type Choice struct {
|
||||
@@ -63,12 +77,17 @@ type OpenAIModel struct {
|
||||
type OpenAIRequest struct {
|
||||
Model string `json:"model" yaml:"model"`
|
||||
|
||||
// whisper
|
||||
File string `json:"file" validate:"required"`
|
||||
ResponseFormat string `json:"response_format"`
|
||||
Language string `json:"language"`
|
||||
|
||||
// Prompt is read only by completion API calls
|
||||
Prompt interface{} `json:"prompt" yaml:"prompt"`
|
||||
|
||||
// Edit endpoint
|
||||
Instruction string `json:"instruction" yaml:"instruction"`
|
||||
Input string `json:"input" yaml:"input"`
|
||||
Instruction string `json:"instruction" yaml:"instruction"`
|
||||
Input interface{} `json:"input" yaml:"input"`
|
||||
|
||||
Stop interface{} `json:"stop" yaml:"stop"`
|
||||
|
||||
@@ -92,6 +111,10 @@ type OpenAIRequest struct {
|
||||
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
|
||||
Keep int `json:"n_keep" yaml:"n_keep"`
|
||||
|
||||
MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
|
||||
MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
|
||||
Mirostat int `json:"mirostat" yaml:"mirostat"`
|
||||
|
||||
Seed int `json:"seed" yaml:"seed"`
|
||||
}
|
||||
|
||||
@@ -105,135 +128,6 @@ func defaultRequest(modelFile string) OpenAIRequest {
|
||||
}
|
||||
}
|
||||
|
||||
func updateConfig(config *Config, input *OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != 0 {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != 0 {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Temperature != 0 {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != 0 {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.F16 {
|
||||
config.F16 = input.F16
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
}
|
||||
|
||||
func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
|
||||
input := new(OpenAIRequest)
|
||||
// Get input data from the request body
|
||||
if err := c.BodyParser(input); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
modelFile := input.Model
|
||||
received, _ := json.Marshal(input)
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelFile == "" && !bearerExists {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelFile = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
||||
} else {
|
||||
log.Debug().Msgf("No model specified, returning error")
|
||||
return nil, nil, fmt.Errorf("no model specified")
|
||||
}
|
||||
}
|
||||
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelFile = bearer
|
||||
}
|
||||
|
||||
// Load a config file if present after the model name
|
||||
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
|
||||
if _, err := os.Stat(modelConfig); err == nil {
|
||||
if err := cm.LoadConfig(modelConfig); err != nil {
|
||||
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
var config *Config
|
||||
cfg, exists := cm[modelFile]
|
||||
if !exists {
|
||||
config = &Config{
|
||||
OpenAIRequest: defaultRequest(modelFile),
|
||||
}
|
||||
} else {
|
||||
config = &cfg
|
||||
}
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
updateConfig(config, input)
|
||||
|
||||
if threads != 0 {
|
||||
config.Threads = threads
|
||||
}
|
||||
if ctx != 0 {
|
||||
config.ContextSize = ctx
|
||||
}
|
||||
if f16 {
|
||||
config.F16 = true
|
||||
}
|
||||
|
||||
if debug {
|
||||
config.Debug = true
|
||||
}
|
||||
|
||||
return config, input, nil
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
@@ -244,19 +138,6 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
predInput := []string{}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
predInput = append(predInput, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
predInput = append(predInput, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Completion != "" {
|
||||
@@ -264,7 +145,7 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
|
||||
}
|
||||
|
||||
var result []Choice
|
||||
for _, i := range predInput {
|
||||
for _, i := range config.PromptStrings {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
@@ -298,7 +179,75 @@ func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
|
||||
}
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/embeddings
|
||||
func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
items := []Item{}
|
||||
|
||||
for i, s := range config.InputToken {
|
||||
// get the model function to call for the result
|
||||
embedFn, err := ModelEmbedding("", s, loader, *config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
embeddings, err := embedFn()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
||||
}
|
||||
|
||||
for i, s := range config.InputStrings {
|
||||
// get the model function to call for the result
|
||||
embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
embeddings, err := embedFn()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
|
||||
}
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Data: items,
|
||||
Object: "list",
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
|
||||
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
|
||||
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
|
||||
resp := OpenAIResponse{
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
log.Debug().Msgf("Sending goroutine: %s", s)
|
||||
|
||||
responses <- resp
|
||||
return true
|
||||
})
|
||||
close(responses)
|
||||
}
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
@@ -350,19 +299,7 @@ func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
|
||||
if input.Stream {
|
||||
responses := make(chan OpenAIResponse)
|
||||
|
||||
go func() {
|
||||
ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
|
||||
resp := OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
|
||||
responses <- resp
|
||||
return true
|
||||
})
|
||||
close(responses)
|
||||
}()
|
||||
go process(predInput, input, config, loader, responses)
|
||||
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
|
||||
@@ -419,28 +356,32 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
predInput := input.Input
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Edit != "" {
|
||||
templateFile = config.TemplateConfig.Edit
|
||||
}
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
Instruction string
|
||||
}{Input: predInput, Instruction: input.Instruction})
|
||||
if err == nil {
|
||||
predInput = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
var result []Choice
|
||||
for _, i := range config.InputStrings {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
Instruction string
|
||||
}{Input: i})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Text: s})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Text: s})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
result = append(result, r...)
|
||||
}
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
@@ -457,6 +398,70 @@ func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, thread
|
||||
}
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/audio/create
|
||||
func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
// retrieve the file data from the request
|
||||
file, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := file.Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
dir, err := os.MkdirTemp("", "whisper")
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
dst := filepath.Join(dir, path.Base(file.Filename))
|
||||
dstFile, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := io.Copy(dstFile, f); err != nil {
|
||||
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Audio file copied to: %+v", dst)
|
||||
|
||||
whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if whisperModel == nil {
|
||||
return fmt.Errorf("could not load whisper model")
|
||||
}
|
||||
|
||||
w, ok := whisperModel.(whisper.Model)
|
||||
if !ok {
|
||||
return fmt.Errorf("loader returned non-whisper object")
|
||||
}
|
||||
|
||||
tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Trascribed: %+v", tr)
|
||||
// TODO: handle different outputs here
|
||||
return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr})
|
||||
}
|
||||
}
|
||||
|
||||
func listModels(loader *model.ModelLoader, cm ConfigMerger) func(ctx *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
models, err := loader.ListModels()
|
||||
|
||||
@@ -8,101 +8,18 @@ import (
|
||||
|
||||
"github.com/donomii/go-rwkv.cpp"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
var mutexMap sync.Mutex
|
||||
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
|
||||
|
||||
var loadedModels map[string]interface{} = map[string]interface{}{}
|
||||
var muModels sync.Mutex
|
||||
|
||||
func backendLoader(backendString string, loader *model.ModelLoader, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
switch strings.ToLower(backendString) {
|
||||
case "llama":
|
||||
return loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
case "stablelm":
|
||||
return loader.LoadStableLMModel(modelFile)
|
||||
case "gpt2":
|
||||
return loader.LoadGPT2Model(modelFile)
|
||||
case "gptj":
|
||||
return loader.LoadGPTJModel(modelFile)
|
||||
case "rwkv":
|
||||
return loader.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
default:
|
||||
return nil, fmt.Errorf("backend unsupported: %s", backendString)
|
||||
}
|
||||
}
|
||||
|
||||
func greedyLoader(loader *model.ModelLoader, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
updateModels := func(model interface{}) {
|
||||
muModels.Lock()
|
||||
defer muModels.Unlock()
|
||||
loadedModels[modelFile] = model
|
||||
}
|
||||
|
||||
muModels.Lock()
|
||||
m, exists := loadedModels[modelFile]
|
||||
if exists {
|
||||
muModels.Unlock()
|
||||
return m, nil
|
||||
}
|
||||
muModels.Unlock()
|
||||
|
||||
model, modelerr := loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadGPTJModel(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadGPT2Model(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadStableLMModel(modelFile)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
model, modelerr = loader.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
||||
if modelerr == nil {
|
||||
updateModels(model)
|
||||
return model, nil
|
||||
} else {
|
||||
err = multierror.Append(err, modelerr)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
}
|
||||
|
||||
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
|
||||
supportStreams := false
|
||||
modelFile := c.Model
|
||||
|
||||
// Try to load the model
|
||||
func defaultLLamaOpts(c Config) []llama.ModelOption {
|
||||
llamaOpts := []llama.ModelOption{}
|
||||
if c.ContextSize != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
|
||||
@@ -110,13 +27,153 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
if c.F16 {
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
if c.Embeddings {
|
||||
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
||||
}
|
||||
|
||||
return llamaOpts
|
||||
}
|
||||
|
||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
|
||||
if !c.Embeddings {
|
||||
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
|
||||
}
|
||||
|
||||
modelFile := c.Model
|
||||
|
||||
llamaOpts := defaultLLamaOpts(c)
|
||||
|
||||
var inferenceModel interface{}
|
||||
var err error
|
||||
if c.Backend == "" {
|
||||
inferenceModel, err = greedyLoader(loader, modelFile, llamaOpts, uint32(c.Threads))
|
||||
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
|
||||
} else {
|
||||
inferenceModel, err = backendLoader(c.Backend, loader, modelFile, llamaOpts, uint32(c.Threads))
|
||||
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var fn func() ([]float32, error)
|
||||
switch model := inferenceModel.(type) {
|
||||
case *llama.LLama:
|
||||
fn = func() ([]float32, error) {
|
||||
predictOptions := buildLLamaPredictOptions(c)
|
||||
if len(tokens) > 0 {
|
||||
return model.TokenEmbeddings(tokens, predictOptions...)
|
||||
}
|
||||
return model.Embeddings(s, predictOptions...)
|
||||
}
|
||||
// bert embeddings
|
||||
case *bert.Bert:
|
||||
fn = func() ([]float32, error) {
|
||||
if len(tokens) > 0 {
|
||||
return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
|
||||
}
|
||||
return model.Embeddings(s, bert.SetThreads(c.Threads))
|
||||
}
|
||||
default:
|
||||
fn = func() ([]float32, error) {
|
||||
return nil, fmt.Errorf("embeddings not supported by the backend")
|
||||
}
|
||||
}
|
||||
|
||||
return func() ([]float32, error) {
|
||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
mutexMap.Lock()
|
||||
l, ok := mutexes[modelFile]
|
||||
if !ok {
|
||||
m := &sync.Mutex{}
|
||||
mutexes[modelFile] = m
|
||||
l = m
|
||||
}
|
||||
mutexMap.Unlock()
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
embeds, err := fn()
|
||||
if err != nil {
|
||||
return embeds, err
|
||||
}
|
||||
// Remove trailing 0s
|
||||
for i := len(embeds) - 1; i >= 0; i-- {
|
||||
if embeds[i] == 0.0 {
|
||||
embeds = embeds[:i]
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return embeds, nil
|
||||
}, nil
|
||||
}
|
||||
|
||||
func buildLLamaPredictOptions(c Config) []llama.PredictOption {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(c.Temperature),
|
||||
llama.SetTopP(c.TopP),
|
||||
llama.SetTopK(c.TopK),
|
||||
llama.SetTokens(c.Maxtokens),
|
||||
llama.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Mirostat != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
|
||||
}
|
||||
|
||||
if c.MirostatETA != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
|
||||
}
|
||||
|
||||
if c.MirostatTAU != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
|
||||
}
|
||||
|
||||
if c.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
|
||||
|
||||
if c.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
|
||||
}
|
||||
|
||||
if c.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.F16 {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if c.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
|
||||
supportStreams := false
|
||||
modelFile := c.Model
|
||||
|
||||
llamaOpts := defaultLLamaOpts(c)
|
||||
|
||||
var inferenceModel interface{}
|
||||
var err error
|
||||
if c.Backend == "" {
|
||||
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
|
||||
} else {
|
||||
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -142,6 +199,122 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
|
||||
return response, nil
|
||||
}
|
||||
case *gpt2.GPTNeoX:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.Replit:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.Starcoder:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.RedPajama:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *bloomz.Bloomz:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []bloomz.PredictOption{
|
||||
bloomz.SetTemperature(c.Temperature),
|
||||
bloomz.SetTopP(c.TopP),
|
||||
bloomz.SetTopK(c.TopK),
|
||||
bloomz.SetTokens(c.Maxtokens),
|
||||
bloomz.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.StableLM:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
@@ -166,6 +339,30 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.Dolly:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.GPT2:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
@@ -190,29 +387,35 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gptj.GPTJ:
|
||||
case *gpt4all.Model:
|
||||
supportStreams = true
|
||||
|
||||
fn = func() (string, error) {
|
||||
if tokenCallback != nil {
|
||||
model.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gptj.PredictOption{
|
||||
gptj.SetTemperature(c.Temperature),
|
||||
gptj.SetTopP(c.TopP),
|
||||
gptj.SetTopK(c.TopK),
|
||||
gptj.SetTokens(c.Maxtokens),
|
||||
gptj.SetThreads(c.Threads),
|
||||
predictOptions := []gpt4all.PredictOption{
|
||||
gpt4all.SetTemperature(c.Temperature),
|
||||
gpt4all.SetTopP(c.TopP),
|
||||
gpt4all.SetTopK(c.TopK),
|
||||
gpt4all.SetTokens(c.Maxtokens),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
|
||||
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
str, er := model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
model.SetTokenCallback(nil)
|
||||
return str, er
|
||||
}
|
||||
case *llama.LLama:
|
||||
supportStreams = true
|
||||
@@ -222,49 +425,17 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
|
||||
model.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(c.Temperature),
|
||||
llama.SetTopP(c.TopP),
|
||||
llama.SetTopK(c.TopK),
|
||||
llama.SetTokens(c.Maxtokens),
|
||||
llama.SetThreads(c.Threads),
|
||||
}
|
||||
predictOptions := buildLLamaPredictOptions(c)
|
||||
|
||||
if c.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
|
||||
|
||||
if c.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
|
||||
}
|
||||
|
||||
if c.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.F16 {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if c.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
str, er := model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
model.SetTokenCallback(nil)
|
||||
return str, er
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,14 +2,92 @@
|
||||
|
||||
Here is a list of projects that can easily be integrated with the LocalAI backend.
|
||||
|
||||
## Projects
|
||||
### Projects
|
||||
|
||||
- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mkellerman](https://github.com/mkellerman))
|
||||
- [discord-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/) (by [@mudler](https://github.com/mudler))
|
||||
- [langchain](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/) (by [@dave-gray101](https://github.com/dave-gray101))
|
||||
- [langchain-python](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/) (by [@mudler](https://github.com/mudler))
|
||||
- [rwkv](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/) (by [@mudler](https://github.com/mudler))
|
||||
- [slack-bot](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/) (by [@mudler](https://github.com/mudler))
|
||||
|
||||
### Chatbot-UI
|
||||
|
||||
_by [@mkellerman](https://github.com/mkellerman)_
|
||||
|
||||

|
||||
|
||||
This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/)
|
||||
|
||||
### Discord bot
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
Run a discord bot which lets you talk directly with a model
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our discord server.
|
||||
|
||||
### Langchain
|
||||
|
||||
_by [@dave-gray101](https://github.com/dave-gray101)_
|
||||
|
||||
A ready to use example to show e2e how to integrate LocalAI with langchain
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/)
|
||||
|
||||
### Langchain Python
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
A ready to use example to show e2e how to integrate LocalAI with langchain
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/)
|
||||
|
||||
### LocalAI WebUI
|
||||
|
||||
_by [@dhruvgera](https://github.com/dhruvgera)_
|
||||
|
||||

|
||||
|
||||
A light, community-maintained web interface for LocalAI
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/)
|
||||
|
||||
### How to run rwkv models
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
A full example on how to run RWKV models with LocalAI
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/)
|
||||
|
||||
### Slack bot
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
Run a slack bot which lets you talk directly with a model
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)
|
||||
|
||||
### Question answering on documents with llama-index
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
|
||||
|
||||
### Question answering on documents with langchain and chroma
|
||||
|
||||
_by [@mudler](https://github.com/mudler)_
|
||||
|
||||
Shows how to integrate with `Langchain` and `Chroma` to enable question answering on a set of documents.
|
||||
|
||||
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma/)
|
||||
|
||||
### Template for Runpod.io
|
||||
|
||||
_by [@fHachenberg](https://github.com/fHachenberg)_
|
||||
|
||||
Allows to run any LocalAI-compatible model as a backend on the servers of https://runpod.io
|
||||
|
||||
[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)
|
||||
|
||||
## Want to contribute?
|
||||
|
||||
|
||||
54
examples/langchain-chroma/README.md
Normal file
54
examples/langchain-chroma/README.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# Data query example
|
||||
|
||||
This example makes use of [langchain and chroma](https://blog.langchain.dev/langchain-chroma/) to enable question answering on a set of documents.
|
||||
|
||||
## Setup
|
||||
|
||||
Download the models and start the API:
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/query_data
|
||||
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
### Python requirements
|
||||
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Create a storage
|
||||
|
||||
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt
|
||||
python store.py
|
||||
```
|
||||
|
||||
After it finishes, a directory "storage" will be created with the vector index database.
|
||||
|
||||
## Query
|
||||
|
||||
We can now query the dataset.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python query.py
|
||||
# President Trump recently stated during a press conference regarding tax reform legislation that "we're getting rid of all these loopholes." He also mentioned that he wants to simplify the system further through changes such as increasing the standard deduction amount and making other adjustments aimed at reducing taxpayers' overall burden.
|
||||
```
|
||||
|
||||
Keep in mind now things are hit or miss!
|
||||
1
examples/langchain-chroma/models/completion.tmpl
Normal file
1
examples/langchain-chroma/models/completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
5
examples/langchain-chroma/models/embeddings.yaml
Normal file
5
examples/langchain-chroma/models/embeddings.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
16
examples/langchain-chroma/models/gpt-3.5-turbo.yaml
Normal file
16
examples/langchain-chroma/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
4
examples/langchain-chroma/models/gpt4all.tmpl
Normal file
4
examples/langchain-chroma/models/gpt4all.tmpl
Normal file
@@ -0,0 +1,4 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
31
examples/langchain-chroma/query.py
Normal file
31
examples/langchain-chroma/query.py
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
import os
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter,CharacterTextSplitter
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import VectorDBQA
|
||||
from langchain.document_loaders import TextLoader
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# Load and process the text
|
||||
loader = TextLoader('state_of_the_union.txt')
|
||||
documents = loader.load()
|
||||
|
||||
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
|
||||
texts = text_splitter.split_documents(documents)
|
||||
|
||||
# Embed and store the texts
|
||||
# Supplying a persist_directory will store the embeddings on disk
|
||||
persist_directory = 'db'
|
||||
|
||||
embedding = OpenAIEmbeddings()
|
||||
|
||||
# Now we can load the persisted database from disk, and use it as normal.
|
||||
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
|
||||
qa = VectorDBQA.from_chain_type(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path), chain_type="stuff", vectorstore=vectordb)
|
||||
|
||||
query = "What the president said about taxes ?"
|
||||
print(qa.run(query))
|
||||
|
||||
4
examples/langchain-chroma/requirements.txt
Normal file
4
examples/langchain-chroma/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
langchain==0.0.160
|
||||
openai==0.27.6
|
||||
chromadb==0.3.21
|
||||
llama-index==0.6.2
|
||||
28
examples/langchain-chroma/store.py
Executable file
28
examples/langchain-chroma/store.py
Executable file
@@ -0,0 +1,28 @@
|
||||
|
||||
import os
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter,TokenTextSplitter,CharacterTextSplitter
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import VectorDBQA
|
||||
from langchain.document_loaders import TextLoader
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# Load and process the text
|
||||
loader = TextLoader('state_of_the_union.txt')
|
||||
documents = loader.load()
|
||||
|
||||
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
|
||||
#text_splitter = TokenTextSplitter()
|
||||
texts = text_splitter.split_documents(documents)
|
||||
|
||||
# Embed and store the texts
|
||||
# Supplying a persist_directory will store the embeddings on disk
|
||||
persist_directory = 'db'
|
||||
|
||||
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
|
||||
vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
|
||||
|
||||
vectordb.persist()
|
||||
vectordb = None
|
||||
@@ -30,4 +30,6 @@ export OPENAI_API_KEY=sk-
|
||||
|
||||
python test.py
|
||||
# A good company name for a company that makes colorful socks would be "Colorsocks".
|
||||
|
||||
python agent.py
|
||||
```
|
||||
44
examples/langchain-python/agent.py
Normal file
44
examples/langchain-python/agent.py
Normal file
@@ -0,0 +1,44 @@
|
||||
## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
|
||||
|
||||
from io import StringIO
|
||||
import sys
|
||||
import os
|
||||
from typing import Dict, Optional
|
||||
|
||||
from langchain.agents import load_tools
|
||||
from langchain.agents import initialize_agent
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
|
||||
|
||||
class PythonREPL:
|
||||
"""Simulates a standalone Python REPL."""
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def run(self, command: str) -> str:
|
||||
"""Run command and returns anything printed."""
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = mystdout = StringIO()
|
||||
try:
|
||||
exec(command, globals())
|
||||
sys.stdout = old_stdout
|
||||
output = mystdout.getvalue()
|
||||
except Exception as e:
|
||||
sys.stdout = old_stdout
|
||||
output = str(e)
|
||||
return output
|
||||
|
||||
llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
|
||||
python_repl = Tool(
|
||||
"Python REPL",
|
||||
PythonREPL().run,
|
||||
"""A Python shell. Use this to execute python commands. Input should be a valid python command.
|
||||
If you expect output it should be printed out.""",
|
||||
)
|
||||
tools = [python_repl]
|
||||
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
|
||||
agent.run("What is the 10th fibonacci number?")
|
||||
@@ -2,4 +2,4 @@ FROM python:3.10-bullseye
|
||||
COPY ./langchainpy-localai-example /app
|
||||
WORKDIR /app
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
ENTRYPOINT [ "python", "./simple_demo.py" ];
|
||||
ENTRYPOINT [ "python", "./full_demo.py" ];
|
||||
@@ -27,4 +27,4 @@ docker-compose --profile py up --build
|
||||
|
||||
## Copyright
|
||||
|
||||
Some of the example code in index.mts is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.
|
||||
Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.
|
||||
@@ -13,7 +13,7 @@
|
||||
"typeorm": "^0.3.15"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^18.16.3",
|
||||
"@types/node": "^18.16.4",
|
||||
"typescript": "^5.0.4"
|
||||
}
|
||||
},
|
||||
@@ -45,9 +45,9 @@
|
||||
"integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw=="
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "18.16.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.3.tgz",
|
||||
"integrity": "sha512-OPs5WnnT1xkCBiuQrZA4+YAV4HEJejmHneyraIaxsbev5yCEr6KMwINNFP9wQeFIw8FWcoTqF3vQsa5CDaI+8Q==",
|
||||
"version": "18.16.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.4.tgz",
|
||||
"integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/retry": {
|
||||
@@ -137,13 +137,6 @@
|
||||
"resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz",
|
||||
"integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA=="
|
||||
},
|
||||
"node_modules/boolbase": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
|
||||
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
@@ -209,73 +202,6 @@
|
||||
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/cheerio": {
|
||||
"version": "1.0.0-rc.12",
|
||||
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
|
||||
"integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"cheerio-select": "^2.1.0",
|
||||
"dom-serializer": "^2.0.0",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"parse5": "^7.0.0",
|
||||
"parse5-htmlparser2-tree-adapter": "^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/cheerio-select": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
|
||||
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"boolbase": "^1.0.0",
|
||||
"css-select": "^5.1.0",
|
||||
"css-what": "^6.1.0",
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
},
|
||||
"node_modules/cheerio/node_modules/parse5": {
|
||||
"version": "7.1.2",
|
||||
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
|
||||
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"entities": "^4.4.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/inikulin/parse5?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/cheerio/node_modules/parse5-htmlparser2-tree-adapter": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
|
||||
"integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"domhandler": "^5.0.2",
|
||||
"parse5": "^7.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/inikulin/parse5?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/cli-highlight": {
|
||||
"version": "2.1.11",
|
||||
"resolved": "https://registry.npmjs.org/cli-highlight/-/cli-highlight-2.1.11.tgz",
|
||||
@@ -379,36 +305,6 @@
|
||||
"node-fetch": "2.6.7"
|
||||
}
|
||||
},
|
||||
"node_modules/css-select": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
|
||||
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"boolbase": "^1.0.0",
|
||||
"css-what": "^6.1.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"domutils": "^3.0.1",
|
||||
"nth-check": "^2.0.1"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
},
|
||||
"node_modules/css-what": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
|
||||
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.3.4",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
|
||||
@@ -433,65 +329,6 @@
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dom-serializer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
|
||||
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"entities": "^4.2.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/domelementtype": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
|
||||
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
],
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/domhandler": {
|
||||
"version": "5.0.3",
|
||||
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
|
||||
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"domelementtype": "^2.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/domhandler?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/domutils": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
|
||||
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"dom-serializer": "^2.0.0",
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.3"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/domutils?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "16.0.3",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz",
|
||||
@@ -505,19 +342,6 @@
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
|
||||
},
|
||||
"node_modules/entities": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
|
||||
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/entities?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/escalade": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz",
|
||||
@@ -623,26 +447,6 @@
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/htmlparser2": {
|
||||
"version": "8.0.2",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
|
||||
"integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
|
||||
"funding": [
|
||||
"https://github.com/fb55/htmlparser2?sponsor=1",
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/fb55"
|
||||
}
|
||||
],
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1",
|
||||
"entities": "^4.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ieee754": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||
@@ -962,19 +766,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/nth-check": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
|
||||
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"boolbase": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/num-sort": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz",
|
||||
@@ -1457,9 +1248,9 @@
|
||||
"integrity": "sha512-Uy0+khmZqUrUGm5dmMqVlnvufZRSK0FbYzVgp0UMstm+F5+W2/jnEEQyc9vo1ZR/E5ZI/B1WjjoTqBqwJL6Krw=="
|
||||
},
|
||||
"@types/node": {
|
||||
"version": "18.16.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.3.tgz",
|
||||
"integrity": "sha512-OPs5WnnT1xkCBiuQrZA4+YAV4HEJejmHneyraIaxsbev5yCEr6KMwINNFP9wQeFIw8FWcoTqF3vQsa5CDaI+8Q==",
|
||||
"version": "18.16.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.4.tgz",
|
||||
"integrity": "sha512-LUhvPmAKAbgm+p/K11IWszLZVoZDlMF4NRmqbhEzDz/CnCuehPkZXwZbBCKGJsgjnuVejotBwM7B3Scrq4EqDw==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/retry": {
|
||||
@@ -1520,13 +1311,6 @@
|
||||
"resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz",
|
||||
"integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA=="
|
||||
},
|
||||
"boolbase": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
|
||||
"integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
@@ -1568,60 +1352,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"cheerio": {
|
||||
"version": "1.0.0-rc.12",
|
||||
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.12.tgz",
|
||||
"integrity": "sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"cheerio-select": "^2.1.0",
|
||||
"dom-serializer": "^2.0.0",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1",
|
||||
"htmlparser2": "^8.0.1",
|
||||
"parse5": "^7.0.0",
|
||||
"parse5-htmlparser2-tree-adapter": "^7.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"parse5": {
|
||||
"version": "7.1.2",
|
||||
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
|
||||
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"entities": "^4.4.0"
|
||||
}
|
||||
},
|
||||
"parse5-htmlparser2-tree-adapter": {
|
||||
"version": "7.0.0",
|
||||
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz",
|
||||
"integrity": "sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"domhandler": "^5.0.2",
|
||||
"parse5": "^7.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"cheerio-select": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz",
|
||||
"integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"boolbase": "^1.0.0",
|
||||
"css-select": "^5.1.0",
|
||||
"css-what": "^6.1.0",
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"cli-highlight": {
|
||||
"version": "2.1.11",
|
||||
"resolved": "https://registry.npmjs.org/cli-highlight/-/cli-highlight-2.1.11.tgz",
|
||||
@@ -1705,27 +1435,6 @@
|
||||
"node-fetch": "2.6.7"
|
||||
}
|
||||
},
|
||||
"css-select": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz",
|
||||
"integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"boolbase": "^1.0.0",
|
||||
"css-what": "^6.1.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"domutils": "^3.0.1",
|
||||
"nth-check": "^2.0.1"
|
||||
}
|
||||
},
|
||||
"css-what": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/css-what/-/css-what-6.1.0.tgz",
|
||||
"integrity": "sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"debug": {
|
||||
"version": "4.3.4",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
|
||||
@@ -1739,47 +1448,6 @@
|
||||
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
|
||||
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="
|
||||
},
|
||||
"dom-serializer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
|
||||
"integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.2",
|
||||
"entities": "^4.2.0"
|
||||
}
|
||||
},
|
||||
"domelementtype": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz",
|
||||
"integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"domhandler": {
|
||||
"version": "5.0.3",
|
||||
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz",
|
||||
"integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"domelementtype": "^2.3.0"
|
||||
}
|
||||
},
|
||||
"domutils": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/domutils/-/domutils-3.1.0.tgz",
|
||||
"integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"dom-serializer": "^2.0.0",
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.3"
|
||||
}
|
||||
},
|
||||
"dotenv": {
|
||||
"version": "16.0.3",
|
||||
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz",
|
||||
@@ -1790,13 +1458,6 @@
|
||||
"resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
|
||||
"integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
|
||||
},
|
||||
"entities": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
|
||||
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"escalade": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz",
|
||||
@@ -1864,19 +1525,6 @@
|
||||
"resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-10.7.3.tgz",
|
||||
"integrity": "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A=="
|
||||
},
|
||||
"htmlparser2": {
|
||||
"version": "8.0.2",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz",
|
||||
"integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"domelementtype": "^2.3.0",
|
||||
"domhandler": "^5.0.3",
|
||||
"domutils": "^3.0.1",
|
||||
"entities": "^4.4.0"
|
||||
}
|
||||
},
|
||||
"ieee754": {
|
||||
"version": "1.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||
@@ -2024,16 +1672,6 @@
|
||||
"whatwg-url": "^5.0.0"
|
||||
}
|
||||
},
|
||||
"nth-check": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz",
|
||||
"integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"boolbase": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"num-sort": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz",
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"author": "dave@gray101.com",
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"@types/node": "^18.16.3",
|
||||
"@types/node": "^18.16.4",
|
||||
"typescript": "^5.0.4"
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import os
|
||||
import logging
|
||||
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain import PromptTemplate, LLMChain
|
||||
from langchain.prompts.chat import (
|
||||
@@ -13,6 +15,9 @@ from langchain.schema import (
|
||||
SystemMessage
|
||||
)
|
||||
|
||||
# This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI.
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
print('Langchain + LocalAI PYTHON Tests')
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
|
||||
@@ -24,7 +29,7 @@ chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key,
|
||||
|
||||
print("Created ChatOpenAI for ", chat.model_name)
|
||||
|
||||
template = "You are a helpful assistant that translates {input_language} to {output_language}."
|
||||
template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!"
|
||||
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
|
||||
human_template = "{text}"
|
||||
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
|
||||
@@ -34,6 +39,8 @@ chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_mes
|
||||
print("ABOUT to execute")
|
||||
|
||||
# get a chat completion from the formatted messages
|
||||
chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
|
||||
response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
|
||||
|
||||
print(response)
|
||||
|
||||
print(".");
|
||||
@@ -10,7 +10,7 @@ debugpy==1.6.7
|
||||
frozenlist==1.3.3
|
||||
greenlet==2.0.2
|
||||
idna==3.4
|
||||
langchain==0.0.157
|
||||
langchain==0.0.159
|
||||
marshmallow==3.19.0
|
||||
marshmallow-enum==1.5.1
|
||||
multidict==6.0.4
|
||||
|
||||
@@ -15,4 +15,4 @@ roles:
|
||||
backend: "gptj"
|
||||
template:
|
||||
completion: completion
|
||||
chat: completion # gpt4all
|
||||
chat: gpt4all
|
||||
26
examples/localai-webui/README.md
Normal file
26
examples/localai-webui/README.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# localai-webui
|
||||
|
||||
Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).
|
||||
|
||||

|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/localai-webui
|
||||
|
||||
# (optional) Checkout a specific LocalAI tag
|
||||
# git checkout -b build <TAG>
|
||||
|
||||
# Download any desired models to models/ in the parent LocalAI project dir
|
||||
# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
Open http://localhost:3000 for the Web UI.
|
||||
|
||||
20
examples/localai-webui/docker-compose.yml
Normal file
20
examples/localai-webui/docker-compose.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- 8080:8080
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai"]
|
||||
|
||||
frontend:
|
||||
image: quay.io/go-skynet/localai-frontend:master
|
||||
ports:
|
||||
- 3000:3000
|
||||
1
examples/query_data/.gitignore
vendored
Normal file
1
examples/query_data/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
storage/
|
||||
67
examples/query_data/README.md
Normal file
67
examples/query_data/README.md
Normal file
@@ -0,0 +1,67 @@
|
||||
# Data query example
|
||||
|
||||
This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
|
||||
|
||||
It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
|
||||
|
||||
Summary of the steps:
|
||||
|
||||
- prepare the dataset (and store it into `data`)
|
||||
- prepare a vector index database to run queries on
|
||||
- run queries
|
||||
|
||||
## Requirements
|
||||
|
||||
You will need a training data set. Copy that over `data`.
|
||||
|
||||
## Setup
|
||||
|
||||
Start the API:
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI/examples/query_data
|
||||
|
||||
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
### Create a storage
|
||||
|
||||
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python store.py
|
||||
```
|
||||
|
||||
After it finishes, a directory "storage" will be created with the vector index database.
|
||||
|
||||
## Query
|
||||
|
||||
We can now query the dataset.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python query.py
|
||||
```
|
||||
|
||||
## Update
|
||||
|
||||
To update our vector database, run `update.py`
|
||||
|
||||
```bash
|
||||
export OPENAI_API_BASE=http://localhost:8080/v1
|
||||
export OPENAI_API_KEY=sk-
|
||||
|
||||
python update.py
|
||||
```
|
||||
0
examples/query_data/data/.keep
Normal file
0
examples/query_data/data/.keep
Normal file
15
examples/query_data/docker-compose.yml
Normal file
15
examples/query_data/docker-compose.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- 8080:8080
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai"]
|
||||
1
examples/query_data/models/completion.tmpl
Normal file
1
examples/query_data/models/completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
6
examples/query_data/models/embeddings.yaml
Normal file
6
examples/query_data/models/embeddings.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
threads: 14
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
17
examples/query_data/models/gpt-3.5-turbo.yaml
Normal file
17
examples/query_data/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
threads: 14
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
35
examples/query_data/query.py
Normal file
35
examples/query_data/query.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import os
|
||||
|
||||
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
|
||||
# os.environ['OPENAI_API_KEY']= ""
|
||||
|
||||
from llama_index import LLMPredictor, PromptHelper, ServiceContext
|
||||
from langchain.llms.openai import OpenAI
|
||||
from llama_index import StorageContext, load_index_from_storage
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses text-davinci-003 by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 500
|
||||
num_output = 256
|
||||
max_chunk_overlap = 20
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
|
||||
|
||||
# rebuild storage context
|
||||
storage_context = StorageContext.from_defaults(persist_dir='./storage')
|
||||
|
||||
# load index
|
||||
index = load_index_from_storage(storage_context, service_context=service_context, )
|
||||
|
||||
query_engine = index.as_query_engine()
|
||||
|
||||
data = input("Question: ")
|
||||
response = query_engine.query(data)
|
||||
print(response)
|
||||
27
examples/query_data/store.py
Normal file
27
examples/query_data/store.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import os
|
||||
|
||||
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
|
||||
# os.environ['OPENAI_API_KEY']= ""
|
||||
|
||||
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
|
||||
from langchain.llms.openai import OpenAI
|
||||
from llama_index import StorageContext, load_index_from_storage
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses text-davinci-003 by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 400
|
||||
num_output = 400
|
||||
max_chunk_overlap = 30
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
documents = SimpleDirectoryReader('data').load_data()
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
|
||||
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
|
||||
index.storage_context.persist(persist_dir="./storage")
|
||||
|
||||
32
examples/query_data/update.py
Normal file
32
examples/query_data/update.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import os
|
||||
|
||||
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
|
||||
# os.environ['OPENAI_API_KEY']= ""
|
||||
|
||||
from llama_index import LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext
|
||||
from langchain.llms.openai import OpenAI
|
||||
from llama_index import StorageContext, load_index_from_storage
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses text-davinci-003 by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 512
|
||||
num_output = 256
|
||||
max_chunk_overlap = 20
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
|
||||
|
||||
# rebuild storage context
|
||||
storage_context = StorageContext.from_defaults(persist_dir='./storage')
|
||||
|
||||
# load index
|
||||
index = load_index_from_storage(storage_context, service_context=service_context, )
|
||||
documents = SimpleDirectoryReader('data').load_data()
|
||||
index.refresh(documents)
|
||||
index.storage_context.persist(persist_dir="./storage")
|
||||
11
examples/rwkv/scripts/build.sh
Executable file
11
examples/rwkv/scripts/build.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
URL=$1
|
||||
OUT=$2
|
||||
FILENAME=$(basename $URL)
|
||||
|
||||
wget -nc $URL -O /build/$FILENAME
|
||||
|
||||
python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16
|
||||
python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2
|
||||
39
go.mod
39
go.mod
@@ -3,44 +3,54 @@ module github.com/go-skynet/LocalAI
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a
|
||||
github.com/go-audio/wav v1.1.0
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405
|
||||
github.com/gofiber/fiber/v2 v2.44.0
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b
|
||||
github.com/gofiber/fiber/v2 v2.45.0
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/jaypipes/ghw v0.10.0
|
||||
github.com/onsi/ginkgo/v2 v2.9.4
|
||||
github.com/onsi/gomega v1.27.6
|
||||
github.com/otiai10/copy v1.11.0
|
||||
github.com/otiai10/openaigo v1.1.0
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/sashabaranov/go-openai v1.9.3
|
||||
github.com/sashabaranov/go-openai v1.9.4
|
||||
github.com/swaggo/swag v1.16.1
|
||||
github.com/urfave/cli/v2 v2.25.3
|
||||
github.com/valyala/fasthttp v1.47.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
github.com/PuerkitoBio/purell v1.1.1 // indirect
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
|
||||
github.com/andybalholm/brotli v1.0.5 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-audio/audio v1.0.0 // indirect
|
||||
github.com/go-audio/riff v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.5 // indirect
|
||||
github.com/go-openapi/jsonreference v0.19.6 // indirect
|
||||
github.com/go-openapi/spec v0.20.4 // indirect
|
||||
github.com/go-openapi/swag v0.19.15 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/jaypipes/pcidb v1.0.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.16.3 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/mailru/easyjson v0.7.6 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.18 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect
|
||||
github.com/philhofer/fwd v1.1.2 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
|
||||
@@ -50,9 +60,8 @@ require (
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
golang.org/x/net v0.9.0 // indirect
|
||||
golang.org/x/sys v0.7.0 // indirect
|
||||
golang.org/x/sys v0.8.0 // indirect
|
||||
golang.org/x/text v0.9.0 // indirect
|
||||
golang.org/x/tools v0.8.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
howett.net/plist v1.0.0 // indirect
|
||||
)
|
||||
|
||||
124
go.sum
124
go.sum
@@ -1,5 +1,9 @@
|
||||
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
|
||||
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
|
||||
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
||||
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
|
||||
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
|
||||
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
|
||||
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
@@ -12,30 +16,49 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d h1:lSHwlYf1H4WAWYgf7rjEVTGen1qmigUq2Egpu8mnQiY=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230502223004-0a3db3d72e7d/go.mod h1:H6QBF7/Tz6DAEBDXQged4H1BvsmqY/K5FG9wQRGa01g=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
|
||||
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
|
||||
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
|
||||
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
|
||||
github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
|
||||
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
|
||||
github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
|
||||
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
|
||||
github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
|
||||
github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
|
||||
github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
|
||||
github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
|
||||
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
|
||||
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
|
||||
github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVeDF1dOuAP/y6xWVC+BRtf9tJOuEza6Asbg=
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2 h1:CYQRCbOfYtC77OxweAyrdxSVwoLIM/EdZ6Ij+xBzta8=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230430075552-377fd245eae2/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405 h1:pbIxJ/eiL1Irdprxk/mquaxjR1XDGCE+7CT9BGJNRaY=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230502121737-8ceb6167e405/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b h1:qqxrjY8fYDXQahmCMTCACahm1tbiqHLPUHALkFLyBfo=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
|
||||
github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
|
||||
github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
|
||||
github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
@@ -48,16 +71,19 @@ github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brv
|
||||
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
|
||||
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
|
||||
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
|
||||
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
|
||||
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
|
||||
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
|
||||
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
@@ -67,23 +93,20 @@ github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp9
|
||||
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
|
||||
github.com/onsi/ginkgo/v2 v2.9.3 h1:5X2vl/isiKqkrOYjiaGgp3JQOcLV59g5o5SuTMqCcxU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.3/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
|
||||
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
|
||||
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
|
||||
github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
|
||||
github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
|
||||
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
|
||||
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
|
||||
github.com/otiai10/mint v1.4.1 h1:HOVBfKP1oXIc0wWo9hZ8JLdZtyCPWqjvmFDuVZ0yv2Y=
|
||||
github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc=
|
||||
github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww=
|
||||
github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=
|
||||
github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
|
||||
github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
|
||||
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
|
||||
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
|
||||
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
@@ -94,20 +117,21 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
|
||||
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.9.1 h1:3N52HkJKo9Zlo/oe1AVv5ZkCOny0ra58/ACvAxkN3MM=
|
||||
github.com/sashabaranov/go-openai v1.9.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.9.2 h1:7//Glm9EiMBjelgmBb00yYzKYqm1jckHWWTDLahfeuQ=
|
||||
github.com/sashabaranov/go-openai v1.9.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
|
||||
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
|
||||
github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg=
|
||||
github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto=
|
||||
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
|
||||
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
|
||||
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
|
||||
@@ -130,14 +154,14 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
|
||||
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
|
||||
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
|
||||
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
|
||||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
@@ -146,10 +170,10 @@ golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJ
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
@@ -158,17 +182,16 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
|
||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
|
||||
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
|
||||
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
|
||||
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
@@ -176,8 +199,6 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
|
||||
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
|
||||
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
|
||||
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
|
||||
golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
|
||||
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
@@ -185,12 +206,13 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
|
||||
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
|
||||
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
|
||||
|
||||
8
main.go
8
main.go
@@ -62,6 +62,12 @@ func main() {
|
||||
EnvVars: []string{"CONTEXT_SIZE"},
|
||||
Value: 512,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: "upload-limit",
|
||||
DefaultText: "Default upload-limit. MB",
|
||||
EnvVars: []string{"UPLOAD_LIMIT"},
|
||||
Value: 15,
|
||||
},
|
||||
},
|
||||
Description: `
|
||||
LocalAI is a drop-in replacement OpenAI API which runs inference locally.
|
||||
@@ -81,7 +87,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
|
||||
Copyright: "go-skynet authors",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
|
||||
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
182
pkg/model/initializers.go
Normal file
182
pkg/model/initializers.go
Normal file
@@ -0,0 +1,182 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
rwkv "github.com/donomii/go-rwkv.cpp"
|
||||
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
bloomz "github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
const tokenizerSuffix = ".tokenizer.json"
|
||||
|
||||
const (
|
||||
LlamaBackend = "llama"
|
||||
BloomzBackend = "bloomz"
|
||||
StarcoderBackend = "starcoder"
|
||||
StableLMBackend = "stablelm"
|
||||
DollyBackend = "dolly"
|
||||
RedPajamaBackend = "redpajama"
|
||||
GPTNeoXBackend = "gptneox"
|
||||
ReplitBackend = "replit"
|
||||
Gpt2Backend = "gpt2"
|
||||
Gpt4AllLlamaBackend = "gpt4all-llama"
|
||||
Gpt4AllMptBackend = "gpt4all-mpt"
|
||||
Gpt4AllJBackend = "gpt4all-j"
|
||||
BertEmbeddingsBackend = "bert-embeddings"
|
||||
RwkvBackend = "rwkv"
|
||||
WhisperBackend = "whisper"
|
||||
)
|
||||
|
||||
var backends []string = []string{
|
||||
LlamaBackend,
|
||||
Gpt4AllLlamaBackend,
|
||||
Gpt4AllMptBackend,
|
||||
Gpt4AllJBackend,
|
||||
Gpt2Backend,
|
||||
WhisperBackend,
|
||||
RwkvBackend,
|
||||
BloomzBackend,
|
||||
StableLMBackend,
|
||||
DollyBackend,
|
||||
RedPajamaBackend,
|
||||
GPTNeoXBackend,
|
||||
ReplitBackend,
|
||||
BertEmbeddingsBackend,
|
||||
StarcoderBackend,
|
||||
}
|
||||
|
||||
var starCoder = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewStarcoder(modelFile)
|
||||
}
|
||||
|
||||
var redPajama = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewRedPajama(modelFile)
|
||||
}
|
||||
|
||||
var dolly = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewDolly(modelFile)
|
||||
}
|
||||
|
||||
var gptNeoX = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewGPTNeoX(modelFile)
|
||||
}
|
||||
|
||||
var replit = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewReplit(modelFile)
|
||||
}
|
||||
|
||||
var stableLM = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.NewStableLM(modelFile)
|
||||
}
|
||||
|
||||
var bertEmbeddings = func(modelFile string) (interface{}, error) {
|
||||
return bert.New(modelFile)
|
||||
}
|
||||
|
||||
var bloomzLM = func(modelFile string) (interface{}, error) {
|
||||
return bloomz.New(modelFile)
|
||||
}
|
||||
var gpt2LM = func(modelFile string) (interface{}, error) {
|
||||
return gpt2.New(modelFile)
|
||||
}
|
||||
|
||||
var whisperModel = func(modelFile string) (interface{}, error) {
|
||||
return whisper.New(modelFile)
|
||||
}
|
||||
|
||||
func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
return llama.New(s, opts...)
|
||||
}
|
||||
}
|
||||
|
||||
func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
return gpt4all.New(s, opts...)
|
||||
}
|
||||
}
|
||||
|
||||
func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
|
||||
return func(s string) (interface{}, error) {
|
||||
model := rwkv.LoadFiles(s, tokenFile, threads)
|
||||
if model == nil {
|
||||
return nil, fmt.Errorf("could not load model")
|
||||
}
|
||||
return model, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
||||
switch strings.ToLower(backendString) {
|
||||
case LlamaBackend:
|
||||
return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
|
||||
case BloomzBackend:
|
||||
return ml.LoadModel(modelFile, bloomzLM)
|
||||
case StableLMBackend:
|
||||
return ml.LoadModel(modelFile, stableLM)
|
||||
case DollyBackend:
|
||||
return ml.LoadModel(modelFile, dolly)
|
||||
case RedPajamaBackend:
|
||||
return ml.LoadModel(modelFile, redPajama)
|
||||
case Gpt2Backend:
|
||||
return ml.LoadModel(modelFile, gpt2LM)
|
||||
case GPTNeoXBackend:
|
||||
return ml.LoadModel(modelFile, gptNeoX)
|
||||
case ReplitBackend:
|
||||
return ml.LoadModel(modelFile, replit)
|
||||
case StarcoderBackend:
|
||||
return ml.LoadModel(modelFile, starCoder)
|
||||
case Gpt4AllLlamaBackend:
|
||||
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
|
||||
case Gpt4AllMptBackend:
|
||||
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
|
||||
case Gpt4AllJBackend:
|
||||
return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
|
||||
case BertEmbeddingsBackend:
|
||||
return ml.LoadModel(modelFile, bertEmbeddings)
|
||||
case RwkvBackend:
|
||||
return ml.LoadModel(modelFile, rwkvLM(modelFile+tokenizerSuffix, threads))
|
||||
case WhisperBackend:
|
||||
return ml.LoadModel(modelFile, whisperModel)
|
||||
default:
|
||||
return nil, fmt.Errorf("backend unsupported: %s", backendString)
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
|
||||
log.Debug().Msgf("Loading models greedly")
|
||||
|
||||
ml.mu.Lock()
|
||||
m, exists := ml.models[modelFile]
|
||||
if exists {
|
||||
ml.mu.Unlock()
|
||||
return m, nil
|
||||
}
|
||||
ml.mu.Unlock()
|
||||
var err error
|
||||
|
||||
for _, b := range backends {
|
||||
if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
|
||||
continue
|
||||
}
|
||||
log.Debug().Msgf("[%s] Attempting to load", b)
|
||||
model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
|
||||
if modelerr == nil && model != nil {
|
||||
log.Debug().Msgf("[%s] Loads OK", b)
|
||||
return model, nil
|
||||
} else if modelerr != nil {
|
||||
err = multierror.Append(err, modelerr)
|
||||
log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
||||
}
|
||||
@@ -11,34 +11,21 @@ import (
|
||||
"text/template"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
rwkv "github.com/donomii/go-rwkv.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
)
|
||||
|
||||
type ModelLoader struct {
|
||||
ModelPath string
|
||||
mu sync.Mutex
|
||||
|
||||
models map[string]*llama.LLama
|
||||
gptmodels map[string]*gptj.GPTJ
|
||||
gpt2models map[string]*gpt2.GPT2
|
||||
gptstablelmmodels map[string]*gpt2.StableLM
|
||||
rwkv map[string]*rwkv.RwkvState
|
||||
promptsTemplates map[string]*template.Template
|
||||
// TODO: this needs generics
|
||||
models map[string]interface{}
|
||||
promptsTemplates map[string]*template.Template
|
||||
}
|
||||
|
||||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
return &ModelLoader{
|
||||
ModelPath: modelPath,
|
||||
gpt2models: make(map[string]*gpt2.GPT2),
|
||||
gptmodels: make(map[string]*gptj.GPTJ),
|
||||
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
||||
models: make(map[string]*llama.LLama),
|
||||
rwkv: make(map[string]*rwkv.RwkvState),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
ModelPath: modelPath,
|
||||
models: make(map[string]interface{}),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,10 +68,9 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,
|
||||
if exists {
|
||||
m = t
|
||||
}
|
||||
|
||||
}
|
||||
if m == nil {
|
||||
return "", nil
|
||||
return "", fmt.Errorf("failed loading any template")
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
@@ -124,143 +110,11 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
|
||||
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.NewStableLM(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gptstablelmmodels[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gpt2models[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gpt2models[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gptmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gptj.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gptmodels[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
log.Debug().Msgf("Loading model name: %s", modelName)
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.rwkv[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
tokenPath := filepath.Join(ml.ModelPath, tokenFile)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model := rwkv.LoadFiles(modelFile, tokenPath, threads)
|
||||
if model == nil {
|
||||
return nil, fmt.Errorf("could not load model")
|
||||
}
|
||||
|
||||
ml.rwkv[modelName] = model
|
||||
return model, nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
log.Debug().Msgf("Loading model name: %s", modelName)
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.models[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
@@ -270,7 +124,7 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
|
||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := llama.New(modelFile, opts...)
|
||||
model, err := loader(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -281,5 +135,5 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
|
||||
}
|
||||
|
||||
ml.models[modelName] = model
|
||||
return model, err
|
||||
return model, nil
|
||||
}
|
||||
|
||||
90
pkg/whisper/whisper.go
Normal file
90
pkg/whisper/whisper.go
Normal file
@@ -0,0 +1,90 @@
|
||||
package whisper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
wav "github.com/go-audio/wav"
|
||||
)
|
||||
|
||||
func sh(c string) (string, error) {
|
||||
cmd := exec.Command("/bin/sh", "-c", c)
|
||||
cmd.Env = os.Environ()
|
||||
o, err := cmd.CombinedOutput()
|
||||
return string(o), err
|
||||
}
|
||||
|
||||
// AudioToWav converts audio to wav for transcribe. It bashes out to ffmpeg
|
||||
// TODO: use https://github.com/mccoyst/ogg?
|
||||
func audioToWav(src, dst string) error {
|
||||
out, err := sh(fmt.Sprintf("ffmpeg -i %s -format s16le -ar 16000 -ac 1 -acodec pcm_s16le %s", src, dst))
|
||||
if err != nil {
|
||||
return fmt.Errorf("error: %w out: %s", err, out)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func Transcript(model whisper.Model, audiopath, language string, threads uint) (string, error) {
|
||||
|
||||
dir, err := os.MkdirTemp("", "whisper")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
convertedPath := filepath.Join(dir, "converted.wav")
|
||||
|
||||
if err := audioToWav(audiopath, convertedPath); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Open samples
|
||||
fh, err := os.Open(convertedPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Read samples
|
||||
d := wav.NewDecoder(fh)
|
||||
buf, err := d.FullPCMBuffer()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
data := buf.AsFloat32Buffer().Data
|
||||
|
||||
// Process samples
|
||||
context, err := model.NewContext()
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
||||
}
|
||||
|
||||
context.SetThreads(threads)
|
||||
|
||||
if language != "" {
|
||||
context.SetLanguage(language)
|
||||
} else {
|
||||
context.SetLanguage("auto")
|
||||
}
|
||||
|
||||
if err := context.Process(data, nil); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
text := ""
|
||||
for {
|
||||
segment, err := context.NextSegment()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
text += segment.Text
|
||||
}
|
||||
|
||||
return text, nil
|
||||
}
|
||||
12
tests/fixtures/config.yaml
vendored
12
tests/fixtures/config.yaml
vendored
@@ -1,8 +1,10 @@
|
||||
- name: list1
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
top_p: 80
|
||||
top_k: 0.9
|
||||
temperature: 0.1
|
||||
context_size: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
@@ -14,9 +16,11 @@
|
||||
chat: ggml-gpt4all-j
|
||||
- name: list2
|
||||
parameters:
|
||||
top_p: 80
|
||||
top_k: 0.9
|
||||
temperature: 0.1
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
context_size: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
|
||||
6
tests/fixtures/embeddings.yaml
vendored
Normal file
6
tests/fixtures/embeddings.yaml
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
threads: 14
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
6
tests/fixtures/gpt4.yaml
vendored
6
tests/fixtures/gpt4.yaml
vendored
@@ -1,8 +1,10 @@
|
||||
name: gpt4all
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
top_p: 80
|
||||
top_k: 0.9
|
||||
temperature: 0.1
|
||||
context_size: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
|
||||
6
tests/fixtures/gpt4_2.yaml
vendored
6
tests/fixtures/gpt4_2.yaml
vendored
@@ -1,8 +1,10 @@
|
||||
name: gpt4all-2
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 1024
|
||||
threads: 5
|
||||
top_p: 80
|
||||
top_k: 0.9
|
||||
temperature: 0.1
|
||||
context_size: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
|
||||
4
tests/fixtures/whisper.yaml
vendored
Normal file
4
tests/fixtures/whisper.yaml
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
name: whisper-1
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: whisper-en
|
||||
Reference in New Issue
Block a user