Compare commits

...

16 Commits

Author SHA1 Message Date
Ettore Di Giacinto
9decd0813c feat: update go-gpt2 (#359)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-23 21:47:47 +02:00
Ettore Di Giacinto
43d3fb3eba ci: add binary releases pipelines (#358) 2023-05-23 17:12:48 +02:00
Ettore Di Giacinto
f5f8c687be examples: add privateGPT example (#355) 2023-05-23 10:32:34 +02:00
ci-robbot [bot]
9e5cd0f10b ⬆️ Update nomic-ai/gpt4all (#348)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-23 09:16:56 +02:00
renovate[bot]
231a3e7c02 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 2ce2220 (#351)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-23 00:59:48 +02:00
renovate[bot]
57172e2e30 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to 5ca8767 (#350)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-23 00:06:29 +02:00
Ettore Di Giacinto
043399dd07 fix: re-enable start API message (#349)
Signed-off-by: mudler <mudler@mocaccino.org>
2023-05-23 00:06:13 +02:00
renovate[bot]
6b19356740 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to c8c95ab (#344)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-22 19:04:21 +02:00
ci-robbot [bot]
1cbe6a7067 ⬆️ Update nomic-ai/gpt4all (#345)
Signed-off-by: GitHub <noreply@github.com>
Co-authored-by: mudler <mudler@users.noreply.github.com>
2023-05-22 19:02:56 +02:00
Ettore Di Giacinto
2912f9870f Update README.md 2023-05-22 00:13:16 +02:00
Ettore Di Giacinto
9630be56e1 fix: make sure ca-certificates is present in the container images (#342) 2023-05-21 15:24:22 +02:00
Robert Hambrock
4aa78843c0 fix: spec compliant instantiation and termination of streams (#341) 2023-05-21 15:24:04 +02:00
renovate[bot]
b36d9f3776 fix(deps): update github.com/nomic-ai/gpt4all/gpt4all-bindings/golang digest to aba1147 (#333)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2023-05-21 14:38:52 +02:00
Ettore Di Giacinto
6f54cab3f0 feat: allow to set cors (#339) 2023-05-21 14:38:25 +02:00
Ettore Di Giacinto
ed5df1e68e examples: remove threads from example models (#337) 2023-05-21 12:25:24 +02:00
mudler
3c07e11e73 docs: update README 2023-05-21 00:45:24 +02:00
27 changed files with 495 additions and 266 deletions

24
.github/release.yml vendored Normal file
View File

@@ -0,0 +1,24 @@
# .github/release.yml
changelog:
exclude:
labels:
- ignore-for-release
categories:
- title: Breaking Changes 🛠
labels:
- Semver-Major
- breaking-change
- title: "Bug fixes :bug:"
labels:
- bug
- title: Exciting New Features 🎉
labels:
- Semver-Minor
- enhancement
- title: 👒 Dependencies
labels:
- dependencies
- title: Other Changes
labels:
- "*"

View File

@@ -12,8 +12,8 @@ jobs:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-gpt2.cpp"
variable: "GOGPT2_VERSION"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"

84
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,84 @@
name: Build and Release
on: push
permissions:
contents: write
jobs:
build-linux:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v3
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.define }}"
BUILD_ID: "${{ matrix.build }}"
run: |
make dist
- uses: actions/upload-artifact@v3
with:
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
build-macOS:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: macOS-latest
steps:
- name: Clone
uses: actions/checkout@v3
with:
submodules: true
- name: Dependencies
run: |
brew update
brew install sdl2 ffmpeg
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.define }}"
BUILD_ID: "${{ matrix.build }}"
run: |
make dist
- uses: actions/upload-artifact@v3
with:
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*

View File

@@ -1,26 +0,0 @@
name: goreleaser
on:
push:
tags:
- 'v*'
jobs:
goreleaser:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v4
with:
version: latest
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

2
.gitignore vendored
View File

@@ -16,5 +16,7 @@ local-ai
models/*
test-models/
release/
# just in case
.DS_Store

View File

@@ -1,15 +0,0 @@
# Make sure to check the documentation at http://goreleaser.com
project_name: local-ai
builds:
- ldflags:
- -w -s
env:
- CGO_ENABLED=0
goos:
- linux
- darwin
- windows
goarch:
- amd64
- arm64
binary: '{{ .ProjectName }}'

View File

@@ -3,7 +3,7 @@ ARG BUILD_TYPE=
FROM golang:$GO_VERSION
ENV REBUILD=true
WORKDIR /build
RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5
RUN apt-get update && apt-get install -y cmake libgomp1 libopenblas-dev libopenblas-base libopencv-dev libopencv-core-dev libopencv-core4.5 ca-certificates
COPY . .
RUN ln -s /usr/include/opencv4/opencv2/ /usr/include/opencv2
RUN make build

View File

@@ -11,5 +11,6 @@ RUN make build
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
RUN apt-get update && apt-get install -y ca-certificates
EXPOSE 8080
ENTRYPOINT [ "/usr/bin/local-ai" ]

View File

@@ -5,8 +5,8 @@ BINARY_NAME=local-ai
GOLLAMA_VERSION?=ccf23adfb278c0165d388389a5d60f3fe38e4854
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=914519e772fd78c15691dcd0b8bac60d6af514ec
GOGPT2_VERSION?=7bff56f0224502c1c9ed6258d2a17e8084628827
GPT4ALL_VERSION?=8119ff4df0a99bde44255db2b8c7290b5582ac2b
GOGGMLTRANSFORMERS_VERSION?=14fd6c9
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
WHISPER_CPP_VERSION?=041be06d5881d3c759cc4ed45d655804361237cd
@@ -17,17 +17,20 @@ CGO_LDFLAGS?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc
GO_TAGS?=
BUILD_ID?=git
OPTIONAL_TARGETS?=
OS := $(shell uname -s)
ARCH := $(shell uname -m)
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
@@ -114,23 +117,23 @@ gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## CEREBRAS GPT
go-gpt2:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
go-ggml-transformers:
git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
cd go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
@find ./go-ggml-transformers -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 libgpt2.a
go-ggml-transformers/libtransformers.a: go-ggml-transformers
$(MAKE) -C go-ggml-transformers libtransformers.a
whisper.cpp:
git clone https://github.com/ggerganov/whisper.cpp.git
@@ -152,21 +155,21 @@ go-llama/libbinding.a: go-llama
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
$(GOCMD) mod download
## GENERIC
rebuild: ## Rebuilds the project
$(MAKE) -C go-llama clean
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C go-gpt2 clean
$(MAKE) -C go-ggml-transformers clean
$(MAKE) -C go-rwkv clean
$(MAKE) -C whisper.cpp clean
$(MAKE) -C go-stable-diffusion clean
@@ -174,18 +177,19 @@ rebuild: ## Rebuilds the project
$(MAKE) -C bloomz clean
$(MAKE) build
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./gpt4all
rm -rf ./go-stable-diffusion
rm -rf ./go-gpt2
rm -rf ./go-ggml-transformers
rm -rf ./go-rwkv
rm -rf ./go-bert
rm -rf ./bloomz
rm -rf ./whisper.cpp
rm -rf $(BINARY_NAME)
rm -rf release/
## Build:
@@ -195,6 +199,10 @@ build: prepare ## Build the project
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -tags "$(GO_TAGS)" -x -o $(BINARY_NAME) ./
dist: build
mkdir -p release
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
@@ -205,7 +213,7 @@ run: prepare ## run local-ai
test-models/testmodel:
mkdir test-models
mkdir test-dir
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav

View File

@@ -23,7 +23,7 @@ In a nutshell:
LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
See the [usage](https://github.com/go-skynet/LocalAI#usage) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI.
See the [usage](https://github.com/go-skynet/LocalAI#usage) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://github.com/go-skynet/model-gallery).
### How does it work?
@@ -39,6 +39,7 @@ LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https
## News
- 21-05-2023: __v1.14.0__ released. Minor updates to the `/models/apply` endpoint, `llama.cpp` backend updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. `gpt4all` is still compatible with the old format.
- 19-05-2023: __v1.13.0__ released! 🔥🔥 updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( https://github.com/go-skynet/LocalAI/pull/310 thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API](https://github.com/go-skynet/LocalAI#advanced-prepare-models-using-the-api).
- 17-05-2023: __v1.12.0__ released! 🔥🔥 Minor fixes, plus CUDA (https://github.com/go-skynet/LocalAI/pull/258) support for `llama.cpp`-compatible models and image generation (https://github.com/go-skynet/LocalAI/pull/272).
- 16-05-2023: 🔥🔥🔥 Experimental support for CUDA (https://github.com/go-skynet/LocalAI/pull/258) in the `llama.cpp` backend and Stable diffusion CPU image generation (https://github.com/go-skynet/LocalAI/pull/272) in `master`.
@@ -128,13 +129,13 @@ Depending on the model you are attempting to run might need more RAM or CPU reso
| [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | no | no | yes |
| [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | no | no | yes |
| [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | no | no | yes |
| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | GPT/NeoX, Cerebras | yes | no | no | no |
| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Dolly | yes | no | no | no |
| [redpajama](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | RedPajama | yes | no | no | no |
| [stableLM](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | StableLM GPT/NeoX | yes | no | no | no |
| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Replit | yes | no | no | no |
| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | GPT NeoX | yes | no | no | no |
| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-gpt2.cpp)) | Starcoder | yes | no | no | no |
| [gpt2](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT/NeoX, Cerebras | yes | no | no | no |
| [dolly](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Dolly | yes | no | no | no |
| [gptj](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPTJ | yes | no | no | no |
| [mpt](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | MPT | yes | no | no | no |
| [replit](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Replit | yes | no | no | no |
| [gptneox](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | GPT NeoX, RedPajama, StableLM | yes | no | no | no |
| [starcoder](https://github.com/ggerganov/ggml) ([binding](https://github.com/go-skynet/go-ggml-transformers.cpp)) | Starcoder | yes | no | no | no |
| [bloomz](https://github.com/NouamaneTazi/bloomz.cpp) ([binding](https://github.com/go-skynet/bloomz.cpp)) | Bloom | yes | no | no | no |
| [rwkv](https://github.com/saharNooby/rwkv.cpp) ([binding](https://github.com/donomii/go-rw)) | rwkv | yes | no | no | yes |
| [bert](https://github.com/skeskinen/bert.cpp) ([binding](https://github.com/go-skynet/go-bert.cpp) | bert | no | no | yes | no |
@@ -1044,7 +1045,7 @@ MIT
- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
- [go-skynet/go-ggml-transformers.cpp](https://github.com/go-skynet/go-ggml-transformers.cpp)
- [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp)
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)

View File

@@ -1,10 +1,8 @@
package api
import (
"context"
"errors"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
@@ -13,16 +11,18 @@ import (
"github.com/rs/zerolog/log"
)
func App(c context.Context, configFile string, loader *model.ModelLoader, uploadLimitMB, threads, ctxSize int, f16 bool, debug, disableMessage bool, imageDir string) *fiber.App {
func App(opts ...AppOption) *fiber.App {
options := newOptions(opts...)
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if debug {
if options.debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
// Return errors as JSON responses
app := fiber.New(fiber.Config{
BodyLimit: uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: disableMessage,
BodyLimit: options.uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
DisableStartupMessage: options.disableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -43,24 +43,24 @@ func App(c context.Context, configFile string, loader *model.ModelLoader, upload
},
})
if debug {
if options.debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
}
cm := NewConfigMerger()
if err := cm.LoadConfigs(loader.ModelPath); err != nil {
if err := cm.LoadConfigs(options.loader.ModelPath); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
}
if configFile != "" {
if err := cm.LoadConfigFile(configFile); err != nil {
if options.configFile != "" {
if err := cm.LoadConfigFile(options.configFile); err != nil {
log.Error().Msgf("error loading config file: %s", err.Error())
}
}
if debug {
if options.debug {
for _, v := range cm.ListConfigs() {
cfg, _ := cm.GetConfig(v)
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
@@ -68,46 +68,55 @@ func App(c context.Context, configFile string, loader *model.ModelLoader, upload
}
// Default middleware config
app.Use(recover.New())
app.Use(cors.New())
if options.cors {
if options.corsAllowOrigins == "" {
app.Use(cors.New())
} else {
app.Use(cors.New(cors.Config{
AllowOrigins: options.corsAllowOrigins,
}))
}
}
// LocalAI API endpoints
applier := newGalleryApplier(loader.ModelPath)
applier.start(c, cm)
app.Post("/models/apply", applyModelGallery(loader.ModelPath, cm, applier.C))
applier := newGalleryApplier(options.loader.ModelPath)
applier.start(options.context, cm)
app.Post("/models/apply", applyModelGallery(options.loader.ModelPath, cm, applier.C))
app.Get("/models/jobs/:uuid", getOpStatus(applier))
// openAI compatible API endpoint
// chat
app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/chat/completions", chatEndpoint(cm, options))
app.Post("/chat/completions", chatEndpoint(cm, options))
// edit
app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/edits", editEndpoint(cm, options))
app.Post("/edits", editEndpoint(cm, options))
// completion
app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/completions", completionEndpoint(cm, options))
app.Post("/completions", completionEndpoint(cm, options))
// embeddings
app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/embeddings", embeddingsEndpoint(cm, options))
app.Post("/embeddings", embeddingsEndpoint(cm, options))
app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, options))
// audio
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16))
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, options))
// images
app.Post("/v1/images/generations", imageEndpoint(cm, debug, loader, imageDir))
app.Post("/v1/images/generations", imageEndpoint(cm, options))
if imageDir != "" {
app.Static("/generated-images", imageDir)
if options.imageDir != "" {
app.Static("/generated-images", options.imageDir)
}
// models
app.Get("/v1/models", listModels(loader, cm))
app.Get("/models", listModels(loader, cm))
app.Get("/v1/models", listModels(options.loader, cm))
app.Get("/models", listModels(options.loader, cm))
return app
}

View File

@@ -114,7 +114,7 @@ var _ = Describe("API test", func() {
modelLoader = model.NewModelLoader(tmpdir)
c, cancel = context.WithCancel(context.Background())
app = App(c, "", modelLoader, 15, 1, 512, false, true, true, "")
app = App(WithContext(c), WithModelLoader(modelLoader))
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
@@ -198,7 +198,7 @@ var _ = Describe("API test", func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
c, cancel = context.WithCancel(context.Background())
app = App(c, "", modelLoader, 15, 1, 512, false, true, true, "")
app = App(WithContext(c), WithModelLoader(modelLoader))
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
@@ -316,7 +316,7 @@ var _ = Describe("API test", func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
c, cancel = context.WithCancel(context.Background())
app = App(c, os.Getenv("CONFIG_FILE"), modelLoader, 5, 1, 512, false, true, true, "")
app = App(WithContext(c), WithModelLoader(modelLoader), WithConfigFile(os.Getenv("CONFIG_FILE")))
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")

View File

@@ -142,15 +142,15 @@ func defaultRequest(modelFile string) OpenAIRequest {
}
// https://platform.openai.com/docs/api-reference/completions
func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -166,7 +166,7 @@ func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader,
var result []Choice
for _, i := range config.PromptStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: i})
if err == nil {
@@ -174,7 +174,7 @@ func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader,
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
@@ -199,14 +199,14 @@ func completionEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader,
}
// https://platform.openai.com/docs/api-reference/embeddings
func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -216,7 +216,7 @@ func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader,
for i, s := range config.InputToken {
// get the model function to call for the result
embedFn, err := ModelEmbedding("", s, loader, *config)
embedFn, err := ModelEmbedding("", s, o.loader, *config)
if err != nil {
return err
}
@@ -230,7 +230,7 @@ func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader,
for i, s := range config.InputStrings {
// get the model function to call for the result
embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
embedFn, err := ModelEmbedding(s, []int{}, o.loader, *config)
if err != nil {
return err
}
@@ -256,13 +256,20 @@ func embeddingsEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader,
}
}
func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
initialMessage := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant"}}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
resp := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
Choices: []Choice{{Delta: &Message{Content: s}}},
Object: "chat.completion.chunk",
}
log.Debug().Msgf("Sending goroutine: %s", s)
@@ -273,12 +280,12 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
close(responses)
}
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -319,7 +326,7 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: predInput})
if err == nil {
@@ -330,7 +337,7 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
if input.Stream {
responses := make(chan OpenAIResponse)
go process(predInput, input, config, loader, responses)
go process(predInput, input, config, o.loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
@@ -339,13 +346,11 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
enc := json.NewEncoder(&buf)
enc.Encode(ev)
fmt.Fprintf(w, "event: data\n\n")
fmt.Fprintf(w, "data: %v\n\n", buf.String())
log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
w.Flush()
}
w.WriteString("event: data\n\n")
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{FinishReason: "stop"}},
@@ -353,12 +358,13 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
return nil
}
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
result, err := ComputeChoices(predInput, input, config, o.loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
}, nil)
if err != nil {
@@ -378,14 +384,14 @@ func chatEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
}
}
func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
func editEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, loader, true)
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, loader, debug, threads, ctx, f16)
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -401,7 +407,7 @@ func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
var result []Choice
for _, i := range config.InputStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
Input string
Instruction string
}{Input: i})
@@ -410,7 +416,7 @@ func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
@@ -449,9 +455,9 @@ func editEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threa
*
*/
func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imageDir string) func(c *fiber.Ctx) error {
func imageEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, loader, false)
m, input, err := readInput(c, o.loader, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -461,7 +467,7 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag
}
log.Debug().Msgf("Loading model: %+v", m)
config, input, err := readConfig(m, input, cm, loader, debug, 0, 0, false)
config, input, err := readConfig(m, input, cm, o.loader, o.debug, 0, 0, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -518,7 +524,7 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag
tempDir := ""
if !b64JSON {
tempDir = imageDir
tempDir = o.imageDir
}
// Create a temporary file
outputFile, err := ioutil.TempFile(tempDir, "b64")
@@ -535,7 +541,7 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag
baseURL := c.BaseURL()
fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, loader, *config)
fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.loader, *config)
if err != nil {
return err
}
@@ -574,14 +580,14 @@ func imageEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, imag
}
// https://platform.openai.com/docs/api-reference/audio/create
func transcriptEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
func transcriptEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, loader, false)
m, input, err := readInput(c, o.loader, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(m, input, cm, loader, debug, threads, ctx, f16)
config, input, err := readConfig(m, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
@@ -616,7 +622,7 @@ func transcriptEndpoint(cm *ConfigMerger, debug bool, loader *model.ModelLoader,
log.Debug().Msgf("Audio file copied to: %+v", dst)
whisperModel, err := loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
whisperModel, err := o.loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
if err != nil {
return err
}

108
api/options.go Normal file
View File

@@ -0,0 +1,108 @@
package api
import (
"context"
model "github.com/go-skynet/LocalAI/pkg/model"
)
type Option struct {
context context.Context
configFile string
loader *model.ModelLoader
uploadLimitMB, threads, ctxSize int
f16 bool
debug, disableMessage bool
imageDir string
cors bool
corsAllowOrigins string
}
type AppOption func(*Option)
func newOptions(o ...AppOption) *Option {
opt := &Option{
context: context.Background(),
uploadLimitMB: 15,
threads: 1,
ctxSize: 512,
debug: true,
disableMessage: true,
}
for _, oo := range o {
oo(opt)
}
return opt
}
func WithCors(b bool) AppOption {
return func(o *Option) {
o.cors = b
}
}
func WithCorsAllowOrigins(b string) AppOption {
return func(o *Option) {
o.corsAllowOrigins = b
}
}
func WithContext(ctx context.Context) AppOption {
return func(o *Option) {
o.context = ctx
}
}
func WithConfigFile(configFile string) AppOption {
return func(o *Option) {
o.configFile = configFile
}
}
func WithModelLoader(loader *model.ModelLoader) AppOption {
return func(o *Option) {
o.loader = loader
}
}
func WithUploadLimitMB(limit int) AppOption {
return func(o *Option) {
o.uploadLimitMB = limit
}
}
func WithThreads(threads int) AppOption {
return func(o *Option) {
o.threads = threads
}
}
func WithContextSize(ctxSize int) AppOption {
return func(o *Option) {
o.ctxSize = ctxSize
}
}
func WithF16(f16 bool) AppOption {
return func(o *Option) {
o.f16 = f16
}
}
func WithDebug(debug bool) AppOption {
return func(o *Option) {
o.debug = debug
}
}
func WithDisableMessage(disableMessage bool) AppOption {
return func(o *Option) {
o.disableMessage = disableMessage
}
}
func WithImageDir(imageDir string) AppOption {
return func(o *Option) {
o.imageDir = imageDir
}
}

View File

@@ -11,7 +11,7 @@ import (
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
"github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
llama "github.com/go-skynet/go-llama.cpp"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
@@ -243,23 +243,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
return response, nil
}
case *gpt2.GPTNeoX:
case *transformers.GPTNeoX:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
@@ -267,23 +267,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gpt2.Replit:
case *transformers.Replit:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
@@ -291,23 +291,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gpt2.Starcoder:
case *transformers.Starcoder:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
@@ -315,23 +315,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gpt2.RedPajama:
case *transformers.MPT:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
@@ -359,23 +359,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gpt2.StableLM:
case *transformers.GPTJ:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
@@ -383,23 +383,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gpt2.Dolly:
case *transformers.Dolly:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
@@ -407,23 +407,23 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
predictOptions...,
)
}
case *gpt2.GPT2:
case *transformers.GPT2:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(c.Temperature),
gpt2.SetTopP(c.TopP),
gpt2.SetTopK(c.TopK),
gpt2.SetTokens(c.Maxtokens),
gpt2.SetThreads(c.Threads),
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(

View File

@@ -57,6 +57,14 @@ A full example on how to run RWKV models with LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/)
### PrivateGPT
_by [@mudler](https://github.com/mudler)_
A full example on how to run PrivateGPT with LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/privateGPT/)
### Slack bot
_by [@mudler](https://github.com/mudler)_

View File

@@ -5,7 +5,6 @@ parameters:
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"

View File

@@ -5,7 +5,6 @@ parameters:
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 4
stopwords:
- "HUMAN:"
- "GPT:"

View File

@@ -0,0 +1,25 @@
# privateGPT
This example is a re-adaptation of https://github.com/imartinez/privateGPT to work with LocalAI and OpenAI endpoints. We have a fork with the changes required to work with privateGPT here https://github.com/go-skynet/privateGPT ( PR: https://github.com/imartinez/privateGPT/pull/408 ).
Follow the instructions in https://github.com/go-skynet/privateGPT:
```bash
git clone git@github.com:go-skynet/privateGPT.git
cd privateGPT
pip install -r requirements.txt
```
Rename `example.env` to `.env` and edit the variables appropriately.
This is an example `.env` file for LocalAI:
```
PERSIST_DIRECTORY=db
# Set to OpenAI here
MODEL_TYPE=OpenAI
EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2
MODEL_N_CTX=1000
# LocalAI URL
OPENAI_API_BASE=http://localhost:8080/v1
```

View File

@@ -5,7 +5,6 @@ parameters:
temperature: 0.2
top_p: 0.7
context_size: 1024
threads: 14
stopwords:
- "HUMAN:"
- "GPT:"

View File

@@ -6,7 +6,6 @@ parameters:
max_tokens: 100
top_p: 0.8
context_size: 1024
threads: 14
backend: "rwkv"
cutwords:
- "Bob:.*"

5
go.mod
View File

@@ -8,14 +8,14 @@ require (
github.com/go-audio/wav v1.1.0
github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245
github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523150735-8bfcb3ea6127
github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278
github.com/gofiber/fiber/v2 v2.46.0
github.com/google/uuid v1.3.0
github.com/hashicorp/go-multierror v1.1.1
github.com/imdario/mergo v0.3.15
github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd
github.com/onsi/ginkgo/v2 v2.9.5
github.com/onsi/gomega v1.27.7
github.com/otiai10/openaigo v1.1.0
@@ -41,6 +41,7 @@ require (
github.com/go-openapi/jsonreference v0.19.6 // indirect
github.com/go-openapi/spec v0.20.4 // indirect
github.com/go-openapi/swag v0.19.15 // indirect
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect

24
go.sum
View File

@@ -16,12 +16,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w=
github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d h1:uxKTbiRnplE2SubchneSf4NChtxLJtOy9VdHnQMT0d0=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230515153606-95b02d76b04d/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080 h1:W3itqKpRX9FhheKiAxdmuOBy/mjDfMf2G1vcuFIYqZc=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520170006-429b9785c080/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
@@ -42,21 +36,11 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw=
github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU=
github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245 h1:IcfYY5uH0DdDXEJKJ8bq0WZCd9guPPd3xllaWNy8LOk=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230512145559-7bff56f02245/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1 h1:i0oM2MERUgMIRmjOcv22TDQULxbmY8o9rZKLKKyWXLo=
github.com/go-skynet/go-llama.cpp v0.0.0-20230520082618-a298043ef5f1/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po=
github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI=
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
github.com/gofiber/fiber/v2 v2.46.0 h1:wkkWotblsGVlLjXj2dpgKQAYHtXumsK/HyFugQM68Ns=
github.com/gofiber/fiber/v2 v2.46.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
@@ -95,12 +79,8 @@ github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp9
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs=
github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd h1:kMnZASxCNc8GsPuAV94tltEsfT6T+esuB+rgzdjwFVM=
github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230519014017-914519e772fd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU=

22
main.go
View File

@@ -1,7 +1,6 @@
package main
import (
"context"
"fmt"
"os"
"path/filepath"
@@ -34,6 +33,14 @@ func main() {
Name: "debug",
EnvVars: []string{"DEBUG"},
},
&cli.BoolFlag{
Name: "cors",
EnvVars: []string{"CORS"},
},
&cli.StringFlag{
Name: "cors-allow-origins",
EnvVars: []string{"CORS_ALLOW_ORIGINS"},
},
&cli.IntFlag{
Name: "threads",
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
@@ -94,7 +101,18 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
Copyright: "go-skynet authors",
Action: func(ctx *cli.Context) error {
fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
return api.App(context.Background(), ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("upload-limit"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false, ctx.String("image-path")).Listen(ctx.String("address"))
return api.App(
api.WithConfigFile(ctx.String("config-file")),
api.WithModelLoader(model.NewModelLoader(ctx.String("models-path"))),
api.WithContextSize(ctx.Int("context-size")),
api.WithDebug(ctx.Bool("debug")),
api.WithImageDir(ctx.String("image-path")),
api.WithF16(ctx.Bool("f16")),
api.WithDisableMessage(false),
api.WithCors(ctx.Bool("cors")),
api.WithCorsAllowOrigins(ctx.String("cors-allow-origins")),
api.WithThreads(ctx.Int("threads")),
api.WithUploadLimitMB(ctx.Int("upload-limit"))).Listen(ctx.String("address"))
},
}

View File

@@ -10,7 +10,7 @@ import (
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
bloomz "github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/hashicorp/go-multierror"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
@@ -23,9 +23,9 @@ const (
LlamaBackend = "llama"
BloomzBackend = "bloomz"
StarcoderBackend = "starcoder"
StableLMBackend = "stablelm"
GPTJBackend = "gptj"
DollyBackend = "dolly"
RedPajamaBackend = "redpajama"
MPTBackend = "mpt"
GPTNeoXBackend = "gptneox"
ReplitBackend = "replit"
Gpt2Backend = "gpt2"
@@ -43,41 +43,41 @@ var backends []string = []string{
Gpt4AllLlamaBackend,
Gpt4AllMptBackend,
Gpt4AllJBackend,
Gpt2Backend,
WhisperBackend,
RwkvBackend,
BloomzBackend,
StableLMBackend,
DollyBackend,
RedPajamaBackend,
ReplitBackend,
GPTNeoXBackend,
WhisperBackend,
BertEmbeddingsBackend,
GPTJBackend,
Gpt2Backend,
DollyBackend,
MPTBackend,
ReplitBackend,
StarcoderBackend,
BloomzBackend,
}
var starCoder = func(modelFile string) (interface{}, error) {
return gpt2.NewStarcoder(modelFile)
return transformers.NewStarcoder(modelFile)
}
var redPajama = func(modelFile string) (interface{}, error) {
return gpt2.NewRedPajama(modelFile)
var mpt = func(modelFile string) (interface{}, error) {
return transformers.NewMPT(modelFile)
}
var dolly = func(modelFile string) (interface{}, error) {
return gpt2.NewDolly(modelFile)
return transformers.NewDolly(modelFile)
}
var gptNeoX = func(modelFile string) (interface{}, error) {
return gpt2.NewGPTNeoX(modelFile)
return transformers.NewGPTNeoX(modelFile)
}
var replit = func(modelFile string) (interface{}, error) {
return gpt2.NewReplit(modelFile)
return transformers.NewReplit(modelFile)
}
var stableLM = func(modelFile string) (interface{}, error) {
return gpt2.NewStableLM(modelFile)
var gptJ = func(modelFile string) (interface{}, error) {
return transformers.NewGPTJ(modelFile)
}
var bertEmbeddings = func(modelFile string) (interface{}, error) {
@@ -87,8 +87,9 @@ var bertEmbeddings = func(modelFile string) (interface{}, error) {
var bloomzLM = func(modelFile string) (interface{}, error) {
return bloomz.New(modelFile)
}
var gpt2LM = func(modelFile string) (interface{}, error) {
return gpt2.New(modelFile)
var transformersLM = func(modelFile string) (interface{}, error) {
return transformers.New(modelFile)
}
var stableDiffusion = func(assetDir string) (interface{}, error) {
@@ -130,14 +131,14 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
case BloomzBackend:
return ml.LoadModel(modelFile, bloomzLM)
case StableLMBackend:
return ml.LoadModel(modelFile, stableLM)
case GPTJBackend:
return ml.LoadModel(modelFile, gptJ)
case DollyBackend:
return ml.LoadModel(modelFile, dolly)
case RedPajamaBackend:
return ml.LoadModel(modelFile, redPajama)
case MPTBackend:
return ml.LoadModel(modelFile, mpt)
case Gpt2Backend:
return ml.LoadModel(modelFile, gpt2LM)
return ml.LoadModel(modelFile, transformersLM)
case GPTNeoXBackend:
return ml.LoadModel(modelFile, gptNeoX)
case ReplitBackend:

View File

@@ -1,6 +1,5 @@
name: text-embedding-ada-002
parameters:
model: bert
threads: 14
backend: bert-embeddings
embeddings: true

View File

@@ -6,7 +6,6 @@ parameters:
max_tokens: 100
top_p: 0.8
context_size: 1024
threads: 14
backend: "rwkv"
cutwords:
- "Bob:.*"