Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 03:02:38 -05:00)
Compare commits
125 Commits
Commit SHAs:

4413defca5, f359e1c6c4, 1bc87d582d, a86a383357, 16f02c7b30, fe2706890c, 85f0f8227d, 59e3c02002, 032dee256f, 6b5e2b2bf5, 6fc303de87, 6ad6e4873d, d6d7391da8, 11675932ac, f02202e1e1, f8ee20991c, e6db14e2f1, d00886abea, 4873d2bfa1, 9f426578cf, 9d01b695a8, 93829ab228, dd234f86d5, 3daff6f1aa, 89dfa0f5fc, bc03c492a0, f50a4c1454, d13d4d95ce, 428790ec06, 4f551ce414, 6ed7b10273, 02979566ee, cbdcc839f3, e1c8f087f4, 3a90ea44a5, e55492475d, 07ec2e441d, 38d7e0b43c, 3411bfd00d, 7e5fe35ae4, 8c8cf38d4d, 75b25297fd, 009ee47fe2, ec2adc2c03, ad301e6ed7, d094381e5d, 3ff9bbd217, e62ee2bc06, b49721cdd1, 64c0a7967f, e96eadab40, e73283121b, 857d13e8d6, 91db3d4d5c, 961cf29217, c839b334eb, 714bfcd45b, 77ce8b953e, 01ada95941, eabdc5042a, 96267d9437, 9497a24127, fdf75c6d0e, 6352308882, a8172a0f4e, ebcd10d66f, 885642915f, 2e424491c0, aa6faef8f7, b3254baf60, 0a43d27f0e, 3fe11fe24d, af18fdc749, 32b5eddd7d, 07c3aa1869, e59bad89e7, b971807980, c974dad799, 4eae570ef5, 67992a7d99, 0a4899f366, 1eb02f6c91, 575874e4fb, 751b7eca62, 1ae7150810, 70caf9bf8c, 0b226ac027, 220d6fd59b, 0a00a4b58e, 156e15a4fa, 271d3f6673, fec4ab93c5, 38a7a7a54d, 0db0704e2c, 88f472e5d2, 92452d46da, ac70252d70, f6451d2518, 2473f9d19b, bc583385a9, 8286bfbab7, d129fabe3b, 2539867247, 69fedb92d9, 54b5eadcc4, 16773e2a35, 78503c62b7, a330c9cee5, ff0867996e, 1bf8f996d1, 52f4d993c1, d0ceebc5d7, 9122af3ae1, b8533428bc, 677905334c, d1d55d29a0, e07dba7ad6, 062f832510, d0330bb64b, 91a23ec6ec, 0b000dd043, c73ba91a66, dfc00f8bc1, a18ff9c9b3, d0199279ad
.github/bump_deps.sh (new executable file, 9 lines, vendored)

@@ -0,0 +1,9 @@
#!/bin/bash
set -xe
REPO=$1
BRANCH=$2
VAR=$3

LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")

sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
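For illustration, here is how the script would be invoked by hand. The repository, branch, and variable values below are taken from the workflow matrix that follows; the rewritten Makefile line is only a sketch of the effect:

```bash
# Pin go-llama.cpp to the latest commit on its master branch
bash .github/bump_deps.sh go-skynet/go-llama.cpp master GOLLAMA_VERSION

# The script fetches the latest commit SHA from the GitHub API and rewrites
# the matching assignment in the Makefile, e.g.
#   GOLLAMA_VERSION?=<old ref>   ->   GOLLAMA_VERSION?=<latest commit sha>
```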
.github/workflows/bump_deps.yaml (new file, 54 lines, vendored)

@@ -0,0 +1,54 @@
name: Bump dependencies
on:
  schedule:
    - cron: 0 20 * * *
  workflow_dispatch:
jobs:
  bump:
    strategy:
      fail-fast: false
      matrix:
        include:
          - repository: "go-skynet/go-gpt4all-j.cpp"
            variable: "GOGPT4ALLJ_VERSION"
            branch: "master"
          - repository: "go-skynet/go-llama.cpp"
            variable: "GOLLAMA_VERSION"
            branch: "master"
          - repository: "go-skynet/go-gpt2.cpp"
            variable: "GOGPT2_VERSION"
            branch: "master"
          - repository: "donomii/go-rwkv.cpp"
            variable: "RWKV_VERSION"
            branch: "main"
          - repository: "ggerganov/whisper.cpp"
            variable: "WHISPER_CPP_VERSION"
            branch: "master"
          - repository: "go-skynet/go-bert.cpp"
            variable: "BERT_VERSION"
            branch: "master"
          - repository: "go-skynet/bloomz.cpp"
            variable: "BLOOMZ_VERSION"
            branch: "main"
          - repository: "go-skynet/gpt4all"
            variable: "GPT4ALL_VERSION"
            branch: "main"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Bump dependencies 🔧
        run: |
          bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Update ${{ matrix.repository }}'
          title: ':arrow_up: Update ${{ matrix.repository }}'
          branch: "update/${{ matrix.variable }}"
          body: Bump of ${{ matrix.repository }} version
          signoff: true
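The workflow runs nightly (cron `0 20 * * *`) and can also be started by hand through `workflow_dispatch`. A sketch of a manual trigger, assuming the GitHub CLI is installed and authenticated against the repository:

```bash
# Kick off the bump workflow manually
gh workflow run bump_deps.yaml

# Each matrix job then effectively runs, for example:
bash .github/bump_deps.sh donomii/go-rwkv.cpp main RWKV_VERSION
```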
.github/workflows/image.yml (4 lines changed, vendored)

@@ -54,8 +54,8 @@ jobs:
        uses: docker/login-action@v2
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_PASSWORD }}
          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
      - name: Build
        if: github.event_name != 'pull_request'
        uses: docker/build-push-action@v4
.gitignore (7 lines changed, vendored)

@@ -2,6 +2,8 @@
go-llama
go-gpt4all-j
go-gpt2
go-rwkv
whisper.cpp

# LocalAI build binary
LocalAI

@@ -11,4 +13,7 @@ local-ai

# Ignore models
models/*
test-models/
test-models/

# just in case
.DS_Store
Dockerfile (12 lines changed)

@@ -1,13 +1,9 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
ARG BUILD_TYPE=

FROM golang:$GO_VERSION as builder
FROM golang:$GO_VERSION
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
RUN make build

FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
ENTRYPOINT [ "/usr/bin/local-ai" ]
RUN make prepare-sources
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]
Dockerfile.dev (new file, 14 lines)

@@ -0,0 +1,14 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
ARG BUILD_TYPE=

FROM golang:$GO_VERSION as builder
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
RUN make build

FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
EXPOSE 8080
ENTRYPOINT [ "/usr/bin/local-ai" ]
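With this change the repository carries two images: the main `Dockerfile` now prepares the sources at image build time and hands control to `/build/entrypoint.sh`, while `Dockerfile.dev` keeps the earlier multi-stage build that copies a prebuilt `local-ai` binary into a slim Debian image. A sketch of building and running both locally; the image tags, mounted paths, and flags are illustrative:

```bash
# Main image: sources prepared at build time, entrypoint script takes over at run time
docker build -t local-ai:latest .
docker run -p 8080:8080 -e MODELS_PATH=/models -v "$PWD/models:/models" local-ai:latest

# Developer image: multi-stage build producing a small runtime image with the binary as entrypoint
docker build -f Dockerfile.dev -t local-ai:dev .
docker run -p 8080:8080 -v "$PWD/models:/models" local-ai:dev --models-path /models
```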
Makefile (206 lines changed)

@@ -2,12 +2,17 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
GOLLAMA_VERSION?=llama.cpp-25d7abb
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa

GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914
GOGPT2_VERSION?=6a10572
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
BERT_VERSION?=ec771ec715576ac050263bb7bb74bfd616a5ba13
BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1


GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)

@@ -15,8 +20,8 @@ WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)

C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz

# Use this if you want to set the default behavior
ifndef BUILD_TYPE

@@ -33,70 +38,143 @@ endif

all: help

## GPT4ALL
gpt4all:
	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
	@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
	@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
	@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
	mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
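The long chain of `sed` renames in the `gpt4all` target above exists because every binding vendors its own copy of ggml and of shared helper code; without prefixing, linking several of these static archives into one cgo binary would fail with duplicate symbol errors. A rough illustration of the effect (the archive paths are the ones built by this Makefile; the exact linker message varies by platform):

```bash
# After the renames, each backend exposes its own prefixed ggml symbols,
# so e.g. ggml_init from go-llama no longer clashes with gpt4all's copy:
nm go-llama/libbinding.a | grep ' T ggml_init'
nm gpt4all/gpt4all-bindings/golang/libgpt4all.a | grep ' T ggml_gptj_init'

# Without the prefixes the final cgo link step would abort with something like:
#   multiple definition of `ggml_init'
```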
## BERT embeddings
go-bert:
	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
	cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
	@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
	@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
	@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +

## RWKV
go-rwkv:
	git clone --recurse-submodules $(RWKV_REPO) go-rwkv
	cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
	@find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
	@find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
	@find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +

go-rwkv/librwkv.a: go-rwkv
	cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..

## bloomz
bloomz:
	git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
	@find ./bloomz -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
	@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
	@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +

bloomz/libbloomz.a: bloomz
	cd bloomz && make libbloomz.a

go-bert/libgobert.a: go-bert
	$(MAKE) -C go-bert libgobert.a

gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a

## CEREBRAS GPT
go-gpt2:
	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
	cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
	@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +

go-gpt2/libgpt2.a: go-gpt2
	$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a

whisper.cpp:
	git clone https://github.com/ggerganov/whisper.cpp.git
	cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1

whisper.cpp/libwhisper.a: whisper.cpp
	cd whisper.cpp && make libwhisper.a

go-llama:
	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1

go-llama/libbinding.a: go-llama
	$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a

replace:
	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
	$(GOCMD) mod edit -replace github.com/nomic/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz

prepare-sources: go-llama go-gpt2 gpt4all go-rwkv whisper.cpp go-bert bloomz replace
	$(GOCMD) mod download

## GENERIC
rebuild: ## Rebuilds the project
	$(MAKE) -C go-llama clean
	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
	$(MAKE) -C go-gpt2 clean
	$(MAKE) -C go-rwkv clean
	$(MAKE) -C whisper.cpp clean
	$(MAKE) -C go-bert clean
	$(MAKE) -C bloomz clean
	$(MAKE) build

prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a go-llama/libbinding.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building

clean: ## Remove build related file
	rm -fr ./go-llama
	rm -rf ./gpt4all
	rm -rf ./go-gpt2
	rm -rf ./go-rwkv
	rm -rf ./go-bert
	rm -rf ./bloomz
	rm -rf $(BINARY_NAME)

## Build:

build: prepare ## Build the project
	$(info ${GREEN}I local-ai build info:${RESET})
	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -x -o $(BINARY_NAME) ./

generic-build: ## Build the project using generic
	BUILD_TYPE="generic" $(MAKE) build

## GPT4ALL-J
go-gpt4all-j:
	git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
	cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
	@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
	@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
	@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +

go-gpt4all-j/libgptj.a: go-gpt4all-j
	$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a

# CEREBRAS GPT
go-gpt2:
	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
	cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
	@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
	@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +

go-gpt2/libgpt2.a: go-gpt2
	$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a


go-llama:
	git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama

go-llama/libbinding.a: go-llama
	$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a

replace:
	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2

prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace

clean: ## Remove build related file
	rm -fr ./go-llama
	rm -rf ./go-gpt4all-j
	rm -rf ./go-gpt2
	rm -rf $(BINARY_NAME)

## Run:
run: prepare
## Run
run: prepare ## run local-ai
	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go

test-models/testmodel:

@@ -106,7 +184,7 @@ test-models/testmodel:

test: prepare test-models/testmodel
	cp tests/fixtures/* test-models
	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v -timeout 20m ./...
	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api

## Help:
help: ## Show this help.
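Taken together, the new targets give a simple local build flow. A typical sequence using only the targets defined above, run from the repository root:

```bash
# Fetch all bindings at the pinned versions and wire up the go.mod replace directives
make prepare-sources

# Build the static libraries and the local-ai binary (BUILD_TYPE is optional)
make build

# After bumping a dependency version in the Makefile, rebuild the affected bindings
make rebuild

# Run the test suite against the fixture models
make test
```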
README.md (479 lines changed)

@@ -5,22 +5,50 @@
|
||||
<br>
|
||||
</h1>
|
||||
|
||||
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather than on the CLI interface. We think that there are already many projects that can be used as a CLI interface already, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you are using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
|
||||
|
||||
|
||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)
|
||||
|
||||
[](https://discord.gg/uJAeKSAGDy)
|
||||
|
||||
**LocalAI** is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.
|
||||
**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It lets you run models locally or on-prem on consumer-grade hardware and supports multiple model families, including GPT4ALL-J, which is licensed under Apache 2.0.
|
||||
|
||||
- OpenAI compatible API
|
||||
- Supports multiple-models
|
||||
- Supports multiple models
|
||||
- Once loaded the first time, models are kept in memory for faster inference
|
||||
- Support for prompt templates
|
||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
|
||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance.
|
||||
|
||||
Reddit post: https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/
|
||||
LocalAI is a community-driven project focused on making AI accessible to anyone. Any contribution, feedback, or PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
|
||||
|
||||
LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, and [bert.cpp](https://github.com/skeskinen/bert.cpp) for embedding.
|
||||
|
||||
See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
|
||||
## News
|
||||
|
||||
- 11-05-2023: __1.9.0__ released! 🔥 Important whisper updates ( https://github.com/go-skynet/LocalAI/pull/233 https://github.com/go-skynet/LocalAI/pull/229 ) and extended gpt4all model families support ( https://github.com/go-skynet/LocalAI/pull/232 ). Redpajama/dolly experimental ( https://github.com/go-skynet/LocalAI/pull/214 )
|
||||
- 10-05-2023: __1.8.0__ released! 🔥 Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
|
||||
- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
|
||||
- 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
|
||||
- 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
|
||||
- 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )
|
||||
|
||||
Twitter: [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it)
|
||||
|
||||
### Blogs and articles
|
||||
|
||||
- [Tutorial to use k8sgpt with LocalAI](https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65) - an excellent use case for LocalAI, using AI to analyse Kubernetes clusters.
|
||||
|
||||
## Contribute and help
|
||||
|
||||
To help the project you can:
|
||||
|
||||
- Upvote the [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
|
||||
|
||||
- [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project.
|
||||
|
||||
- If you have technical skills and want to contribute to development, have a look at the open issues. If you are new, check the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels.
|
||||
|
||||
- If you don't have technical skills you can still help by improving the documentation, adding examples, or sharing your user stories with our community; any help and contribution is welcome!
|
||||
|
||||
## Model compatibility
|
||||
|
||||
@@ -29,15 +57,63 @@ It is compatible with the models supported by [llama.cpp](https://github.com/gge
|
||||
Tested with:
|
||||
- Vicuna
|
||||
- Alpaca
|
||||
- [GPT4ALL](https://github.com/nomic-ai/gpt4all)
|
||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
|
||||
- [GPT4ALL](https://github.com/nomic-ai/gpt4all) (changes required, see below)
|
||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin) (no changes required)
|
||||
- Koala
|
||||
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
|
||||
- WizardLM
|
||||
- [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
|
||||
|
||||
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
|
||||
### GPT4ALL
|
||||
|
||||
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
|
||||
|
||||
### RWKV
|
||||
|
||||
<details>
|
||||
|
||||
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
|
||||
|
||||
Note: rwkv models need the `rwkv` backend to be specified in the YAML config file, and an associated tokenizer must be provided alongside the model:
|
||||
|
||||
```
|
||||
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
|
||||
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
|
||||
```
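As a sketch, a model definition for the files listed above could look as follows. The config file name and model name are illustrative; the `name`/`backend`/`parameters` layout matches the configuration examples later in this README:

```bash
# Create a config next to the rwkv weights and tokenizer shown above
cat <<EOF > models/rwkv_small.yaml
name: rwkv_small
backend: rwkv
parameters:
  model: rwkv_small
EOF
```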
|
||||
|
||||
</details>
|
||||
|
||||
### Others
|
||||
|
||||
It should also be compatible with StableLM and GPTNeoX ggml models (untested).
|
||||
|
||||
### Hardware requirements
|
||||
|
||||
Depending on the model you are attempting to run, you might need more RAM or CPU resources. See also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml`-based backends. `rwkv` is less demanding on resources.
|
||||
|
||||
|
||||
### Feature support matrix
|
||||
|
||||
<details>
|
||||
|
||||
| Backend | Compatible models | Completion/Chat endpoint | Audio transcription | Embeddings support | Token stream support | Github | Bindings |
|
||||
|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
|
||||
| llama | Vicuna, Alpaca, LLaMa | yes | no | yes (doesn't seem to be accurate) | yes | https://github.com/ggerganov/llama.cpp | https://github.com/go-skynet/go-llama.cpp |
|
||||
| gpt4all-llama | Vicuna, Alpaca, LLaMa | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt4all-mpt | MPT | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt4all-j | GPT4ALL-J | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
|
||||
| gpt2 | GPT/NeoX, Cerebras | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| dolly | Dolly | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| redpajama | RedPajama | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| stableLM | StableLM GPT/NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| starcoder | Starcoder | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
|
||||
| bloomz | Bloom | yes | no | no | no | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp |
|
||||
| rwkv | RWKV | yes | no | no | yes | https://github.com/saharNooby/rwkv.cpp | https://github.com/donomii/go-rwkv.cpp |
|
||||
| bert-embeddings | bert | no | no | yes | no | https://github.com/skeskinen/bert.cpp | https://github.com/go-skynet/go-bert.cpp |
|
||||
| whisper | whisper | no | yes | no | no | https://github.com/ggerganov/whisper.cpp | https://github.com/ggerganov/whisper.cpp |
|
||||
|
||||
</details>
|
||||
|
||||
## Usage
|
||||
|
||||
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
@@ -114,11 +190,101 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
|
||||
|
||||
To build locally, run `make build` (see below).
|
||||
|
||||
## Other examples
|
||||
### Other examples
|
||||
|
||||
To see other examples on how to integrate with other projects, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
To see other examples of how to integrate with other projects, for instance for question answering or for use with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
|
||||
## Prompt templates
|
||||
|
||||
### Advanced configuration
|
||||
|
||||
LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
|
||||
|
||||
<details>
|
||||
|
||||
You can either create multiple `yaml` files in the models path or specify a single YAML configuration file.
|
||||
Consider the following `models` folder in the `example/chatbot-ui`:
|
||||
|
||||
```
|
||||
base ❯ ls -liah examples/chatbot-ui/models
|
||||
36487587 drwxr-xr-x 2 mudler mudler 4.0K May 3 12:27 .
|
||||
36487586 drwxr-xr-x 3 mudler mudler 4.0K May 3 10:42 ..
|
||||
36465214 -rw-r--r-- 1 mudler mudler 10 Apr 27 07:46 completion.tmpl
|
||||
36464855 -rw-r--r-- 1 mudler mudler 3.6G Apr 27 00:08 ggml-gpt4all-j
|
||||
36464537 -rw-r--r-- 1 mudler mudler 245 May 3 10:42 gpt-3.5-turbo.yaml
|
||||
36467388 -rw-r--r-- 1 mudler mudler 180 Apr 27 07:46 gpt4all.tmpl
|
||||
```
|
||||
|
||||
The `gpt-3.5-turbo.yaml` file defines the `gpt-3.5-turbo` model, an alias that uses `gpt4all-j` with pre-defined options.
|
||||
|
||||
For instance, consider the following that declares `gpt-3.5-turbo` backed by the `ggml-gpt4all-j` model:
|
||||
|
||||
```yaml
|
||||
name: gpt-3.5-turbo
|
||||
# Default model parameters
|
||||
parameters:
|
||||
# Relative to the models path
|
||||
model: ggml-gpt4all-j
|
||||
# temperature
|
||||
temperature: 0.3
|
||||
# all the OpenAI request options here..
|
||||
|
||||
# Default context size
|
||||
context_size: 512
|
||||
threads: 10
|
||||
# Define a backend (optional). By default it will try to guess the backend the first time the model is interacted with.
|
||||
backend: gptj # available: llama, stablelm, gpt2, gptj, rwkv
|
||||
# stopwords (if supported by the backend)
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
# define chat roles
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
# template file ".tmpl" with the prompt template to use by default on the endpoint call. Note there is no extension in the files
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
```
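With such a file in place, requests can address the alias directly. A minimal check against the configuration above; the endpoint and payload follow the chat completion examples elsewhere in this README:

```bash
# The alias defined in gpt-3.5-turbo.yaml is usable as a model name
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "messages": [{"role": "user", "content": "How are you?"}]
}'
```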
|
||||
|
||||
Specifying a `config-file` via the CLI allows you to declare models in a single file as a list, for instance:
|
||||
|
||||
```yaml
|
||||
- name: list1
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
- name: list2
|
||||
parameters:
|
||||
model: testmodel
|
||||
context_size: 512
|
||||
threads: 10
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "### Response:"
|
||||
roles:
|
||||
user: "HUMAN:"
|
||||
system: "GPT:"
|
||||
template:
|
||||
completion: completion
|
||||
chat: ggml-gpt4all-j
|
||||
```
|
||||
|
||||
See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
|
||||
|
||||
</details>
|
||||
|
||||
### Prompt templates
|
||||
|
||||
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
||||
|
||||
@@ -136,11 +302,51 @@ The below instruction describes a task. Write a response that appropriately comp
|
||||
|
||||
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for some of the most popular models.
|
||||
|
||||
|
||||
For the edit endpoint, an example template for alpaca-based models can be:
|
||||
|
||||
```yaml
|
||||
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
||||
|
||||
### Instruction:
|
||||
{{.Instruction}}
|
||||
|
||||
### Input:
|
||||
{{.Input}}
|
||||
|
||||
### Response:
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## API
|
||||
### CLI
|
||||
|
||||
`LocalAI` provides an API for running text generation as a service that follows the OpenAI reference and can be used as a drop-in replacement. Models are kept in memory once they have been loaded the first time.
|
||||
You can control LocalAI with command line arguments, for instance to specify a binding address or the number of threads.
|
||||
|
||||
<details>
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
local-ai --models-path <model_path> [--address <address>] [--threads <num_threads>]
|
||||
```
|
||||
|
||||
| Parameter | Environment Variable | Default Value | Description |
|
||||
| ------------ | -------------------- | ------------- | -------------------------------------- |
|
||||
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
|
||||
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
|
||||
| address | ADDRESS | :8080 | The address and port to listen on. |
|
||||
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
|
||||
| debug | DEBUG | false | Enable debug mode. |
|
||||
| config-file | CONFIG_FILE | empty | Path to a LocalAI config file. |
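Every flag in the table can equivalently be set through its environment variable, as sketched below (the values are only examples):

```bash
# Flags...
./local-ai --models-path ./models --address ":8080" --threads 4 --context-size 512

# ...or the equivalent environment variables
MODELS_PATH=./models ADDRESS=":8080" THREADS=4 CONTEXT_SIZE=512 ./local-ai
```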
|
||||
|
||||
</details>
|
||||
|
||||
## Setup
|
||||
|
||||
Currently LocalAI comes as a container image and can be used with docker or a container engine of choice. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
|
||||
### Docker
|
||||
|
||||
<details>
|
||||
Example of starting the API with `docker`:
|
||||
@@ -161,30 +367,123 @@ You should see:
|
||||
└───────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
You can control the API server options with command line arguments:
|
||||
</details>
|
||||
|
||||
### Build locally
|
||||
|
||||
<details>
|
||||
|
||||
In order to build the `LocalAI` container image locally you can use `docker`:
|
||||
|
||||
```
|
||||
local-api --models-path <model_path> [--address <address>] [--threads <num_threads>]
|
||||
# build the image
|
||||
docker build -t LocalAI .
|
||||
docker run LocalAI
|
||||
```
|
||||
|
||||
The API takes takes the following parameters:
|
||||
Or you can build the binary with `make`:
|
||||
|
||||
| Parameter | Environment Variable | Default Value | Description |
|
||||
| ------------ | -------------------- | ------------- | -------------------------------------- |
|
||||
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
|
||||
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
|
||||
| address | ADDRESS | :8080 | The address and port to listen on. |
|
||||
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
|
||||
| debug | DEBUG | false | Enable debug mode. |
|
||||
|
||||
Once the server is running, you can start making requests to it using HTTP, using the OpenAI API.
|
||||
```
|
||||
make build
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Advanced configuration
|
||||
### Build on mac
|
||||
|
||||
Building on Mac (M1 or M2) works, but you may need to install some prerequisites using `brew`.
|
||||
|
||||
### Supported OpenAI API endpoints
|
||||
<details>
|
||||
|
||||
The below has been tested by one mac user and found to work. Note that this doesn't use docker to run the server:
|
||||
|
||||
```
|
||||
# install build dependencies
|
||||
brew install cmake
|
||||
brew install go
|
||||
|
||||
# clone the repo
|
||||
git clone https://github.com/go-skynet/LocalAI.git
|
||||
|
||||
cd LocalAI
|
||||
|
||||
# build the binary
|
||||
make build
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# Use a template from the examples
|
||||
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
|
||||
|
||||
# Run LocalAI
|
||||
./local-ai --models-path ./models/ --debug
|
||||
|
||||
# Now API is accessible at localhost:8080
|
||||
curl http://localhost:8080/v1/models
|
||||
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-gpt4all-j",
|
||||
"messages": [{"role": "user", "content": "How are you?"}],
|
||||
"temperature": 0.9
|
||||
}'
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Windows compatibility
|
||||
|
||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||
|
||||
### Run LocalAI in Kubernetes
|
||||
|
||||
LocalAI can be installed inside Kubernetes with helm.
|
||||
|
||||
<details>
|
||||
|
||||
1. Add the helm repo
|
||||
```bash
|
||||
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
|
||||
```
|
||||
1. Create a values files with your settings:
|
||||
```bash
|
||||
cat <<EOF > values.yaml
|
||||
deployment:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
threads: 4
|
||||
contextSize: 1024
|
||||
modelsPath: "/models"
|
||||
# Optionally create a PVC, mount the PV to the LocalAI Deployment,
|
||||
# and download a model to prepopulate the models directory
|
||||
modelsVolume:
|
||||
enabled: true
|
||||
url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
|
||||
pvc:
|
||||
size: 6Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
auth:
|
||||
# Optional value for HTTP basic access authentication header
|
||||
basic: "" # 'username:password' base64 encoded
|
||||
service:
|
||||
type: ClusterIP
|
||||
annotations: {}
|
||||
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
EOF
|
||||
```
|
||||
3. Install the helm chart:
|
||||
```bash
|
||||
helm repo update
|
||||
helm install local-ai go-skynet/local-ai -f values.yaml
|
||||
```
|
||||
|
||||
Check out also the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts).
|
||||
|
||||
</details>
|
||||
|
||||
## Supported OpenAI API endpoints
|
||||
|
||||
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
|
||||
|
||||
@@ -195,7 +494,7 @@ Note:
|
||||
- You can also specify the model as part of the OpenAI token.
|
||||
- If only one model is available, the API will use it for all the requests.
|
||||
|
||||
#### Chat completions
|
||||
### Chat completions
|
||||
|
||||
<details>
|
||||
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
|
||||
@@ -211,10 +510,30 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
|
||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
||||
</details>
|
||||
|
||||
#### Completions
|
||||
### Edit completions
|
||||
|
||||
<details>
|
||||
To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body:
|
||||
|
||||
```
|
||||
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
"instruction": "rephrase",
|
||||
"input": "Black cat jumped out of the window",
|
||||
"temperature": 0.7
|
||||
}'
|
||||
```
|
||||
|
||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`.
|
||||
|
||||
</details>
|
||||
|
||||
### Completions
|
||||
|
||||
<details>
|
||||
|
||||
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body:
|
||||
|
||||
```
|
||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
@@ -227,7 +546,7 @@ Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
||||
|
||||
</details>
|
||||
|
||||
#### List models
|
||||
### List models
|
||||
|
||||
<details>
|
||||
You can list all the models available with:
|
||||
@@ -238,57 +557,51 @@ curl http://localhost:8080/v1/models
|
||||
|
||||
</details>
|
||||
|
||||
## Helm Chart Installation (run LocalAI in Kubernetes)
|
||||
|
||||
LocalAI can be installed inside Kubernetes with helm.
|
||||
### Embeddings
|
||||
|
||||
<details>
|
||||
The local-ai Helm chart supports two options for the LocalAI server's models directory:
|
||||
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
|
||||
|
||||
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
|
||||
The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:
|
||||
|
||||
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
|
||||
```yaml
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
embeddings: true
|
||||
backend: "bert-embeddings"
|
||||
```
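A model defined this way can then be queried through the embeddings endpoint. The request shape follows the OpenAI embeddings API; the input string is just an example:

```bash
curl http://localhost:8080/v1/embeddings -H "Content-Type: application/json" -d '{
  "model": "text-embedding-ada-002",
  "input": "LocalAI is a drop-in replacement for the OpenAI API"
}'
```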
|
||||
|
||||
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
|
||||
```bash
|
||||
helm install local-ai charts/local-ai -n local-ai --create-namespace
|
||||
```
|
||||
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
|
||||
There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
|
||||
|
||||
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
|
||||
```bash
|
||||
helm upgrade local-ai -n local-ai charts/local-ai
|
||||
```
|
||||
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
|
||||
Note: embeddings are supported only with `llama.cpp`-compatible models and `bert` models. bert is more performant and available independently of the LLM model.
|
||||
|
||||
</details>
|
||||
|
||||
## Blog posts
|
||||
### Transcriptions endpoint
|
||||
|
||||
- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
|
||||
<details>
|
||||
|
||||
## Windows compatibility
|
||||
Note: this requires ffmpeg in the container image, which is currently not shipped due to licensing issues. We will prepare separate images with ffmpeg (stay tuned!).
|
||||
|
||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||
Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tree/main into the `models` folder, and create a YAML file for your model:
|
||||
|
||||
## Build locally
|
||||
|
||||
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
|
||||
|
||||
In order to build the `LocalAI` container image locally you can use `docker`:
|
||||
|
||||
```
|
||||
# build the image
|
||||
docker build -t LocalAI .
|
||||
docker run LocalAI
|
||||
```yaml
|
||||
name: whisper-1
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: whisper-en
|
||||
```
|
||||
|
||||
Or build the binary with `make`:
|
||||
The transcriptions endpoint then can be tested like so:
|
||||
```
|
||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||
|
||||
curl http://localhost:8080/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@$PWD/gb1.ogg" -F model="whisper-1"
|
||||
|
||||
{"text":"My fellow Americans, this day has brought terrible news and great sadness to our country.At nine o'clock this morning, Mission Control in Houston lost contact with our Space ShuttleColumbia.A short time later, debris was seen falling from the skies above Texas.The Columbia's lost.There are no survivors.One board was a crew of seven.Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark, Captain DavidBrown, Commander William McCool, Dr. Kultna Shavla, and Elon Ramon, a colonel in the IsraeliAir Force.These men and women assumed great risk in the service to all humanity.In an age when spaceflight has come to seem almost routine, it is easy to overlook thedangers of travel by rocket and the difficulties of navigating the fierce outer atmosphere ofthe Earth.These astronauts knew the dangers, and they faced them willingly, knowing they had a highand noble purpose in life.Because of their courage and daring and idealism, we will miss them all the more.All Americans today are thinking as well of the families of these men and women who havebeen given this sudden shock and grief.You're not alone.Our entire nation agrees with you, and those you loved will always have the respect andgratitude of this country.The cause in which they died will continue.Mankind has led into the darkness beyond our world by the inspiration of discovery andthe longing to understand.Our journey into space will go on.In the skies today, we saw destruction and tragedy.As farther than we can see, there is comfort and hope.In the words of the prophet Isaiah, \"Lift your eyes and look to the heavens who createdall these, he who brings out the starry hosts one by one and calls them each by name.\"Because of his great power and mighty strength, not one of them is missing.The same creator who names the stars also knows the names of the seven souls we mourntoday.The crew of the shuttle Columbia did not return safely to Earth yet we can pray that all aresafely home.May God bless the grieving families and may God continue to bless America.[BLANK_AUDIO]"}
|
||||
```
|
||||
make build
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
@@ -333,7 +646,7 @@ Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/ll
|
||||
### Where is the webUI?
|
||||
|
||||
<details>
|
||||
We are working on to have a good out of the box experience - however as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
|
||||
localai-webui and chatbot-ui are available in the examples section and can be set up as per the instructions. However, since LocalAI is an API, you can already plug it into existing projects that provide UI interfaces to OpenAI's APIs. There are several already on GitHub, and they should be compatible with LocalAI (as it mimics the OpenAI API).
|
||||
|
||||
</details>
|
||||
|
||||
@@ -351,26 +664,50 @@ Feel free to open up a PR to get your project listed!
|
||||
|
||||
- [Kairos](https://github.com/kairos-io/kairos)
|
||||
- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)
|
||||
- [Spark](https://github.com/cedriking/spark)
|
||||
|
||||
## Blog posts and other articles
|
||||
|
||||
- https://medium.com/@tyler_97636/k8sgpt-localai-unlock-kubernetes-superpowers-for-free-584790de9b65
|
||||
- https://kairos.io/docs/examples/localai/
|
||||
|
||||
## Short-term roadmap
|
||||
|
||||
- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
|
||||
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
|
||||
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and gpt4all
|
||||
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
|
||||
- [x] Multi-model support
|
||||
- [ ] Have a webUI!
|
||||
- [ ] Allow configuration of defaults for models.
|
||||
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models.
|
||||
- [x] Have a webUI!
|
||||
- [x] Allow configuration of defaults for models.
|
||||
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models, directly from the webui.
|
||||
|
||||
## Star history
|
||||
|
||||
[](https://star-history.com/#go-skynet/LocalAI&Date)
|
||||
|
||||
## License
|
||||
|
||||
LocalAI is a community-driven project. It was initially created by [Ettore Di Giacinto](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
|
||||
|
||||
MIT
|
||||
|
||||
## Golang bindings used
|
||||
|
||||
- [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
|
||||
- [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
|
||||
- [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
|
||||
- [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp)
|
||||
- [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
|
||||
- https://github.com/tatsu-lab/stanford_alpaca
|
||||
- https://github.com/cornelk/llama-go for the initial ideas
|
||||
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)
|
||||
|
||||
## Contributors
|
||||
|
||||
<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
|
||||
</a>
|
||||
|
||||
api/api.go (27 lines changed)

@@ -6,6 +6,7 @@ import (
	model "github.com/go-skynet/LocalAI/pkg/model"
	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/cors"
	"github.com/gofiber/fiber/v2/middleware/logger"
	"github.com/gofiber/fiber/v2/middleware/recover"
	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"

@@ -40,6 +41,12 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
		},
	})

	if debug {
		app.Use(logger.New(logger.Config{
			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
		}))
	}

	cm := make(ConfigMerger)
	if err := cm.LoadConfigs(loader.ModelPath); err != nil {
		log.Error().Msgf("error loading config files: %s", err.Error())

@@ -61,11 +68,23 @@ func App(configFile string, loader *model.ModelLoader, threads, ctxSize int, f16
	app.Use(cors.New())

	// openAI compatible API endpoint
	app.Post("/v1/chat/completions", openAIEndpoint(cm, true, debug, loader, threads, ctxSize, f16))
	app.Post("/chat/completions", openAIEndpoint(cm, true, debug, loader, threads, ctxSize, f16))
	app.Post("/v1/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))
	app.Post("/chat/completions", chatEndpoint(cm, debug, loader, threads, ctxSize, f16))

	app.Post("/v1/completions", openAIEndpoint(cm, false, debug, loader, threads, ctxSize, f16))
	app.Post("/completions", openAIEndpoint(cm, false, debug, loader, threads, ctxSize, f16))
	app.Post("/v1/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))
	app.Post("/edits", editEndpoint(cm, debug, loader, threads, ctxSize, f16))

	app.Post("/v1/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))
	app.Post("/completions", completionEndpoint(cm, debug, loader, threads, ctxSize, f16))

	app.Post("/v1/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))
	app.Post("/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))

	// /v1/engines/{engine_id}/embeddings

	app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, debug, loader, threads, ctxSize, f16))

	app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, debug, loader, threads, ctxSize, f16))

	app.Get("/v1/models", listModels(loader, cm))
	app.Get("/models", listModels(loader, cm))
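For a quick smoke test of the routes registered above (the model names are placeholders for whatever is configured in your models directory):

```bash
# New dedicated edit endpoint
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" \
  -d '{"model": "<model>", "instruction": "rephrase", "input": "Black cat jumped out of the window"}'

# Embeddings endpoint (also exposed under /v1/engines/:model/embeddings)
curl http://localhost:8080/v1/embeddings -H "Content-Type: application/json" \
  -d '{"model": "<embeddings-model>", "input": "test"}'

# Audio transcription endpoint
curl http://localhost:8080/v1/audio/transcriptions -H "Content-Type: multipart/form-data" \
  -F file="@audio.ogg" -F model="whisper-1"
```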
@@ -10,6 +10,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
)

@@ -18,6 +19,7 @@ var _ = Describe("API test", func() {
var app *fiber.App
var modelLoader *model.ModelLoader
var client *openai.Client
var client2 *openaigo.Client
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
@@ -27,6 +29,9 @@ var _ = Describe("API test", func() {
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"

client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL

// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
@@ -74,7 +79,7 @@ var _ = Describe("API test", func() {
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:"))
})

})
@@ -87,7 +92,8 @@ var _ = Describe("API test", func() {

defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"

client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
@@ -117,5 +123,16 @@ var _ = Describe("API test", func() {
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate edit completions from config file", func() {
request := openaigo.EditCreateRequestBody{
Model: "list2",
Instruction: "foo",
Input: "bar",
}
resp, err := client2.CreateEdit(context.Background(), request)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
})
})
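The suite points both clients at `http://127.0.0.1:9090/v1` and loads models from `MODELS_PATH`, so a local run might look roughly like the sketch below; the exact invocation is an assumption (the repository may wrap it in a Makefile target).

```bash
# Hedged sketch: run the API test suite against a local models directory.
export MODELS_PATH=$PWD/models
go test ./api/... -v
```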
194 api/config.go
@@ -1,12 +1,16 @@
package api

import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"

model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)

@@ -21,12 +25,21 @@ type Config struct {
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
MirostatETA float64 `yaml:"mirostat_eta"`
MirostatTAU float64 `yaml:"mirostat_tau"`
Mirostat int `yaml:"mirostat"`

PromptStrings, InputStrings []string
InputToken [][]int
}

type TemplateConfig struct {
Completion string `yaml:"completion"`
Chat string `yaml:"chat"`
Edit string `yaml:"edit"`
}

type ConfigMerger map[string]Config
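Given the struct tags above, and since readConfig (further down in this diff) looks for a `<model>.yaml` file next to the model in the model path, a per-model configuration could look roughly like the sketch below. Only the key names come from the struct tags shown; the file name, backend, roles and template values are illustrative assumptions.

```bash
# Hedged sketch: drop a per-model config next to the model file.
# Keys mirror the yaml tags of Config/TemplateConfig; values are placeholders.
cat > models/gpt4all-j.yaml <<'EOF'
backend: gptj
threads: 4
roles:
  user: "User:"
  assistant: "Assistant:"
template:
  chat: gpt4all-chat
  completion: gpt4all-completion
EOF
```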
@@ -98,3 +111,184 @@ func (cm ConfigMerger) LoadConfigs(path string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateConfig(config *Config, input *OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != 0 {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != 0 {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
|
||||
if input.Temperature != 0 {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
|
||||
if input.Maxtokens != 0 {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
|
||||
switch stop := input.Stop.(type) {
|
||||
case string:
|
||||
if stop != "" {
|
||||
config.StopWords = append(config.StopWords, stop)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range stop {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.StopWords = append(config.StopWords, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
|
||||
if input.F16 {
|
||||
config.F16 = input.F16
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
config.Seed = input.Seed
|
||||
}
|
||||
|
||||
if input.Mirostat != 0 {
|
||||
config.Mirostat = input.Mirostat
|
||||
}
|
||||
|
||||
if input.MirostatETA != 0 {
|
||||
config.MirostatETA = input.MirostatETA
|
||||
}
|
||||
|
||||
if input.MirostatTAU != 0 {
|
||||
config.MirostatTAU = input.MirostatTAU
|
||||
}
|
||||
|
||||
switch inputs := input.Input.(type) {
|
||||
case string:
|
||||
if inputs != "" {
|
||||
config.InputStrings = append(config.InputStrings, inputs)
|
||||
}
|
||||
case []interface{}:
|
||||
for _, pp := range inputs {
|
||||
switch i := pp.(type) {
|
||||
case string:
|
||||
config.InputStrings = append(config.InputStrings, i)
|
||||
case []interface{}:
|
||||
tokens := []int{}
|
||||
for _, ii := range i {
|
||||
tokens = append(tokens, int(ii.(float64)))
|
||||
}
|
||||
config.InputToken = append(config.InputToken, tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch p := input.Prompt.(type) {
|
||||
case string:
|
||||
config.PromptStrings = append(config.PromptStrings, p)
|
||||
case []interface{}:
|
||||
for _, pp := range p {
|
||||
if s, ok := pp.(string); ok {
|
||||
config.PromptStrings = append(config.PromptStrings, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
input := new(OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return nil, nil, err
}

modelFile := input.Model

if c.Params("model") != "" {
modelFile = c.Params("model")
}

received, _ := json.Marshal(input)

log.Debug().Msgf("Request received: %s", string(received))

// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)

// If no model was specified, take the first available
if modelFile == "" && !bearerExists {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
} else {
log.Debug().Msgf("No model specified, returning error")
return nil, nil, fmt.Errorf("no model specified")
}
}

// If a model is found in the bearer token, it takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}

// Load a config file if present after the model name
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
if _, err := os.Stat(modelConfig); err == nil {
if err := cm.LoadConfig(modelConfig); err != nil {
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
}
}

var config *Config
cfg, exists := cm[modelFile]
if !exists {
config = &Config{
OpenAIRequest: defaultRequest(modelFile),
ContextSize: ctx,
Threads: threads,
F16: f16,
Debug: debug,
}
} else {
config = &cfg
}

// Set the parameters for the language model prediction
updateConfig(config, input)

// Don't allow 0 as setting
if config.Threads == 0 {
if threads != 0 {
config.Threads = threads
} else {
config.Threads = 4
}
}

// Enforce debug flag if passed from CLI
if debug {
config.Debug = true
}

return config, input, nil
}
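Because the bearer token is treated as a model name whenever it matches a file in the model path, a client can pin the model per request without a `model` field in the body. A hedged sketch (host, port and model file name are placeholders):

```bash
# Hedged sketch: select the model via the Authorization header instead of the JSON body.
# "ggml-gpt4all-j" must exist in the models directory for the bearer to take effect.
curl http://localhost:8080/v1/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer ggml-gpt4all-j" \
  -d '{"prompt": "A long time ago in a galaxy far, far away"}'
```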
525 api/openai.go
@@ -2,15 +2,20 @@ package api
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/valyala/fasthttp"
|
||||
@@ -28,12 +33,27 @@ type ErrorResponse struct {
|
||||
Error *APIError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
type Item struct {
|
||||
Embedding []float32 `json:"embedding"`
|
||||
Index int `json:"index"`
|
||||
Object string `json:"object,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIResponse struct {
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Model string `json:"model,omitempty"`
|
||||
Choices []Choice `json:"choices,omitempty"`
|
||||
Data []Item `json:"data,omitempty"`
|
||||
|
||||
Usage OpenAIUsage `json:"usage"`
|
||||
}
|
||||
|
||||
type Choice struct {
|
||||
@@ -57,10 +77,19 @@ type OpenAIModel struct {
|
||||
type OpenAIRequest struct {
|
||||
Model string `json:"model" yaml:"model"`
|
||||
|
||||
// Prompt is read only by completion API calls
|
||||
Prompt string `json:"prompt" yaml:"prompt"`
|
||||
// whisper
|
||||
File string `json:"file" validate:"required"`
|
||||
ResponseFormat string `json:"response_format"`
|
||||
Language string `json:"language"`
|
||||
|
||||
Stop string `json:"stop" yaml:"stop"`
|
||||
// Prompt is read only by completion API calls
|
||||
Prompt interface{} `json:"prompt" yaml:"prompt"`
|
||||
|
||||
// Edit endpoint
|
||||
Instruction string `json:"instruction" yaml:"instruction"`
|
||||
Input interface{} `json:"input" yaml:"input"`
|
||||
|
||||
Stop interface{} `json:"stop" yaml:"stop"`
|
||||
|
||||
// Messages is read only by chat/completion API calls
|
||||
Messages []Message `json:"messages" yaml:"messages"`
|
||||
@@ -82,6 +111,10 @@ type OpenAIRequest struct {
|
||||
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
|
||||
Keep int `json:"n_keep" yaml:"n_keep"`
|
||||
|
||||
MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
|
||||
MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
|
||||
Mirostat int `json:"mirostat" yaml:"mirostat"`
|
||||
|
||||
Seed int `json:"seed" yaml:"seed"`
|
||||
}
|
||||
|
||||
@@ -95,163 +128,163 @@ func defaultRequest(modelFile string) OpenAIRequest {
|
||||
}
|
||||
}
|
||||
|
||||
func updateConfig(config *Config, input *OpenAIRequest) {
|
||||
if input.Echo {
|
||||
config.Echo = input.Echo
|
||||
}
|
||||
if input.TopK != 0 {
|
||||
config.TopK = input.TopK
|
||||
}
|
||||
if input.TopP != 0 {
|
||||
config.TopP = input.TopP
|
||||
}
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func completionEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
if input.Temperature != 0 {
|
||||
config.Temperature = input.Temperature
|
||||
}
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
if input.Maxtokens != 0 {
|
||||
config.Maxtokens = input.Maxtokens
|
||||
}
|
||||
templateFile := config.Model
|
||||
|
||||
if input.Stop != "" {
|
||||
config.StopWords = append(config.StopWords, input.Stop)
|
||||
}
|
||||
if config.TemplateConfig.Completion != "" {
|
||||
templateFile = config.TemplateConfig.Completion
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
config.RepeatPenalty = input.RepeatPenalty
|
||||
}
|
||||
var result []Choice
|
||||
for _, i := range config.PromptStrings {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
}{Input: i})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
config.Keep = input.Keep
|
||||
}
|
||||
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Text: s})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
config.Batch = input.Batch
|
||||
}
|
||||
result = append(result, r...)
|
||||
}
|
||||
|
||||
if input.F16 {
|
||||
config.F16 = input.F16
|
||||
}
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "text_completion",
|
||||
}
|
||||
|
||||
if input.IgnoreEOS {
|
||||
config.IgnoreEOS = input.IgnoreEOS
|
||||
}
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
if input.Seed != 0 {
|
||||
config.Seed = input.Seed
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
|
||||
var mu sync.Mutex = sync.Mutex{}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func openAIEndpoint(cm ConfigMerger, chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
// https://platform.openai.com/docs/api-reference/embeddings
func embeddingsEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {

input := new(OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}

log.Debug().Msgf("Parameter Config: %+v", config)
items := []Item{}

for i, s := range config.InputToken {
// get the model function to call for the result
embedFn, err := ModelEmbedding("", s, loader, *config)
if err != nil {
return err
}

embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}

for i, s := range config.InputStrings {
// get the model function to call for the result
embedFn, err := ModelEmbedding(s, []int{}, loader, *config)
if err != nil {
return err
}

embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}

resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Data: items,
Object: "list",
}

jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)

// Return the prediction in the response body
return c.JSON(resp)
}
}
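The handler above accepts either plain strings or token arrays in `input`. A minimal request could look like the sketch below; the model name is a placeholder, and its config must set `embeddings: true` for `ModelEmbedding` to accept it.

```bash
# Hedged sketch: request embeddings for a single input string.
curl http://localhost:8080/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "bert-embeddings", "input": "A long time ago in a galaxy far, far away"}'
```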
func chatEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
|
||||
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
|
||||
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
|
||||
resp := OpenAIResponse{
|
||||
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []Choice{{Delta: &Message{Role: "assistant", Content: s}}},
|
||||
Object: "chat.completion.chunk",
|
||||
}
|
||||
log.Debug().Msgf("Sending goroutine: %s", s)
|
||||
|
||||
responses <- resp
|
||||
return true
|
||||
})
|
||||
close(responses)
|
||||
}
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
var predInput string
|
||||
|
||||
mess := []string{}
|
||||
for _, i := range input.Messages {
|
||||
r := config.Roles[i.Role]
|
||||
if r == "" {
|
||||
r = i.Role
|
||||
}
|
||||
|
||||
content := fmt.Sprint(r, " ", i.Content)
|
||||
mess = append(mess, content)
|
||||
}
|
||||
|
||||
predInput = strings.Join(mess, "\n")
|
||||
|
||||
if input.Stream {
|
||||
log.Debug().Msgf("Stream request received")
|
||||
c.Context().SetContentType("text/event-stream")
|
||||
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
|
||||
c.Set("Content-Type", "text/event-stream; charset=utf-8")
|
||||
// c.Set("Content-Type", "text/event-stream")
|
||||
c.Set("Cache-Control", "no-cache")
|
||||
c.Set("Connection", "keep-alive")
|
||||
c.Set("Transfer-Encoding", "chunked")
|
||||
}
|
||||
|
||||
modelFile := input.Model
|
||||
received, _ := json.Marshal(input)
|
||||
|
||||
log.Debug().Msgf("Request received: %s", string(received))
|
||||
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelFile == "" && !bearerExists {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelFile = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
||||
} else {
|
||||
log.Debug().Msgf("No model specified, returning error")
|
||||
return fmt.Errorf("no model specified")
|
||||
}
|
||||
}
|
||||
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelFile = bearer
|
||||
}
|
||||
|
||||
// Load a config file if present after the model name
|
||||
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
|
||||
if _, err := os.Stat(modelConfig); err == nil {
|
||||
if err := cm.LoadConfig(modelConfig); err != nil {
|
||||
return fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
var config *Config
|
||||
cfg, exists := cm[modelFile]
|
||||
if !exists {
|
||||
config = &Config{
|
||||
OpenAIRequest: defaultRequest(modelFile),
|
||||
}
|
||||
} else {
|
||||
config = &cfg
|
||||
}
|
||||
|
||||
// Set the parameters for the language model prediction
|
||||
updateConfig(config, input)
|
||||
|
||||
if threads != 0 {
|
||||
config.Threads = threads
|
||||
}
|
||||
if ctx != 0 {
|
||||
config.ContextSize = ctx
|
||||
}
|
||||
if f16 {
|
||||
config.F16 = true
|
||||
}
|
||||
|
||||
if debug {
|
||||
config.Debug = true
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
predInput := input.Prompt
|
||||
if chat {
|
||||
mess := []string{}
|
||||
for _, i := range input.Messages {
|
||||
r := config.Roles[i.Role]
|
||||
if r == "" {
|
||||
r = i.Role
|
||||
}
|
||||
|
||||
content := fmt.Sprint(r, " ", i.Content)
|
||||
mess = append(mess, content)
|
||||
}
|
||||
|
||||
predInput = strings.Join(mess, "\n")
|
||||
}
|
||||
|
||||
templateFile := config.Model
|
||||
if config.TemplateConfig.Chat != "" && chat {
|
||||
templateFile = config.TemplateConfig.Chat
|
||||
}
|
||||
|
||||
if config.TemplateConfig.Completion != "" && !chat {
|
||||
templateFile = config.TemplateConfig.Completion
|
||||
if config.TemplateConfig.Chat != "" {
|
||||
templateFile = config.TemplateConfig.Chat
|
||||
}
|
||||
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
@@ -263,105 +296,163 @@ func openAIEndpoint(cm ConfigMerger, chat, debug bool, loader *model.ModelLoader
|
||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
||||
}
|
||||
|
||||
result := []Choice{}
|
||||
if input.Stream {
|
||||
responses := make(chan OpenAIResponse)
|
||||
|
||||
n := input.N
|
||||
go process(predInput, input, config, loader, responses)
|
||||
|
||||
if input.N == 0 {
|
||||
n = 1
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
|
||||
for ev := range responses {
|
||||
var buf bytes.Buffer
|
||||
enc := json.NewEncoder(&buf)
|
||||
enc.Encode(ev)
|
||||
|
||||
fmt.Fprintf(w, "event: data\n\n")
|
||||
fmt.Fprintf(w, "data: %v\n\n", buf.String())
|
||||
log.Debug().Msgf("Sending chunk: %s", buf.String())
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
w.WriteString("event: data\n\n")
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []Choice{{FinishReason: "stop"}},
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
|
||||
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
|
||||
w.Flush()
|
||||
}))
|
||||
return nil
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := ModelInference(predInput, loader, *config)
|
||||
result, err := ComputeChoices(predInput, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
finetunePrediction := func(prediction string) string {
|
||||
if config.Echo {
|
||||
prediction = predInput + prediction
|
||||
}
|
||||
|
||||
for _, c := range config.Cutstrings {
|
||||
mu.Lock()
|
||||
reg, ok := cutstrings[c]
|
||||
if !ok {
|
||||
cutstrings[c] = regexp.MustCompile(c)
|
||||
reg = cutstrings[c]
|
||||
}
|
||||
mu.Unlock()
|
||||
prediction = reg.ReplaceAllString(prediction, "")
|
||||
}
|
||||
|
||||
for _, c := range config.TrimSpace {
|
||||
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
||||
}
|
||||
return prediction
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
prediction, err := predFunc()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
prediction = finetunePrediction(prediction)
|
||||
|
||||
if chat {
|
||||
if input.Stream {
|
||||
result = append(result, Choice{Delta: &Message{Role: "assistant", Content: prediction}})
|
||||
} else {
|
||||
result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
|
||||
}
|
||||
} else {
|
||||
result = append(result, Choice{Text: prediction})
|
||||
}
|
||||
}
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "chat.completion",
|
||||
}
|
||||
if input.Stream && chat {
|
||||
resp.Object = "chat.completion.chunk"
|
||||
} else if chat {
|
||||
resp.Object = "chat.completion"
|
||||
} else {
|
||||
resp.Object = "text_completion"
|
||||
respData, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", respData)
|
||||
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
func editEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||
}
|
||||
|
||||
log.Debug().Msgf("Parameter Config: %+v", config)
|
||||
|
||||
templateFile := config.Model
|
||||
|
||||
if config.TemplateConfig.Edit != "" {
|
||||
templateFile = config.TemplateConfig.Edit
|
||||
}
|
||||
|
||||
var result []Choice
|
||||
for _, i := range config.InputStrings {
|
||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
||||
templatedInput, err := loader.TemplatePrefix(templateFile, struct {
|
||||
Input string
|
||||
Instruction string
|
||||
}{Input: i})
|
||||
if err == nil {
|
||||
i = templatedInput
|
||||
log.Debug().Msgf("Template found, input modified to: %s", i)
|
||||
}
|
||||
|
||||
r, err := ComputeChoices(i, input, config, loader, func(s string, c *[]Choice) {
|
||||
*c = append(*c, Choice{Text: s})
|
||||
}, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
result = append(result, r...)
|
||||
}
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: result,
|
||||
Object: "edit",
|
||||
}
|
||||
|
||||
jsonResult, _ := json.Marshal(resp)
|
||||
log.Debug().Msgf("Response: %s", jsonResult)
|
||||
|
||||
if input.Stream {
|
||||
log.Debug().Msgf("Handling stream request")
|
||||
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
|
||||
fmt.Fprintf(w, "event: data\n")
|
||||
w.Flush()
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "data: %s\n\n", jsonResult)
|
||||
w.Flush()
|
||||
|
||||
fmt.Fprintf(w, "event: data\n")
|
||||
w.Flush()
|
||||
|
||||
resp := &OpenAIResponse{
|
||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
||||
Choices: []Choice{Choice{FinishReason: "stop"}},
|
||||
}
|
||||
respData, _ := json.Marshal(resp)
|
||||
|
||||
fmt.Fprintf(w, "data: %s\n\n", respData)
|
||||
w.Flush()
|
||||
|
||||
// fmt.Fprintf(w, "data: [DONE]\n\n")
|
||||
// w.Flush()
|
||||
}))
|
||||
return nil
|
||||
} else {
|
||||
// Return the prediction in the response body
|
||||
return c.JSON(resp)
|
||||
// https://platform.openai.com/docs/api-reference/audio/create
func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
config, input, err := readConfig(cm, c, loader, debug, threads, ctx, f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}

// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
}
f, err := file.Open()
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
}
defer f.Close()
log.Debug().Msgf("Audio file: %+v", file)

dir, err := os.MkdirTemp("", "whisper")

if err != nil {
return err
}
defer os.RemoveAll(dir)

dst := filepath.Join(dir, path.Base(file.Filename))
dstFile, err := os.Create(dst)
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
}

if _, err := io.Copy(dstFile, f); err != nil {
log.Debug().Msgf("Audio file %+v - %+v - err %+v", file.Filename, dst, err)
return err
}

log.Debug().Msgf("Audio file copied to: %+v", dst)

whisperModel, err := loader.BackendLoader("whisper", config.Model, []llama.ModelOption{}, uint32(config.Threads))
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
}

w := whisperModel.(whisper.Model)

tr, err := whisperutil.Transcript(w, dst, input.Language)
if err != nil {
return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
}

log.Debug().Msgf("Transcribed: %+v", tr)
// TODO: handle different outputs here
return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr})
}
}
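The endpoint reads the audio from a multipart `file` field and an optional `language`, so a request could look like the sketch below. The file and model names are placeholders, and whether the `model` form field is picked up by the body parser here is an assumption; the model can also come from the bearer token or the first file in the model path, as in readConfig.

```bash
# Hedged sketch: transcribe a local audio file through the new endpoint.
curl http://localhost:8080/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file=@./sample.wav \
  -F model=ggml-whisper-base \
  -F language=en
```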
@@ -2,28 +2,24 @@ package api
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/donomii/go-rwkv.cpp"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/bloomz.cpp"
|
||||
bert "github.com/go-skynet/go-bert.cpp"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
||||
)
|
||||
|
||||
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
var mutexMap sync.Mutex
|
||||
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
|
||||
|
||||
func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (string, error), error) {
|
||||
var model *llama.LLama
|
||||
var gptModel *gptj.GPTJ
|
||||
var gpt2Model *gpt2.GPT2
|
||||
var stableLMModel *gpt2.StableLM
|
||||
|
||||
modelFile := c.Model
|
||||
|
||||
// Try to load the model
|
||||
var llamaerr, gpt2err, gptjerr, stableerr error
|
||||
func defaultLLamaOpts(c Config) []llama.ModelOption {
|
||||
llamaOpts := []llama.ModelOption{}
|
||||
if c.ContextSize != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
|
||||
@@ -31,26 +27,179 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
|
||||
if c.F16 {
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
if c.Embeddings {
|
||||
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
|
||||
}
|
||||
|
||||
// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
|
||||
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
if llamaerr != nil {
|
||||
gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
|
||||
if gptjerr != nil {
|
||||
gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
|
||||
if gpt2err != nil {
|
||||
stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
|
||||
if stableerr != nil {
|
||||
return nil, fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
|
||||
}
|
||||
return llamaOpts
|
||||
}
|
||||
|
||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
|
||||
if !c.Embeddings {
|
||||
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
|
||||
}
|
||||
|
||||
modelFile := c.Model
|
||||
|
||||
llamaOpts := defaultLLamaOpts(c)
|
||||
|
||||
var inferenceModel interface{}
|
||||
var err error
|
||||
if c.Backend == "" {
|
||||
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
|
||||
} else {
|
||||
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var fn func() ([]float32, error)
|
||||
switch model := inferenceModel.(type) {
|
||||
case *llama.LLama:
|
||||
fn = func() ([]float32, error) {
|
||||
predictOptions := buildLLamaPredictOptions(c)
|
||||
if len(tokens) > 0 {
|
||||
return model.TokenEmbeddings(tokens, predictOptions...)
|
||||
}
|
||||
return model.Embeddings(s, predictOptions...)
|
||||
}
|
||||
// bert embeddings
|
||||
case *bert.Bert:
|
||||
fn = func() ([]float32, error) {
|
||||
if len(tokens) > 0 {
|
||||
return nil, fmt.Errorf("embeddings endpoint for this model supports only string")
|
||||
}
|
||||
return model.Embeddings(s, bert.SetThreads(c.Threads))
|
||||
}
|
||||
default:
|
||||
fn = func() ([]float32, error) {
|
||||
return nil, fmt.Errorf("embeddings not supported by the backend")
|
||||
}
|
||||
}
|
||||
|
||||
return func() ([]float32, error) {
|
||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
||||
mutexMap.Lock()
|
||||
l, ok := mutexes[modelFile]
|
||||
if !ok {
|
||||
m := &sync.Mutex{}
|
||||
mutexes[modelFile] = m
|
||||
l = m
|
||||
}
|
||||
mutexMap.Unlock()
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
embeds, err := fn()
|
||||
if err != nil {
|
||||
return embeds, err
|
||||
}
|
||||
// Remove trailing 0s
|
||||
for i := len(embeds) - 1; i >= 0; i-- {
|
||||
if embeds[i] == 0.0 {
|
||||
embeds = embeds[:i]
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return embeds, nil
|
||||
}, nil
|
||||
}
|
||||
|
||||
func buildLLamaPredictOptions(c Config) []llama.PredictOption {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(c.Temperature),
|
||||
llama.SetTopP(c.TopP),
|
||||
llama.SetTopK(c.TopK),
|
||||
llama.SetTokens(c.Maxtokens),
|
||||
llama.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Mirostat != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
|
||||
}
|
||||
|
||||
if c.MirostatETA != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
|
||||
}
|
||||
|
||||
if c.MirostatTAU != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
|
||||
}
|
||||
|
||||
if c.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
|
||||
|
||||
if c.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
|
||||
}
|
||||
|
||||
if c.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.F16 {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if c.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return predictOptions
|
||||
}
|
||||
|
||||
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
|
||||
supportStreams := false
|
||||
modelFile := c.Model
|
||||
|
||||
llamaOpts := defaultLLamaOpts(c)
|
||||
|
||||
var inferenceModel interface{}
|
||||
var err error
|
||||
if c.Backend == "" {
|
||||
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
|
||||
} else {
|
||||
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var fn func() (string, error)
|
||||
|
||||
switch {
|
||||
case stableLMModel != nil:
|
||||
switch model := inferenceModel.(type) {
|
||||
case *rwkv.RwkvState:
|
||||
supportStreams = true
|
||||
|
||||
fn = func() (string, error) {
|
||||
stopWord := "\n"
|
||||
if len(c.StopWords) > 0 {
|
||||
stopWord = c.StopWords[0]
|
||||
}
|
||||
|
||||
if err := model.ProcessInput(s); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
|
||||
|
||||
return response, nil
|
||||
}
|
||||
case *gpt2.Starcoder:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
@@ -69,105 +218,177 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return stableLMModel.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case gpt2Model != nil:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return gpt2Model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case gptModel != nil:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gptj.PredictOption{
|
||||
gptj.SetTemperature(c.Temperature),
|
||||
gptj.SetTopP(c.TopP),
|
||||
gptj.SetTopK(c.TopK),
|
||||
gptj.SetTokens(c.Maxtokens),
|
||||
gptj.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gptj.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return gptModel.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case model != nil:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []llama.PredictOption{
|
||||
llama.SetTemperature(c.Temperature),
|
||||
llama.SetTopP(c.TopP),
|
||||
llama.SetTopK(c.TopK),
|
||||
llama.SetTokens(c.Maxtokens),
|
||||
llama.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
|
||||
|
||||
if c.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
|
||||
}
|
||||
|
||||
if c.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.F16 {
|
||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
||||
}
|
||||
|
||||
if c.IgnoreEOS {
|
||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.RedPajama:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *bloomz.Bloomz:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []bloomz.PredictOption{
|
||||
bloomz.SetTemperature(c.Temperature),
|
||||
bloomz.SetTopP(c.TopP),
|
||||
bloomz.SetTopK(c.TopK),
|
||||
bloomz.SetTokens(c.Maxtokens),
|
||||
bloomz.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.StableLM:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.Dolly:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt2.GPT2:
|
||||
fn = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(c.Temperature),
|
||||
gpt2.SetTopP(c.TopP),
|
||||
gpt2.SetTopK(c.TopK),
|
||||
gpt2.SetTokens(c.Maxtokens),
|
||||
gpt2.SetThreads(c.Threads),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
if c.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
|
||||
}
|
||||
|
||||
return model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case *gpt4all.Model:
|
||||
supportStreams = true
|
||||
|
||||
fn = func() (string, error) {
|
||||
if tokenCallback != nil {
|
||||
model.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt4all.PredictOption{
|
||||
gpt4all.SetTemperature(c.Temperature),
|
||||
gpt4all.SetTopP(c.TopP),
|
||||
gpt4all.SetTopK(c.TopK),
|
||||
gpt4all.SetTokens(c.Maxtokens),
|
||||
}
|
||||
|
||||
if c.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
|
||||
}
|
||||
|
||||
str, er := model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
model.SetTokenCallback(nil)
|
||||
return str, er
|
||||
}
|
||||
case *llama.LLama:
|
||||
supportStreams = true
|
||||
fn = func() (string, error) {
|
||||
|
||||
if tokenCallback != nil {
|
||||
model.SetTokenCallback(tokenCallback)
|
||||
}
|
||||
|
||||
predictOptions := buildLLamaPredictOptions(c)
|
||||
|
||||
str, er := model.Predict(
|
||||
s,
|
||||
predictOptions...,
|
||||
)
|
||||
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
|
||||
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
|
||||
// after a stream event has occurred
|
||||
model.SetTokenCallback(nil)
|
||||
return str, er
|
||||
}
|
||||
}
|
||||
|
||||
return func() (string, error) {
|
||||
@@ -183,6 +404,66 @@ func ModelInference(s string, loader *model.ModelLoader, c Config) (func() (stri
|
||||
l.Lock()
|
||||
defer l.Unlock()
|
||||
|
||||
return fn()
|
||||
res, err := fn()
|
||||
if tokenCallback != nil && !supportStreams {
|
||||
tokenCallback(res)
|
||||
}
|
||||
return res, err
|
||||
}, nil
|
||||
}
|
||||
|
||||
func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
|
||||
result := []Choice{}
|
||||
|
||||
n := input.N
|
||||
|
||||
if input.N == 0 {
|
||||
n = 1
|
||||
}
|
||||
|
||||
// get the model function to call for the result
|
||||
predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
prediction, err := predFunc()
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
|
||||
prediction = Finetune(*config, predInput, prediction)
|
||||
cb(prediction, &result)
|
||||
|
||||
//result = append(result, Choice{Text: prediction})
|
||||
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
|
||||
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
|
||||
var mu sync.Mutex = sync.Mutex{}
|
||||
|
||||
func Finetune(config Config, input, prediction string) string {
|
||||
if config.Echo {
|
||||
prediction = input + prediction
|
||||
}
|
||||
|
||||
for _, c := range config.Cutstrings {
|
||||
mu.Lock()
|
||||
reg, ok := cutstrings[c]
|
||||
if !ok {
|
||||
cutstrings[c] = regexp.MustCompile(c)
|
||||
reg = cutstrings[c]
|
||||
}
|
||||
mu.Unlock()
|
||||
prediction = reg.ReplaceAllString(prediction, "")
|
||||
}
|
||||
|
||||
for _, c := range config.TrimSpace {
|
||||
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
|
||||
}
|
||||
return prediction
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: v2
|
||||
appVersion: 0.1.0
|
||||
description: A Helm chart for LocalAI
|
||||
name: local-ai
|
||||
type: application
|
||||
version: 1.0.0
|
||||
@@ -1,44 +0,0 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "local-ai.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "local-ai.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "local-ai.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "local-ai.labels" -}}
|
||||
helm.sh/chart: {{ include "local-ai.chart" . }}
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: "{{ .Release.Name }}"
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -1,39 +0,0 @@
|
||||
{{- if .Values.dataVolume.enabled }}
|
||||
apiVersion: cdi.kubevirt.io/v1beta1
|
||||
kind: DataVolume
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
contentType: archive
|
||||
source:
|
||||
{{ .Values.dataVolume.source.type }}:
|
||||
url: {{ .Values.dataVolume.source.url }}
|
||||
secretRef: {{ template "local-ai.fullname" . }}
|
||||
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
|
||||
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
|
||||
{{- end }}
|
||||
{{- if .Values.dataVolume.source.caCertConfigMap }}
|
||||
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
|
||||
{{- end }}
|
||||
pvc:
|
||||
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.dataVolume.pvc.size }}
|
||||
---
|
||||
{{- if .Values.dataVolume.secret.enabled }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
data:
|
||||
accessKeyId: {{ .Values.dataVolume.secret.username }}
|
||||
secretKey: {{ .Values.dataVolume.secret.password }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -1,39 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
spec:
|
||||
containers:
|
||||
- name: {{ template "local-ai.fullname" . }}
|
||||
image: {{ .Values.deployment.image }}
|
||||
env:
|
||||
- name: THREADS
|
||||
value: {{ .Values.deployment.env.threads | quote }}
|
||||
- name: CONTEXT_SIZE
|
||||
value: {{ .Values.deployment.env.contextSize | quote }}
|
||||
- name: MODELS_PATH
|
||||
value: {{ .Values.deployment.env.modelsPath }}
|
||||
{{- if .Values.deployment.volume.enabled }}
|
||||
volumeMounts:
|
||||
- mountPath: {{ .Values.deployment.env.modelsPath }}
|
||||
name: models
|
||||
volumes:
|
||||
- name: models
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ template "local-ai.fullname" . }}
|
||||
{{- end }}
|
||||
@@ -1,19 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
{{- if .Values.service.annotations }}
|
||||
annotations:
|
||||
{{ toYaml .Values.service.annotations | indent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
type: "{{ .Values.service.type }}"
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
@@ -1,38 +0,0 @@
|
||||
deployment:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
threads: 14
|
||||
contextSize: 512
|
||||
modelsPath: "/models"
|
||||
volume:
|
||||
enabled: false
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
annotations: {}
|
||||
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
|
||||
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
|
||||
# (requires https://github.com/kubevirt/containerized-data-importer)
|
||||
dataVolume:
|
||||
enabled: false
|
||||
source:
|
||||
type: "http" # Source type. One of: [ http | s3 ]
|
||||
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
|
||||
|
||||
# CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
|
||||
# and a base64 encoded pem certificate
|
||||
caCertConfigMap: ""
|
||||
|
||||
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
|
||||
# that may include sensitive information. Only applicable for the http source type.
|
||||
secretExtraHeaders: []
|
||||
pvc:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
size: 5Gi
|
||||
secret:
|
||||
enabled: false
|
||||
username: "" # base64 encoded
|
||||
password: "" # base64 encoded
|
||||
@@ -5,11 +5,11 @@ services:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: .
      dockerfile: Dockerfile
      dockerfile: Dockerfile.dev
    ports:
      - 8080:8080
    env_file:
      - .env
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
7 entrypoint.sh Executable file
@@ -0,0 +1,7 @@
#!/bin/bash

cd /build

make build

./local-ai "$@"
@@ -2,10 +2,85 @@

Here is a list of projects that can easily be integrated with the LocalAI backend.

## Projects
### Projects

- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mudler](https://github.com/mudler))

### Chatbot-UI

_by [@mkellerman](https://github.com/mkellerman)_



This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/)

### Discord bot

_by [@mudler](https://github.com/mudler)_

Run a Discord bot that lets you talk directly with a model.

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our Discord server.

### Langchain

_by [@dave-gray101](https://github.com/dave-gray101)_

A ready-to-use example showing end-to-end how to integrate LocalAI with langchain.

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/)

### Langchain Python

_by [@mudler](https://github.com/mudler)_

A ready-to-use example showing end-to-end how to integrate LocalAI with langchain.

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/)

### LocalAI WebUI

_by [@dhruvgera](https://github.com/dhruvgera)_



A light, community-maintained web interface for LocalAI.

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/)

### How to run rwkv models

_by [@mudler](https://github.com/mudler)_

A full example of how to run RWKV models with LocalAI.

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/)

### Slack bot

_by [@mudler](https://github.com/mudler)_

Run a Slack bot that lets you talk directly with a model.

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)

### Question answering on documents

_by [@mudler](https://github.com/mudler)_

Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.

[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)

### Template for Runpod.io

_by [@fHachenberg](https://github.com/fHachenberg)_

Allows you to run any LocalAI-compatible model as a backend on the servers of https://runpod.io

[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)

## Want to contribute?

Create an issue, and put `Example: <description>` in the title! We will post your examples here.
@@ -19,8 +19,28 @@ cd LocalAI/examples/chatbot-ui
```bash
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# start with docker-compose
docker-compose up -d --build
```

## Pointing chatbot-ui to a separately managed LocalAI service

If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the example below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):

```
version: '3.6'

services:
  chatgpt:
    image: ghcr.io/mckaywrigley/chatbot-ui:main
    ports:
      - 3000:3000
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
```

Once you've edited the docker-compose file, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
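Before starting the UI against a separately managed instance, it can help to confirm that the LocalAI API is actually reachable from the machine running chatbot-ui. The snippet below is a minimal sketch, not part of the example itself: it assumes Python with the `requests` package installed, the default port 8080, and reuses the `<<LOCALAI_IP>>` placeholder from the compose file above.

```python
# Minimal reachability check for a remote LocalAI instance (sketch, not part of the example).
import requests

LOCALAI_BASE = "http://<<LOCALAI_IP>>:8080"  # replace with your LocalAI host, as in the compose file

# /v1/models is the OpenAI-compatible endpoint LocalAI uses to list available model configs.
resp = requests.get(f"{LOCALAI_BASE}/v1/models", timeout=10)
resp.raise_for_status()
for model in resp.json().get("data", []):
    print(model["id"])
```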
## Accessing chatbot-ui

Open http://localhost:3000 for the Web UI.
@@ -5,7 +5,7 @@ services:
     image: quay.io/go-skynet/local-ai:latest
     build:
       context: ../../
-      dockerfile: Dockerfile
+      dockerfile: Dockerfile.dev
     ports:
       - 8080:8080
     environment:
6 examples/discord-bot/.env.example Normal file
@@ -0,0 +1,6 @@
OPENAI_API_KEY=x
DISCORD_BOT_TOKEN=x
DISCORD_CLIENT_ID=x
OPENAI_API_BASE=http://api:8080
ALLOWED_SERVER_IDS=x
SERVER_TO_MODERATION_CHANNEL=1:1
76 examples/discord-bot/README.md Normal file
@@ -0,0 +1,76 @@
# discord-bot



## Setup

```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/discord-bot

# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>

# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup)
cp -rfv .env.example .env
vim .env

# start with docker-compose
docker-compose up -d --build
```

Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup

Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..`

## Kubernetes

- install the local-ai chart first
- change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest:

```yaml
apiVersion: v1
kind: Namespace
metadata:
  name: discord-bot
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: localai
  namespace: discord-bot
  labels:
    app: localai
spec:
  selector:
    matchLabels:
      app: localai
  replicas: 1
  template:
    metadata:
      labels:
        app: localai
      name: localai
    spec:
      containers:
        - name: localai-discord
          env:
            - name: OPENAI_API_KEY
              value: "x"
            - name: DISCORD_BOT_TOKEN
              value: ""
            - name: DISCORD_CLIENT_ID
              value: ""
            - name: OPENAI_API_BASE
              value: "http://local-ai.default.svc.cluster.local:8080"
            - name: ALLOWED_SERVER_IDS
              value: "xx"
            - name: SERVER_TO_MODERATION_CHANNEL
              value: "1:1"
          image: quay.io/go-skynet/gpt-discord-bot:main
```
21 examples/discord-bot/docker-compose.yaml Normal file
@@ -0,0 +1,21 @@
version: '3.6'

services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: ../../
      dockerfile: Dockerfile.dev
    ports:
      - 8080:8080
    environment:
      - DEBUG=true
      - MODELS_PATH=/models
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]

  bot:
    image: quay.io/go-skynet/gpt-discord-bot:main
    env_file:
      - .env

1 examples/discord-bot/models Symbolic link
@@ -0,0 +1 @@
../chatbot-ui/models/
35 examples/langchain-python/README.md Normal file
@@ -0,0 +1,35 @@
## Langchain-python

Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).

To interact with langchain, you can just set the `OPENAI_API_BASE` URL and provide any random string as the API token.

See the example below:

```
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/langchain-python

# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>

# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# start with docker-compose
docker-compose up -d --build


pip install langchain
pip install openai

export OPENAI_API_BASE=http://localhost:8080
export OPENAI_API_KEY=sk-

python test.py
# A good company name for a company that makes colorful socks would be "Colorsocks".

python agent.py
```
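If you would rather configure the endpoint in code than through environment variables, the same idea can be expressed directly with langchain's OpenAI wrapper. This is a minimal sketch under the same assumptions as above (LocalAI listening on `localhost:8080`, the `gpt-3.5-turbo` model config from this example installed); the example's own `test.py` and `agent.py` use the environment-variable approach.

```python
# Sketch: point langchain at LocalAI explicitly, without relying on OPENAI_API_BASE/OPENAI_API_KEY.
from langchain.llms import OpenAI

llm = OpenAI(
    temperature=0.9,
    model_name="gpt-3.5-turbo",                  # model config shipped with this example
    openai_api_base="http://localhost:8080/v1",  # assumed LocalAI endpoint
    openai_api_key="sk-",                        # LocalAI accepts any non-empty key
)

print(llm("What would be a good company name for a company that makes colorful socks?"))
```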
44 examples/langchain-python/agent.py Normal file
@@ -0,0 +1,44 @@
## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6

from io import StringIO
import sys
import os
from typing import Dict, Optional

from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI

base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')


class PythonREPL:
    """Simulates a standalone Python REPL."""

    def __init__(self):
        pass

    def run(self, command: str) -> str:
        """Run command and returns anything printed."""
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            exec(command, globals())
            sys.stdout = old_stdout
            output = mystdout.getvalue()
        except Exception as e:
            sys.stdout = old_stdout
            output = str(e)
        return output


llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
python_repl = Tool(
    "Python REPL",
    PythonREPL().run,
    """A Python shell. Use this to execute python commands. Input should be a valid python command.
    If you expect output it should be printed out.""",
)
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
agent.run("What is the 10th fibonacci number?")
16 examples/langchain-python/docker-compose.yaml Normal file
@@ -0,0 +1,16 @@
version: '3.6'

services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: ../../
      dockerfile: Dockerfile.dev
    ports:
      - 8080:8080
    environment:
      - DEBUG=true
      - MODELS_PATH=/models
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]

1 examples/langchain-python/models Symbolic link
@@ -0,0 +1 @@
../chatbot-ui/models

6 examples/langchain-python/test.py Normal file
@@ -0,0 +1,6 @@

from langchain.llms import OpenAI

llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))
2
examples/langchain/.gitignore
vendored
Normal file
2
examples/langchain/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
models/ggml-koala-13B-4bit-128g
|
||||
models/ggml-gpt4all-j
|
||||
6
examples/langchain/JS.Dockerfile
Normal file
6
examples/langchain/JS.Dockerfile
Normal file
@@ -0,0 +1,6 @@
|
||||
FROM node:latest
|
||||
COPY ./langchainjs-localai-example /app
|
||||
WORKDIR /app
|
||||
RUN npm install
|
||||
RUN npm run build
|
||||
ENTRYPOINT [ "npm", "run", "start" ]
|
||||
5
examples/langchain/PY.Dockerfile
Normal file
5
examples/langchain/PY.Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
||||
FROM python:3.10-bullseye
|
||||
COPY ./langchainpy-localai-example /app
|
||||
WORKDIR /app
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
ENTRYPOINT [ "python", "./full_demo.py" ];
|
||||
30 examples/langchain/README.md Normal file
@@ -0,0 +1,30 @@
# langchain

Example of using langchain, with the standard OpenAI llm module, and LocalAI. Includes docker compose profiles for both the TypeScript and Python versions.

**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.

## Setup

```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/langchain

# (optional) - Edit the example code in typescript.
# vi ./langchainjs-localai-example/index.ts

# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# start with docker-compose for typescript!
docker-compose --profile ts up --build

# or start with docker-compose for python!
docker-compose --profile py up --build
```

## Copyright

Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.
43
examples/langchain/docker-compose.yaml
Normal file
43
examples/langchain/docker-compose.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
js:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: JS.Dockerfile
|
||||
profiles:
|
||||
- js
|
||||
- ts
|
||||
depends_on:
|
||||
- "api"
|
||||
environment:
|
||||
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
|
||||
- 'OPENAI_API_BASE=http://api:8080/v1'
|
||||
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
|
||||
|
||||
py:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: PY.Dockerfile
|
||||
profiles:
|
||||
- py
|
||||
depends_on:
|
||||
- "api"
|
||||
environment:
|
||||
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
|
||||
- 'OPENAI_API_BASE=http://api:8080/v1'
|
||||
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
|
||||
2
examples/langchain/langchainjs-localai-example/.gitignore
vendored
Normal file
2
examples/langchain/langchainjs-localai-example/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
node_modules/
|
||||
dist/
|
||||
20
examples/langchain/langchainjs-localai-example/.vscode/launch.json
vendored
Normal file
20
examples/langchain/langchainjs-localai-example/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"name": "Launch Program",
|
||||
// "skipFiles": [
|
||||
// "<node_internals>/**"
|
||||
// ],
|
||||
"program": "${workspaceFolder}\\dist\\index.mjs",
|
||||
"outFiles": [
|
||||
"${workspaceFolder}/**/*.js"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
1951
examples/langchain/langchainjs-localai-example/package-lock.json
generated
Normal file
1951
examples/langchain/langchainjs-localai-example/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
examples/langchain/langchainjs-localai-example/package.json
Normal file
21
examples/langchain/langchainjs-localai-example/package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "langchainjs-localai-example",
|
||||
"version": "0.1.0",
|
||||
"description": "Trivial Example of using langchain + the OpenAI API + LocalAI together",
|
||||
"main": "index.mjs",
|
||||
"scripts": {
|
||||
"build": "tsc --build",
|
||||
"clean": "tsc --build --clean",
|
||||
"start": "node --trace-warnings dist/index.mjs"
|
||||
},
|
||||
"author": "dave@gray101.com",
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"@types/node": "^18.16.4",
|
||||
"typescript": "^5.0.4"
|
||||
},
|
||||
"dependencies": {
|
||||
"langchain": "^0.0.67",
|
||||
"typeorm": "^0.3.15"
|
||||
}
|
||||
}
|
||||
79
examples/langchain/langchainjs-localai-example/src/index.mts
Normal file
79
examples/langchain/langchainjs-localai-example/src/index.mts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { OpenAIChat } from "langchain/llms/openai";
|
||||
import { loadQAStuffChain } from "langchain/chains";
|
||||
import { Document } from "langchain/document";
|
||||
import { initializeAgentExecutorWithOptions } from "langchain/agents";
|
||||
import {Calculator} from "langchain/tools/calculator";
|
||||
|
||||
const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
|
||||
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
|
||||
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
|
||||
|
||||
function getModel(): OpenAIChat {
|
||||
return new OpenAIChat({
|
||||
prefixMessages: [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are a helpful assistant that answers in pirate language",
|
||||
},
|
||||
],
|
||||
modelName: modelName,
|
||||
maxTokens: 50,
|
||||
openAIApiKey: fakeApiKey,
|
||||
maxRetries: 2
|
||||
}, {
|
||||
basePath: pathToLocalAi,
|
||||
apiKey: fakeApiKey,
|
||||
});
|
||||
}
|
||||
|
||||
// Minimal example.
|
||||
export const run = async () => {
|
||||
const model = getModel();
|
||||
console.log(`about to model.call at ${new Date().toUTCString()}`);
|
||||
const res = await model.call(
|
||||
"What would be a good company name a company that makes colorful socks?"
|
||||
);
|
||||
console.log(`${new Date().toUTCString()}`);
|
||||
console.log({ res });
|
||||
};
|
||||
|
||||
await run();
|
||||
|
||||
// This example uses the `StuffDocumentsChain`
|
||||
export const run2 = async () => {
|
||||
const model = getModel();
|
||||
const chainA = loadQAStuffChain(model);
|
||||
const docs = [
|
||||
new Document({ pageContent: "Harrison went to Harvard." }),
|
||||
new Document({ pageContent: "Ankush went to Princeton." }),
|
||||
];
|
||||
const resA = await chainA.call({
|
||||
input_documents: docs,
|
||||
question: "Where did Harrison go to college?",
|
||||
});
|
||||
console.log({ resA });
|
||||
};
|
||||
|
||||
await run2();
|
||||
|
||||
// Quickly thrown together example of using tools + agents.
|
||||
// This seems like it should work, but it doesn't yet.
|
||||
export const temporarilyBrokenToolTest = async () => {
|
||||
const model = getModel();
|
||||
|
||||
const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
|
||||
agentType: "zero-shot-react-description",
|
||||
});
|
||||
|
||||
console.log("Loaded agent.");
|
||||
|
||||
const input = `What is the value of (500 *2) + 350 - 13?`;
|
||||
|
||||
console.log(`Executing with input "${input}"...`);
|
||||
|
||||
const result = await executor.call({ input });
|
||||
|
||||
console.log(`Got output ${result.output}`);
|
||||
}
|
||||
|
||||
await temporarilyBrokenToolTest();
|
||||
15
examples/langchain/langchainjs-localai-example/tsconfig.json
Normal file
15
examples/langchain/langchainjs-localai-example/tsconfig.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "es2022",
|
||||
"lib": ["ES2022", "DOM"],
|
||||
"module": "ES2022",
|
||||
"moduleResolution": "node",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"isolatedModules": true,
|
||||
"outDir": "./dist"
|
||||
},
|
||||
"include": ["src", "test"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
24
examples/langchain/langchainpy-localai-example/.vscode/launch.json
vendored
Normal file
24
examples/langchain/langchainpy-localai-example/.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"redirectOutput": true,
|
||||
"justMyCode": false
|
||||
},
|
||||
{
|
||||
"name": "Python: Attach to Port 5678",
|
||||
"type": "python",
|
||||
"request": "attach",
|
||||
"connect": {
|
||||
"host": "localhost",
|
||||
"port": 5678
|
||||
},
|
||||
"justMyCode": false
|
||||
}
|
||||
]
|
||||
}
|
||||
3
examples/langchain/langchainpy-localai-example/.vscode/settings.json
vendored
Normal file
3
examples/langchain/langchainpy-localai-example/.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
|
||||
}
|
||||
46
examples/langchain/langchainpy-localai-example/full_demo.py
Normal file
46
examples/langchain/langchainpy-localai-example/full_demo.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
import logging
|
||||
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain import PromptTemplate, LLMChain
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
AIMessagePromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import (
|
||||
AIMessage,
|
||||
HumanMessage,
|
||||
SystemMessage
|
||||
)
|
||||
|
||||
# This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI.
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
print('Langchain + LocalAI PYTHON Tests')
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
|
||||
key = os.environ.get('OPENAI_API_KEY', '-')
|
||||
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
|
||||
|
||||
|
||||
chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
|
||||
|
||||
print("Created ChatOpenAI for ", chat.model_name)
|
||||
|
||||
template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!"
|
||||
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
|
||||
human_template = "{text}"
|
||||
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
|
||||
|
||||
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
|
||||
|
||||
print("ABOUT to execute")
|
||||
|
||||
# get a chat completion from the formatted messages
|
||||
response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
|
||||
|
||||
print(response)
|
||||
|
||||
print(".");
|
||||
@@ -0,0 +1,32 @@
|
||||
aiohttp==3.8.4
|
||||
aiosignal==1.3.1
|
||||
async-timeout==4.0.2
|
||||
attrs==23.1.0
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==3.1.0
|
||||
colorama==0.4.6
|
||||
dataclasses-json==0.5.7
|
||||
debugpy==1.6.7
|
||||
frozenlist==1.3.3
|
||||
greenlet==2.0.2
|
||||
idna==3.4
|
||||
langchain==0.0.159
|
||||
marshmallow==3.19.0
|
||||
marshmallow-enum==1.5.1
|
||||
multidict==6.0.4
|
||||
mypy-extensions==1.0.0
|
||||
numexpr==2.8.4
|
||||
numpy==1.24.3
|
||||
openai==0.27.6
|
||||
openapi-schema-pydantic==1.2.4
|
||||
packaging==23.1
|
||||
pydantic==1.10.7
|
||||
PyYAML==6.0
|
||||
requests==2.29.0
|
||||
SQLAlchemy==2.0.12
|
||||
tenacity==8.2.2
|
||||
tqdm==4.65.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.5.0
|
||||
urllib3==1.26.15
|
||||
yarl==1.9.2
|
||||
@@ -0,0 +1,6 @@
|
||||
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
|
||||
text = "What would be a good company name for a company that makes colorful socks?"
|
||||
print(llm(text))
|
||||
1
examples/langchain/models/completion.tmpl
Normal file
1
examples/langchain/models/completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
18
examples/langchain/models/gpt-3.5-turbo.yaml
Normal file
18
examples/langchain/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
threads: 4
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
backend: "gptj"
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
4
examples/langchain/models/gpt4all.tmpl
Normal file
4
examples/langchain/models/gpt4all.tmpl
Normal file
@@ -0,0 +1,4 @@
|
||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
### Prompt:
|
||||
{{.Input}}
|
||||
### Response:
|
||||
26 examples/localai-webui/README.md Normal file
@@ -0,0 +1,26 @@
# localai-webui

Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).



## Setup

```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/localai-webui

# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>

# Download any desired models to models/ in the parent LocalAI project dir
# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin

# start with docker-compose
docker-compose up -d --build
```

Open http://localhost:3000 for the Web UI.
20
examples/localai-webui/docker-compose.yml
Normal file
20
examples/localai-webui/docker-compose.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- 8080:8080
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai"]
|
||||
|
||||
frontend:
|
||||
image: quay.io/go-skynet/localai-frontend:master
|
||||
ports:
|
||||
- 3000:3000
|
||||
1
examples/query_data/.gitignore
vendored
Normal file
1
examples/query_data/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
storage/
|
||||
67 examples/query_data/README.md Normal file
@@ -0,0 +1,67 @@
# Data query example

This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.

It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).

Summary of the steps:

- prepare the dataset (and store it into `data`)
- prepare a vector index database to run queries on
- run queries

## Requirements

You will need a training data set. Copy it into the `data` directory.

## Setup

Start the API:

```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/query_data

wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# start with docker-compose
docker-compose up -d --build
```

### Create a storage

In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.

```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-

python store.py
```

After it finishes, a directory "storage" will be created with the vector index database.

## Query

We can now query the dataset.

```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-

python query.py
```

## Update

To update our vector database, run `update.py`

```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-

python update.py
```
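Before building the index, it can be worth checking that the embeddings backend responds, since `store.py` will otherwise fail part-way through. The snippet below is a rough sketch, assuming the API from the compose file is reachable on `localhost:8080`, that the `requests` package is installed, and that the `text-embedding-ada-002` config from `models/embeddings.yaml` is in place.

```python
# Sketch: confirm the bert-based embeddings model answers before running store.py.
import requests

resp = requests.post(
    "http://localhost:8080/v1/embeddings",
    json={"model": "text-embedding-ada-002",
          "input": "LocalAI can compute embeddings locally."},
    timeout=60,
)
resp.raise_for_status()
embedding = resp.json()["data"][0]["embedding"]
print(f"embedding length: {len(embedding)}")
```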
0
examples/query_data/data/.keep
Normal file
0
examples/query_data/data/.keep
Normal file
15
examples/query_data/docker-compose.yml
Normal file
15
examples/query_data/docker-compose.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- 8080:8080
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai"]
|
||||
1
examples/query_data/models/completion.tmpl
Normal file
1
examples/query_data/models/completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
{{.Input}}
|
||||
6
examples/query_data/models/embeddings.yaml
Normal file
6
examples/query_data/models/embeddings.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
name: text-embedding-ada-002
|
||||
parameters:
|
||||
model: bert
|
||||
threads: 14
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
17
examples/query_data/models/gpt-3.5-turbo.yaml
Normal file
17
examples/query_data/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: ggml-gpt4all-j
|
||||
top_k: 80
|
||||
temperature: 0.2
|
||||
top_p: 0.7
|
||||
context_size: 1024
|
||||
threads: 14
|
||||
stopwords:
|
||||
- "HUMAN:"
|
||||
- "GPT:"
|
||||
roles:
|
||||
user: " "
|
||||
system: " "
|
||||
template:
|
||||
completion: completion
|
||||
chat: gpt4all
|
||||
35
examples/query_data/query.py
Normal file
35
examples/query_data/query.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import os
|
||||
|
||||
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
|
||||
# os.environ['OPENAI_API_KEY']= ""
|
||||
|
||||
from llama_index import LLMPredictor, PromptHelper, ServiceContext
|
||||
from langchain.llms.openai import OpenAI
|
||||
from llama_index import StorageContext, load_index_from_storage
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses text-davinci-003 by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 500
|
||||
num_output = 256
|
||||
max_chunk_overlap = 20
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
|
||||
|
||||
# rebuild storage context
|
||||
storage_context = StorageContext.from_defaults(persist_dir='./storage')
|
||||
|
||||
# load index
|
||||
index = load_index_from_storage(storage_context, service_context=service_context, )
|
||||
|
||||
query_engine = index.as_query_engine()
|
||||
|
||||
data = input("Question: ")
|
||||
response = query_engine.query(data)
|
||||
print(response)
|
||||
27
examples/query_data/store.py
Normal file
27
examples/query_data/store.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import os
|
||||
|
||||
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
|
||||
# os.environ['OPENAI_API_KEY']= ""
|
||||
|
||||
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext
|
||||
from langchain.llms.openai import OpenAI
|
||||
from llama_index import StorageContext, load_index_from_storage
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses text-davinci-003 by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 400
|
||||
num_output = 400
|
||||
max_chunk_overlap = 30
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
documents = SimpleDirectoryReader('data').load_data()
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
|
||||
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
|
||||
index.storage_context.persist(persist_dir="./storage")
|
||||
|
||||
32
examples/query_data/update.py
Normal file
32
examples/query_data/update.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import os
|
||||
|
||||
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
|
||||
# os.environ['OPENAI_API_KEY']= ""
|
||||
|
||||
from llama_index import LLMPredictor, PromptHelper, SimpleDirectoryReader, ServiceContext
|
||||
from langchain.llms.openai import OpenAI
|
||||
from llama_index import StorageContext, load_index_from_storage
|
||||
|
||||
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
|
||||
|
||||
# This example uses text-davinci-003 by default; feel free to change if desired
|
||||
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
|
||||
|
||||
# Configure prompt parameters and initialise helper
|
||||
max_input_size = 512
|
||||
num_output = 256
|
||||
max_chunk_overlap = 20
|
||||
|
||||
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
|
||||
|
||||
# Load documents from the 'data' directory
|
||||
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
|
||||
|
||||
# rebuild storage context
|
||||
storage_context = StorageContext.from_defaults(persist_dir='./storage')
|
||||
|
||||
# load index
|
||||
index = load_index_from_storage(storage_context, service_context=service_context, )
|
||||
documents = SimpleDirectoryReader('data').load_data()
|
||||
index.refresh(documents)
|
||||
index.storage_context.persist(persist_dir="./storage")
|
||||
10
examples/rwkv/Dockerfile.build
Normal file
10
examples/rwkv/Dockerfile.build
Normal file
@@ -0,0 +1,10 @@
|
||||
FROM python
|
||||
|
||||
# convert the model (one-off)
|
||||
RUN pip3 install torch numpy
|
||||
|
||||
WORKDIR /build
|
||||
COPY ./scripts/ .
|
||||
|
||||
RUN git clone --recurse-submodules https://github.com/saharNooby/rwkv.cpp && cd rwkv.cpp && cmake . && cmake --build . --config Release
|
||||
ENTRYPOINT [ "/build/build.sh" ]
|
||||
59 examples/rwkv/README.md Normal file
@@ -0,0 +1,59 @@
# rwkv

Example of how to run rwkv models.

## Run models

Setup:

```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/rwkv

# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>

# build the tooling image to convert an rwkv model locally:
docker build -t rwkv-converter -f Dockerfile.build .

# download and convert a model (one-off) - it's going to be fast on CPU too!
docker run -ti --name converter -v $PWD:/data rwkv-converter https://huggingface.co/BlinkDL/rwkv-4-raven/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%25-Other1%25-20230425-ctx4096.pth /data/models/rwkv

# Get the tokenizer
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O models/rwkv.tokenizer.json

# start with docker-compose
docker-compose up -d --build
```

Test it out:

```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "prompt": "A long time ago, in a galaxy far away",
  "max_tokens": 100,
  "temperature": 0.9, "top_p": 0.8, "top_k": 80
}'

# {"object":"text_completion","model":"gpt-3.5-turbo","choices":[{"text":", there was a small group of five friends: Annie, Bryan, Charlie, Emily, and Jesse."}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}

curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "messages": [{"role": "user", "content": "How are you?"}],
  "temperature": 0.9, "top_p": 0.8, "top_k": 80
}'

# {"object":"chat.completion","model":"gpt-3.5-turbo","choices":[{"message":{"role":"assistant","content":" Good, thanks. I am about to go to bed. I' ll talk to you later.Bye."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```

### Fine tuning

See [RWKV-LM](https://github.com/BlinkDL/RWKV-LM#training--fine-tuning). There is also a Google [colab](https://colab.research.google.com/github/resloved/RWKV-notebooks/blob/master/RWKV_v4_RNN_Pile_Fine_Tuning.ipynb).

## See also

- [RWKV-LM](https://github.com/BlinkDL/RWKV-LM)
- [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
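The same requests can be made from Python with the `openai` client instead of curl. The sketch below assumes the `openai` package (0.27.x, as pinned in the langchain example's requirements) and the default `localhost:8080` endpoint.

```python
# Sketch: the completion request from the curl example above, via the openai Python client.
import openai

openai.api_base = "http://localhost:8080/v1"  # assumed LocalAI endpoint
openai.api_key = "sk-"                        # any non-empty string works

completion = openai.Completion.create(
    model="gpt-3.5-turbo",  # maps to the rwkv backend via models/gpt-3.5-turbo.yaml
    prompt="A long time ago, in a galaxy far away",
    max_tokens=100,
    temperature=0.9,
    top_p=0.8,
)
print(completion.choices[0].text)
```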
16
examples/rwkv/docker-compose.yaml
Normal file
16
examples/rwkv/docker-compose.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
19
examples/rwkv/models/gpt-3.5-turbo.yaml
Normal file
19
examples/rwkv/models/gpt-3.5-turbo.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
name: gpt-3.5-turbo
|
||||
parameters:
|
||||
model: rwkv
|
||||
top_k: 80
|
||||
temperature: 0.9
|
||||
max_tokens: 100
|
||||
top_p: 0.8
|
||||
context_size: 1024
|
||||
threads: 14
|
||||
backend: "rwkv"
|
||||
cutwords:
|
||||
- "Bob:.*"
|
||||
roles:
|
||||
user: "Bob:"
|
||||
system: "Alice:"
|
||||
assistant: "Alice:"
|
||||
template:
|
||||
completion: rwkv_completion
|
||||
chat: rwkv_chat
|
||||
13
examples/rwkv/models/rwkv_chat.tmpl
Normal file
13
examples/rwkv/models/rwkv_chat.tmpl
Normal file
@@ -0,0 +1,13 @@
|
||||
The following is a verbose detailed conversation between Bob and a woman, Alice. Alice is intelligent, friendly and likeable. Alice is likely to agree with Bob.
|
||||
|
||||
Bob: Hello Alice, how are you doing?
|
||||
|
||||
Alice: Hi Bob! Thanks, I'm fine. What about you?
|
||||
|
||||
Bob: I am very good! It's nice to see you. Would you mind me chatting with you for a while?
|
||||
|
||||
Alice: Not at all! I'm listening.
|
||||
|
||||
{{.Input}}
|
||||
|
||||
Alice:
|
||||
1
examples/rwkv/models/rwkv_completion.tmpl
Normal file
1
examples/rwkv/models/rwkv_completion.tmpl
Normal file
@@ -0,0 +1 @@
|
||||
Complete the following sentence: {{.Input}}
|
||||
11
examples/rwkv/scripts/build.sh
Executable file
11
examples/rwkv/scripts/build.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
URL=$1
|
||||
OUT=$2
|
||||
FILENAME=$(basename $URL)
|
||||
|
||||
wget -nc $URL -O /build/$FILENAME
|
||||
|
||||
python3 /build/rwkv.cpp/rwkv/convert_pytorch_to_ggml.py /build/$FILENAME /build/float-model float16
|
||||
python3 /build/rwkv.cpp/rwkv/quantize.py /build/float-model $OUT Q4_2
|
||||
11 examples/slack-bot/.env.example Normal file
@@ -0,0 +1,11 @@
SLACK_APP_TOKEN=xapp-1-...
SLACK_BOT_TOKEN=xoxb-...
OPENAI_API_KEY=sk-...
OPENAI_API_BASE=http://api:8080
OPENAI_MODEL=gpt-3.5-turbo
OPENAI_TIMEOUT_SECONDS=60
#OPENAI_SYSTEM_TEXT="You proofread text. When you receive a message, you will check
#for mistakes and make suggestions to improve the language of the given text"
USE_SLACK_LANGUAGE=true
SLACK_APP_LOG_LEVEL=INFO
TRANSLATE_MARKDOWN=true
27 examples/slack-bot/README.md Normal file
@@ -0,0 +1,27 @@
# Slack bot

Slackbot using: https://github.com/seratch/ChatGPT-in-Slack

## Setup

```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI

cd LocalAI/examples/slack-bot

git clone https://github.com/seratch/ChatGPT-in-Slack

# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>

# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j

# Set the Slack bot options (see: https://github.com/seratch/ChatGPT-in-Slack)
cp -rfv .env.example .env
vim .env

# start with docker-compose
docker-compose up -d --build
```
23
examples/slack-bot/docker-compose.yaml
Normal file
23
examples/slack-bot/docker-compose.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
version: '3.6'
|
||||
|
||||
services:
|
||||
api:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: Dockerfile.dev
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- DEBUG=true
|
||||
- MODELS_PATH=/models
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
|
||||
bot:
|
||||
build:
|
||||
context: ./ChatGPT-in-Slack
|
||||
dockerfile: Dockerfile
|
||||
env_file:
|
||||
- .env
|
||||
1
examples/slack-bot/models
Symbolic link
1
examples/slack-bot/models
Symbolic link
@@ -0,0 +1 @@
|
||||
../chatbot-ui/models
|
||||
57
go.mod
57
go.mod
@@ -3,52 +3,65 @@ module github.com/go-skynet/LocalAI
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a
|
||||
github.com/go-audio/wav v1.1.0
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04
|
||||
github.com/gofiber/fiber/v2 v2.44.0
|
||||
github.com/jaypipes/ghw v0.10.0
|
||||
github.com/onsi/ginkgo/v2 v2.9.2
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b
|
||||
github.com/gofiber/fiber/v2 v2.45.0
|
||||
github.com/hashicorp/go-multierror v1.1.1
|
||||
github.com/onsi/ginkgo/v2 v2.9.4
|
||||
github.com/onsi/gomega v1.27.6
|
||||
github.com/otiai10/copy v1.11.0
|
||||
github.com/otiai10/openaigo v1.1.0
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/sashabaranov/go-openai v1.9.0
|
||||
github.com/urfave/cli/v2 v2.25.1
|
||||
github.com/sashabaranov/go-openai v1.9.4
|
||||
github.com/swaggo/swag v1.16.1
|
||||
github.com/urfave/cli/v2 v2.25.3
|
||||
github.com/valyala/fasthttp v1.47.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
github.com/PuerkitoBio/purell v1.1.1 // indirect
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
|
||||
github.com/andybalholm/brotli v1.0.5 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.3 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-audio/audio v1.0.0 // indirect
|
||||
github.com/go-audio/riff v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.5 // indirect
|
||||
github.com/go-openapi/jsonreference v0.19.6 // indirect
|
||||
github.com/go-openapi/spec v0.20.4 // indirect
|
||||
github.com/go-openapi/swag v0.19.15 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/jaypipes/pcidb v1.0.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.16.3 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/mailru/easyjson v0.7.6 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.18 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/nomic/gpt4all/gpt4all-bindings/golang v0.0.0-00010101000000-000000000000 // indirect
|
||||
github.com/philhofer/fwd v1.1.2 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
|
||||
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
|
||||
github.com/tinylib/msgp v1.1.8 // indirect
|
||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
github.com/valyala/fasthttp v1.45.0 // indirect
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
golang.org/x/net v0.8.0 // indirect
|
||||
golang.org/x/sys v0.7.0 // indirect
|
||||
golang.org/x/text v0.8.0 // indirect
|
||||
golang.org/x/tools v0.7.0 // indirect
|
||||
golang.org/x/net v0.9.0 // indirect
|
||||
golang.org/x/sys v0.8.0 // indirect
|
||||
golang.org/x/text v0.9.0 // indirect
|
||||
golang.org/x/tools v0.8.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
howett.net/plist v1.0.0 // indirect
|
||||
)
|
||||
|
||||
167
go.sum
167
go.sum
@@ -1,7 +1,9 @@
|
||||
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
|
||||
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
||||
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
|
||||
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
|
||||
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
|
||||
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
@@ -14,32 +16,49 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
|
||||
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2 h1:YNbUAyIRtaLODitigJU1EM5ubmMu5FmHtYAayJD6Vbg=
|
||||
github.com/donomii/go-rwkv.cpp v0.0.0-20230510174014-07166da10cb2/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a h1:MlyiDLNCM/wjbv8U5Elj18NvaAgl61SGiRUpqQz5dfs=
|
||||
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230509153812-1d17cd5bb37a/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
|
||||
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
|
||||
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
|
||||
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
|
||||
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
|
||||
github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
|
||||
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
|
||||
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
|
||||
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
|
||||
github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
|
||||
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
|
||||
github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
|
||||
github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
|
||||
github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
|
||||
github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
|
||||
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
|
||||
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
|
||||
github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f h1:GW8RQa1RVeDF1dOuAP/y6xWVC+BRtf9tJOuEza6Asbg=
|
||||
github.com/go-skynet/bloomz.cpp v0.0.0-20230510195113-ad7e89a0885f/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea h1:8Isk9D+Auth5OuXVAQPC3MO+5zF/2S7mvs2JZLw6a+8=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557 h1:LD66fKtvP2lmyuuKL8pBat/pVTKUbLs3L5fM/5lyi4w=
|
||||
github.com/go-skynet/go-bert.cpp v0.0.0-20230510124618-ec771ec71557/go.mod h1:NHwIVvsg7Jh6p0M4uBLVmSMEaPUia6O6yjXUpLWVJmQ=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6 h1:XshpypO6ekU09CI19vuzke2a1Es1lV5ZaxA7CUehu0E=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230509180201-d49823284cc6/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04 h1:NPiv7mcIjU71MhEv3v4RRWKSBNXnnCyu6VX4CaaHz2I=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b h1:qqxrjY8fYDXQahmCMTCACahm1tbiqHLPUHALkFLyBfo=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230510072905-70593fccbe4b/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
|
||||
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
|
||||
github.com/gofiber/fiber/v2 v2.44.0 h1:Z90bEvPcJM5GFJnu1py0E1ojoerkyew3iiNJ78MQCM8=
|
||||
github.com/gofiber/fiber/v2 v2.44.0/go.mod h1:VTMtb/au8g01iqvHyaCzftuM/xmZgKOZCtFzz6CdV9w=
|
||||
github.com/gofiber/fiber/v2 v2.45.0 h1:p4RpkJT9GAW6parBSbcNFH2ApnAuW3OzaQzbOCoDu+s=
|
||||
github.com/gofiber/fiber/v2 v2.45.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
@@ -47,41 +66,47 @@ github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/onsi/ginkgo/v2 v2.9.4 h1:xR7vG4IXt5RWx6FfIjyAtsoMAtnc3C/rFXBBd2AjZwE=
github.com/onsi/ginkgo/v2 v2.9.4/go.mod h1:gCQYp2Q+kSoIj7ykSVb9nskRSsR6PUj4AiLywzIhbKM=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc=
github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww=
github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=
github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -92,31 +117,30 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.3 h1:uNak3Rn5pPsKRs9bdT7RqRZEyej/zdZOEI2/8wvrFtM=
github.com/sashabaranov/go-openai v1.9.3/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk=
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg=
github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto=
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/urfave/cli/v2 v2.25.1 h1:zw8dSP7ghX0Gmm8vugrs6q9Ku0wzweqPyshy+syu9Gw=
github.com/urfave/cli/v2 v2.25.1/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY=
github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
github.com/valyala/fasthttp v1.45.0 h1:zPkkzpIn8tdHZUrVa6PzYd0i5verqiPSkgTd3bSUcpA=
github.com/valyala/fasthttp v1.45.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c=
github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
@@ -127,43 +151,39 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
@@ -172,26 +192,27 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y=
golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
14
main.go
@@ -1,11 +1,12 @@
package main

import (
    "fmt"
    "os"
    "path/filepath"

    api "github.com/go-skynet/LocalAI/api"
    model "github.com/go-skynet/LocalAI/pkg/model"
    "github.com/jaypipes/ghw"
    "github.com/rs/zerolog"
    "github.com/rs/zerolog/log"
    "github.com/urfave/cli/v2"
@@ -20,12 +21,6 @@ func main() {
        os.Exit(1)
    }

    threads := 4
    cpu, err := ghw.CPU()
    if err == nil {
        threads = int(cpu.TotalCores)
    }

    app := &cli.App{
        Name: "LocalAI",
        Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
@@ -42,13 +37,13 @@ func main() {
            Name: "threads",
            DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
            EnvVars: []string{"THREADS"},
            Value: threads,
            Value: 4,
        },
        &cli.StringFlag{
            Name: "models-path",
            DefaultText: "Path containing models used for inferencing",
            EnvVars: []string{"MODELS_PATH"},
            Value: path,
            Value: filepath.Join(path, "models"),
        },
        &cli.StringFlag{
            Name: "config-file",
@@ -85,6 +80,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
        UsageText: `local-ai [options]`,
        Copyright: "go-skynet authors",
        Action: func(ctx *cli.Context) error {
            fmt.Printf("Starting LocalAI using %d threads, with models path: %s\n", ctx.Int("threads"), ctx.String("models-path"))
            return api.App(ctx.String("config-file"), model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
        },
    }
166
pkg/model/initializers.go
Normal file
@@ -0,0 +1,166 @@
package model

import (
    "fmt"
    "strings"

    rwkv "github.com/donomii/go-rwkv.cpp"
    whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
    bloomz "github.com/go-skynet/bloomz.cpp"
    bert "github.com/go-skynet/go-bert.cpp"
    gpt2 "github.com/go-skynet/go-gpt2.cpp"
    llama "github.com/go-skynet/go-llama.cpp"
    "github.com/hashicorp/go-multierror"
    gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
    "github.com/rs/zerolog/log"
)

const tokenizerSuffix = ".tokenizer.json"

const (
    LlamaBackend = "llama"
    BloomzBackend = "bloomz"
    StarcoderBackend = "starcoder"
    StableLMBackend = "stablelm"
    DollyBackend = "dolly"
    RedPajamaBackend = "redpajama"
    Gpt2Backend = "gpt2"
    Gpt4AllLlamaBackend = "gpt4all-llama"
    Gpt4AllMptBackend = "gpt4all-mpt"
    Gpt4AllJBackend = "gpt4all-j"
    BertEmbeddingsBackend = "bert-embeddings"
    RwkvBackend = "rwkv"
    WhisperBackend = "whisper"
)

var backends []string = []string{
    LlamaBackend,
    Gpt4AllLlamaBackend,
    Gpt4AllMptBackend,
    Gpt4AllJBackend,
    Gpt2Backend,
    WhisperBackend,
    RwkvBackend,
    BloomzBackend,
    StableLMBackend,
    DollyBackend,
    RedPajamaBackend,
    BertEmbeddingsBackend,
    StarcoderBackend,
}

var starCoder = func(modelFile string) (interface{}, error) {
    return gpt2.NewStarcoder(modelFile)
}

var redPajama = func(modelFile string) (interface{}, error) {
    return gpt2.NewRedPajama(modelFile)
}

var dolly = func(modelFile string) (interface{}, error) {
    return gpt2.NewDolly(modelFile)
}

var stableLM = func(modelFile string) (interface{}, error) {
    return gpt2.NewStableLM(modelFile)
}

var bertEmbeddings = func(modelFile string) (interface{}, error) {
    return bert.New(modelFile)
}

var bloomzLM = func(modelFile string) (interface{}, error) {
    return bloomz.New(modelFile)
}
var gpt2LM = func(modelFile string) (interface{}, error) {
    return gpt2.New(modelFile)
}

var whisperModel = func(modelFile string) (interface{}, error) {
    return whisper.New(modelFile)
}

func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
    return func(s string) (interface{}, error) {
        return llama.New(s, opts...)
    }
}

func gpt4allLM(opts ...gpt4all.ModelOption) func(string) (interface{}, error) {
    return func(s string) (interface{}, error) {
        return gpt4all.New(s, opts...)
    }
}

func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
    return func(s string) (interface{}, error) {
        model := rwkv.LoadFiles(s, tokenFile, threads)
        if model == nil {
            return nil, fmt.Errorf("could not load model")
        }
        return model, nil
    }
}

func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
    switch strings.ToLower(backendString) {
    case LlamaBackend:
        return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
    case BloomzBackend:
        return ml.LoadModel(modelFile, bloomzLM)
    case StableLMBackend:
        return ml.LoadModel(modelFile, stableLM)
    case DollyBackend:
        return ml.LoadModel(modelFile, dolly)
    case RedPajamaBackend:
        return ml.LoadModel(modelFile, redPajama)
    case Gpt2Backend:
        return ml.LoadModel(modelFile, gpt2LM)
    case StarcoderBackend:
        return ml.LoadModel(modelFile, starCoder)
    case Gpt4AllLlamaBackend:
        return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
    case Gpt4AllMptBackend:
        return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
    case Gpt4AllJBackend:
        return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
    case BertEmbeddingsBackend:
        return ml.LoadModel(modelFile, bertEmbeddings)
    case RwkvBackend:
        return ml.LoadModel(modelFile, rwkvLM(modelFile+tokenizerSuffix, threads))
    case WhisperBackend:
        return ml.LoadModel(modelFile, whisperModel)
    default:
        return nil, fmt.Errorf("backend unsupported: %s", backendString)
    }
}

func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
    log.Debug().Msgf("Loading models greedly")

    ml.mu.Lock()
    m, exists := ml.models[modelFile]
    if exists {
        ml.mu.Unlock()
        return m, nil
    }
    ml.mu.Unlock()
    var err error

    for _, b := range backends {
        if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
            continue
        }
        log.Debug().Msgf("[%s] Attempting to load", b)
        model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
        if modelerr == nil && model != nil {
            log.Debug().Msgf("[%s] Loads OK", b)
            return model, nil
        } else if modelerr != nil {
            err = multierror.Append(err, modelerr)
            log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
        }
    }

    return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}
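
For orientation, here is a minimal sketch (not part of the diff) of how these initializers are meant to be driven: a caller can name a backend explicitly via BackendLoader, or let GreedyLoader probe the autoloadable backends in order. The models directory, model file name and thread count below are placeholders.

package main

import (
    "fmt"

    model "github.com/go-skynet/LocalAI/pkg/model"
)

func main() {
    // Placeholder model directory and model file name.
    ml := model.NewModelLoader("./models")

    // Ask for a specific backend, as the API would when a config names one explicitly.
    m, err := ml.BackendLoader("gpt4all-j", "ggml-gpt4all-j", nil, 4)
    if err != nil {
        // Otherwise probe every autoloadable backend until one succeeds.
        m, err = ml.GreedyLoader("ggml-gpt4all-j", nil, 4)
    }
    if err != nil {
        fmt.Println("load failed:", err)
        return
    }
    fmt.Printf("loaded backend instance: %T\n", m)
}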
@@ -11,32 +11,21 @@ import (
    "text/template"

    "github.com/rs/zerolog/log"

    gpt2 "github.com/go-skynet/go-gpt2.cpp"
    gptj "github.com/go-skynet/go-gpt4all-j.cpp"
    llama "github.com/go-skynet/go-llama.cpp"
)

type ModelLoader struct {
    ModelPath string
    mu sync.Mutex

    models map[string]*llama.LLama
    gptmodels map[string]*gptj.GPTJ
    gpt2models map[string]*gpt2.GPT2
    gptstablelmmodels map[string]*gpt2.StableLM

    // TODO: this needs generics
    models map[string]interface{}
    promptsTemplates map[string]*template.Template
}

func NewModelLoader(modelPath string) *ModelLoader {
    return &ModelLoader{
        ModelPath: modelPath,
        gpt2models: make(map[string]*gpt2.GPT2),
        gptmodels: make(map[string]*gptj.GPTJ),
        gptstablelmmodels: make(map[string]*gpt2.StableLM),
        models: make(map[string]*llama.LLama),
        promptsTemplates: make(map[string]*template.Template),
        ModelPath: modelPath,
        models: make(map[string]interface{}),
        promptsTemplates: make(map[string]*template.Template),
    }
}

@@ -79,10 +68,9 @@ func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string,
        if exists {
            m = t
        }

    }
    if m == nil {
        return "", nil
        return "", fmt.Errorf("failed loading any template")
    }

    var buf bytes.Buffer
@@ -122,156 +110,21 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
    return nil
}

func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
    ml.mu.Lock()
    defer ml.mu.Unlock()

    // Check if we already have a loaded model
    if !ml.ExistsInModelPath(modelName) {
        return nil, fmt.Errorf("model does not exist")
    }

    if m, ok := ml.gptstablelmmodels[modelName]; ok {
        log.Debug().Msgf("Model already loaded in memory: %s", modelName)
        return m, nil
    }

    // Load the model and keep it in memory for later use
    modelFile := filepath.Join(ml.ModelPath, modelName)
    log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

    model, err := gpt2.NewStableLM(modelFile)
    if err != nil {
        return nil, err
    }

    // If there is a prompt template, load it
    if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
        return nil, err
    }

    ml.gptstablelmmodels[modelName] = model
    return model, err
}

func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
    ml.mu.Lock()
    defer ml.mu.Unlock()

    // Check if we already have a loaded model
    if !ml.ExistsInModelPath(modelName) {
        return nil, fmt.Errorf("model does not exist")
    }

    if m, ok := ml.gpt2models[modelName]; ok {
        log.Debug().Msgf("Model already loaded in memory: %s", modelName)
        return m, nil
    }

    // TODO: This needs refactoring, it's really bad to have it in here
    // Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
    if _, ok := ml.gptstablelmmodels[modelName]; ok {
        log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
        return nil, fmt.Errorf("this model is a GPTStableLM one")
    }

    // Load the model and keep it in memory for later use
    modelFile := filepath.Join(ml.ModelPath, modelName)
    log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

    model, err := gpt2.New(modelFile)
    if err != nil {
        return nil, err
    }

    // If there is a prompt template, load it
    if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
        return nil, err
    }

    ml.gpt2models[modelName] = model
    return model, err
}

func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
    ml.mu.Lock()
    defer ml.mu.Unlock()

    // Check if we already have a loaded model
    if !ml.ExistsInModelPath(modelName) {
        return nil, fmt.Errorf("model does not exist")
    }

    if m, ok := ml.gptmodels[modelName]; ok {
        log.Debug().Msgf("Model already loaded in memory: %s", modelName)
        return m, nil
    }

    // TODO: This needs refactoring, it's really bad to have it in here
    // Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
    if _, ok := ml.gpt2models[modelName]; ok {
        log.Debug().Msgf("Model is GPT2: %s", modelName)
        return nil, fmt.Errorf("this model is a GPT2 one")
    }
    if _, ok := ml.gptstablelmmodels[modelName]; ok {
        log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
        return nil, fmt.Errorf("this model is a GPTStableLM one")
    }

    // Load the model and keep it in memory for later use
    modelFile := filepath.Join(ml.ModelPath, modelName)
    log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

    model, err := gptj.New(modelFile)
    if err != nil {
        return nil, err
    }

    // If there is a prompt template, load it
    if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
        return nil, err
    }

    ml.gptmodels[modelName] = model
    return model, err
}

func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
    ml.mu.Lock()
    defer ml.mu.Unlock()

    log.Debug().Msgf("Loading model name: %s", modelName)

    // Check if we already have a loaded model
    if !ml.ExistsInModelPath(modelName) {
        return nil, fmt.Errorf("model does not exist")
    }

    if m, ok := ml.models[modelName]; ok {
        log.Debug().Msgf("Model already loaded in memory: %s", modelName)
        return m, nil
    }

    // TODO: This needs refactoring, it's really bad to have it in here
    // Check if we have a GPTJ model loaded instead - if we do we return an error so the API tries with GPTJ
    if _, ok := ml.gptmodels[modelName]; ok {
        log.Debug().Msgf("Model is GPTJ: %s", modelName)
        return nil, fmt.Errorf("this model is a GPTJ one")
    }
    if _, ok := ml.gpt2models[modelName]; ok {
        log.Debug().Msgf("Model is GPT2: %s", modelName)
        return nil, fmt.Errorf("this model is a GPT2 one")
    }
    if _, ok := ml.gptstablelmmodels[modelName]; ok {
        log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
        return nil, fmt.Errorf("this model is a GPTStableLM one")
    }

    // Load the model and keep it in memory for later use
    modelFile := filepath.Join(ml.ModelPath, modelName)
    log.Debug().Msgf("Loading model in memory from file: %s", modelFile)

    model, err := llama.New(modelFile, opts...)
    model, err := loader(modelFile)
    if err != nil {
        return nil, err
    }
@@ -282,5 +135,5 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
    }

    ml.models[modelName] = model
    return model, err
    return model, nil
}
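
The practical effect of this refactor is that LoadModel no longer knows about concrete model types: callers hand it a constructor closure and type-assert the returned interface{}. A rough sketch under that assumption (the model file name is a placeholder, not taken from the diff):

package main

import (
    "fmt"

    model "github.com/go-skynet/LocalAI/pkg/model"
    llama "github.com/go-skynet/go-llama.cpp"
)

// loadLlama shows the new LoadModel call shape: a constructor closure goes in,
// an interface{} comes out, and the caller type-asserts it.
func loadLlama(ml *model.ModelLoader) (*llama.LLama, error) {
    m, err := ml.LoadModel("ggml-model.bin", func(modelFile string) (interface{}, error) {
        // Any constructor with this shape can be plugged in, mirroring the
        // per-backend closures defined in pkg/model/initializers.go.
        return llama.New(modelFile)
    })
    if err != nil {
        return nil, err
    }
    lm, ok := m.(*llama.LLama)
    if !ok {
        return nil, fmt.Errorf("unexpected model type %T", m)
    }
    return lm, nil
}

func main() {
    ml := model.NewModelLoader("./models")
    if _, err := loadLlama(ml); err != nil {
        fmt.Println(err)
    }
}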
86
pkg/whisper/whisper.go
Normal file
@@ -0,0 +1,86 @@
package whisper

import (
    "fmt"
    "os"
    "os/exec"
    "path/filepath"

    "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
    wav "github.com/go-audio/wav"
)

func sh(c string) (string, error) {
    cmd := exec.Command("/bin/sh", "-c", c)
    cmd.Env = os.Environ()
    o, err := cmd.CombinedOutput()
    return string(o), err
}

// AudioToWav converts audio to wav for transcribe. It bashes out to ffmpeg
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
    out, err := sh(fmt.Sprintf("ffmpeg -i %s -format s16le -ar 16000 -ac 1 -acodec pcm_s16le %s", src, dst))
    if err != nil {
        return fmt.Errorf("error: %w out: %s", err, out)
    }

    return nil
}

func Transcript(model whisper.Model, audiopath, language string) (string, error) {

    dir, err := os.MkdirTemp("", "whisper")
    if err != nil {
        return "", err
    }
    defer os.RemoveAll(dir)

    convertedPath := filepath.Join(dir, "converted.wav")

    if err := audioToWav(audiopath, convertedPath); err != nil {
        return "", err
    }

    // Open samples
    fh, err := os.Open(convertedPath)
    if err != nil {
        return "", err
    }
    defer fh.Close()

    // Read samples
    d := wav.NewDecoder(fh)
    buf, err := d.FullPCMBuffer()
    if err != nil {
        return "", err
    }

    data := buf.AsFloat32Buffer().Data

    // Process samples
    context, err := model.NewContext()
    if err != nil {
        return "", err

    }

    if language != "" {
        context.SetLanguage(language)
    }

    if err := context.Process(data, nil); err != nil {
        return "", err
    }

    text := ""
    for {
        segment, err := context.NextSegment()
        if err != nil {
            break
        }
        text += segment.Text
    }

    return text, nil
}
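
A hypothetical caller of this package would first load a whisper.cpp model through the Go bindings and then hand it to Transcript together with an audio file that ffmpeg can decode. A minimal sketch, assuming placeholder model and audio paths and an illustrative import alias for the LocalAI package:

package main

import (
    "fmt"

    whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
    localwhisper "github.com/go-skynet/LocalAI/pkg/whisper"
)

func main() {
    // Placeholder paths: a ggml whisper model and any audio file ffmpeg can read.
    m, err := whisper.New("ggml-whisper-base.bin")
    if err != nil {
        fmt.Println("model load failed:", err)
        return
    }

    text, err := localwhisper.Transcript(m, "sample.ogg", "en")
    if err != nil {
        fmt.Println("transcription failed:", err)
        return
    }
    fmt.Println(text)
}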
3
prompt-templates/wizardlm.tmpl
Normal file
@@ -0,0 +1,3 @@
{{.Input}}

### Response:
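
This template is rendered with Go's text/template, with the prompt bound to .Input; the concrete value the loader passes in is not shown in this diff, so the struct below is an assumption for illustration. A standalone sketch of the substitution:

package main

import (
    "bytes"
    "fmt"
    "text/template"
)

func main() {
    // Template text inlined here instead of being read from the .tmpl file.
    tmpl := template.Must(template.New("wizardlm").Parse("{{.Input}}\n\n### Response:\n"))

    var buf bytes.Buffer
    // Hypothetical data shape: anything exposing an Input field works.
    if err := tmpl.Execute(&buf, struct{ Input string }{Input: "Write a haiku about local inference."}); err != nil {
        fmt.Println(err)
        return
    }
    fmt.Println(buf.String())
}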
@@ -1,17 +1,4 @@
{
  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
  "extends": [
    "config:base"
  ],
  "regexManagers": [
    {
      "fileMatch": [
        "^Makefile$"
      ],
      "matchStrings": [
        "#\\s*renovate:\\s*datasource=(?<datasource>.*?) depName=(?<depName>.*?)( datasourceTemplate=(?<datasourceTemplate>.*?))?( packageNameTemplate=(?<packageNameTemplate>.*?))?( depNameTemplate=(?<depNameTemplate>.*?))?( valueTemplate=(?<currentValueTemplate>.*?))?( versioning=(?<versioning>.*?))?\\s+.+_VERSION=(?<currentValue>.*?)\\s"
      ],
      "versioningTemplate": "{{#if versioning}}{{versioning}}{{/if}}"
    }
  ]
  "extends": ["config:base"]
}
12
tests/fixtures/config.yaml
vendored
@@ -1,8 +1,10 @@
- name: list1
  parameters:
    model: testmodel
  context_size: 512
  threads: 10
    top_p: 80
    top_k: 0.9
    temperature: 0.1
  context_size: 10
  stopwords:
  - "HUMAN:"
  - "### Response:"
@@ -14,9 +16,11 @@
    chat: ggml-gpt4all-j
- name: list2
  parameters:
    top_p: 80
    top_k: 0.9
    temperature: 0.1
    model: testmodel
  context_size: 512
  threads: 10
  context_size: 10
  stopwords:
  - "HUMAN:"
  - "### Response:"
6
tests/fixtures/gpt4.yaml
vendored
@@ -1,8 +1,10 @@
name: gpt4all
parameters:
  model: testmodel
context_size: 512
threads: 10
  top_p: 80
  top_k: 0.9
  temperature: 0.1
context_size: 10
stopwords:
- "HUMAN:"
- "### Response:"
6
tests/fixtures/gpt4_2.yaml
vendored
@@ -1,8 +1,10 @@
name: gpt4all-2
parameters:
  model: testmodel
context_size: 1024
threads: 5
  top_p: 80
  top_k: 0.9
  temperature: 0.1
context_size: 10
stopwords:
- "HUMAN:"
- "### Response:"