mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 16:51:44 -04:00
Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eb137c8a84 | ||
|
|
4e2061636e | ||
|
|
e3ef171968 | ||
|
|
12d83a4184 | ||
|
|
045412e8dd | ||
|
|
9896a9a58b | ||
|
|
b9011bda59 | ||
|
|
2b2f5fa36a | ||
|
|
43c557dc5c | ||
|
|
7abb2c9bd7 | ||
|
|
7a9ea4480a | ||
|
|
31bcc558de |
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
- name: Dependencies
|
- name: Dependencies
|
||||||
@@ -31,7 +31,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
uses: actions/checkout@v1
|
uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
|
||||||
|
|||||||
13
Dockerfile
13
Dockerfile
@@ -1,13 +0,0 @@
|
|||||||
# Base image versions. They are declared before the first FROM so that the
# FROM lines below can reference them.
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
# Optional build flavour handed to `make build`; empty keeps the Makefile default.
ARG BUILD_TYPE=

# Build stage: installs cmake and runs `make build` on the copied sources.
FROM golang:$GO_VERSION as builder
# An ARG declared before FROM is outside of every build stage; it has to be
# redeclared here (without a value, to inherit the default above) so that
# BUILD_TYPE is present in the environment of the RUN instructions below.
ARG BUILD_TYPE
WORKDIR /build
RUN apt-get update && apt-get install -y cmake
COPY . .
RUN make build

# Runtime stage: only the compiled binary is copied over.
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
ENTRYPOINT [ "/usr/bin/local-ai" ]
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# Earthly feature-set version this file is written against.
VERSION 0.7

# build: build the image described by ./Dockerfile and save the binary it
# contains to the host as ./local-ai.
build:
    FROM DOCKERFILE -f Dockerfile .
    SAVE ARTIFACT /usr/bin/local-ai AS LOCAL local-ai
|
|
||||||
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2023 go-skynet authors
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
||||||
116
Makefile
116
Makefile
@@ -1,116 +0,0 @@
|
|||||||
# Go toolchain entry points.
GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
# File name of the binary written by `make build`.
BINARY_NAME=local-ai
# Pinned revisions of the backend bindings; `?=` lets the environment or the
# command line override them.
GOLLAMA_VERSION?=llama.cpp-8687c1f
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa

# Terminal colour escape sequences used by the build banner and `make help`.
GREEN  := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
WHITE  := $(shell tput -Txterm setaf 7)
CYAN   := $(shell tput -Txterm setaf 6)
RESET  := $(shell tput -Txterm sgr0)

# Header and library search paths of the cloned bindings, handed to the Go
# toolchain by the build/run/test recipes. They are expanded once here from
# make's built-in $(CURDIR) instead of forking `pwd` on every reference.
C_INCLUDE_PATH := $(CURDIR)/go-llama:$(CURDIR)/go-gpt4all-j:$(CURDIR)/go-gpt2
LIBRARY_PATH := $(CURDIR)/go-llama:$(CURDIR)/go-gpt4all-j:$(CURDIR)/go-gpt2
|
|
||||||
|
|
||||||
# BUILD_TYPE falls back to `default` when it is unset or empty.
ifndef BUILD_TYPE
BUILD_TYPE:=default
endif

# BUILD_TYPE=generic prefixes the library targets requested from the backend
# Makefiles with `generic-`.
# make compares text literally, so the value must not be quoted here: the shell
# strips the quotes from `BUILD_TYPE="generic" make build` before make sees the
# variable. The quoted spelling is still accepted in case it is passed verbatim.
ifneq ($(filter generic "generic",$(BUILD_TYPE)),)
GENERIC_PREFIX:=generic-
else
GENERIC_PREFIX:=
endif
|
|
||||||
|
|
||||||
# Targets that name commands rather than files. Declaring them phony keeps a
# stray file called e.g. `clean` or `help` from making the target look up to date.
.PHONY: all test build vendor generic-build replace prepare clean run help

# Plain `make` prints the help text.
all: help
|
|
||||||
|
|
||||||
## Build:

# Compile the binary after `prepare` has run; the banner lines are emitted by
# make itself when the recipe is expanded.
build: prepare ## Build the project
	$(info $(GREEN)I local-ai build info:$(RESET))
	$(info $(GREEN)I BUILD_TYPE: $(YELLOW)$(BUILD_TYPE)$(RESET))
	C_INCLUDE_PATH=$(C_INCLUDE_PATH) LIBRARY_PATH=$(LIBRARY_PATH) $(GOCMD) build -o $(BINARY_NAME) ./

# Re-invoke make for the `build` target with BUILD_TYPE set in its environment.
generic-build: ## Build the project using generic
	BUILD_TYPE="generic" $(MAKE) build
|
|
||||||
|
|
||||||
## GPT4ALL-J
# Clone the binding, check out the pinned revision and rewrite its sources so
# that ggml_/gpt_/json_ identifiers and `replace` get a gptj-specific name.
go-gpt4all-j:
	git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp $@
	cd $@ && git checkout -b build $(GOGPT4ALLJ_VERSION)
	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
	@find ./$@ -type f \( -name "*.c" -o -name "*.cpp" -o -name "*.h" \) -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
	@find ./$@ -type f \( -name "*.cpp" -o -name "*.h" \) -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
	@find ./$@ -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' -e 's/void replace/void json_gptj_replace/g' -e 's/::replace/::json_gptj_replace/g' {} +

# Delegate building the static library to the Makefile inside the clone.
go-gpt4all-j/libgptj.a: go-gpt4all-j
	$(MAKE) -C $(@D) $(GENERIC_PREFIX)$(@F)
|
|
||||||
|
|
||||||
# CEREBRAS GPT
# Clone the binding, check out the pinned revision and rewrite its sources so
# that ggml_/gpt_/json_ identifiers get a gpt2-specific name.
go-gpt2:
	git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp $@
	cd $@ && git checkout -b build $(GOGPT2_VERSION)
	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
	@find ./$@ -type f \( -name "*.c" -o -name "*.cpp" -o -name "*.h" \) -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
	@find ./$@ -type f \( -name "*.cpp" -o -name "*.h" \) -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
	@find ./$@ -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +

# Delegate building the static library to the Makefile inside the clone.
go-gpt2/libgpt2.a: go-gpt2
	$(MAKE) -C $(@D) $(GENERIC_PREFIX)$(@F)
|
|
||||||
|
|
||||||
|
|
||||||
# Clone the binding at the ref named by GOLLAMA_VERSION.
go-llama:
	git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp $@

# Delegate building the static library to the Makefile inside the clone.
go-llama/libbinding.a: go-llama
	$(MAKE) -C $(@D) $(GENERIC_PREFIX)$(@F)
|
|
||||||
|
|
||||||
# Rewrite go.mod so the three binding modules resolve to the local clones.
replace:
	for binding in go-llama go-gpt4all-j go-gpt2; do \
		$(GOCMD) mod edit -replace github.com/go-skynet/$$binding.cpp=$(shell pwd)/$$binding || exit 1; \
	done

# Everything the build, run and test targets need first: the three static
# libraries and the go.mod replacements.
prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
|
|
||||||
|
|
||||||
# Delete the cloned bindings and the compiled binary.
clean: ## Remove build related file
	rm -rf ./go-llama ./go-gpt4all-j ./go-gpt2 $(BINARY_NAME)
|
|
||||||
|
|
||||||
## Run:
# Start the program with `go run`, pointing cgo at the cloned bindings.
run: prepare
	C_INCLUDE_PATH=$(C_INCLUDE_PATH) \
		LIBRARY_PATH=$(LIBRARY_PATH) \
		$(GOCMD) run ./main.go
|
|
||||||
|
|
||||||
# Download the small model used by the test suite.
# `mkdir -p` keeps a retry from failing when the directory already exists, and
# the download goes to a temporary name first so that an interrupted or failed
# wget never leaves a truncated file that make would treat as up to date.
test-models/testmodel:
	mkdir -p $(@D)
	wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O $@.tmp
	mv -f $@.tmp $@
|
|
||||||
|
|
||||||
# Run the Go tests with MODELS_PATH pointing at the downloaded test model directory.
test: prepare test-models/testmodel
	@C_INCLUDE_PATH=$(C_INCLUDE_PATH) \
		LIBRARY_PATH=$(LIBRARY_PATH) \
		MODELS_PATH=$(abspath ./)/test-models \
		$(GOCMD) test -v ./...
|
|
||||||
|
|
||||||
## Help:
# Print the usage banner, then scan the parsed makefiles: a target line carrying
# a double-hash description is listed with that description, and a line that
# starts with a double hash and a space is printed as a section header.
help: ## Show this help.
	@echo ''
	@echo 'Usage:'
	@echo ' ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
	@echo ''
	@echo 'Targets:'
	@awk 'BEGIN {FS = ":.*?## "} \
		/^[a-zA-Z_-]+:.*?##.*$$/ {printf " ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2; next} \
		/^## .*$$/ {printf " ${CYAN}%s${RESET}\n", substr($$1,4)}' $(MAKEFILE_LIST)
|
|
||||||
244
README.md
244
README.md
@@ -1,244 +0,0 @@
|
|||||||
<h1 align="center">
|
|
||||||
<br>
|
|
||||||
<img height="300" src="https://user-images.githubusercontent.com/2420543/233147843-88697415-6dbf-4368-a862-ab217f9f7342.jpeg"> <br>
|
|
||||||
LocalAI
|
|
||||||
<br>
|
|
||||||
</h1>
|
|
||||||
|
|
||||||
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather than on the CLI interface. We think that there are already many projects that can be used as a CLI interface, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
|
|
||||||
|
|
||||||
LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is Apache 2.0 licensed and can be used for commercial purposes.
|
|
||||||
|
|
||||||
- OpenAI compatible API
|
|
||||||
- Supports multiple-models
|
|
||||||
- Once loaded the first time, it keeps models loaded in memory for faster inference
|
|
||||||
- Support for prompt templates
|
|
||||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
|
|
||||||
|
|
||||||
Discord channel: [Discord](https://discord.gg/uJAeKSAGDy)
|
|
||||||
|
|
||||||
## Model compatibility
|
|
||||||
|
|
||||||
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp), and also supports [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
|
|
||||||
|
|
||||||
Tested with:
|
|
||||||
- Vicuna
|
|
||||||
- Alpaca
|
|
||||||
- [GPT4ALL](https://github.com/nomic-ai/gpt4all)
|
|
||||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
|
|
||||||
- Koala
|
|
||||||
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
|
|
||||||
|
|
||||||
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
|
|
||||||
|
|
||||||
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
|
||||||
|
|
||||||
The easiest way to run LocalAI is by using `docker-compose`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
|
|
||||||
git clone https://github.com/go-skynet/LocalAI
|
|
||||||
|
|
||||||
cd LocalAI
|
|
||||||
|
|
||||||
# copy your models to models/
|
|
||||||
cp your-model.bin models/
|
|
||||||
|
|
||||||
# (optional) Edit the .env file to set things like context size and threads
|
|
||||||
# vim .env
|
|
||||||
|
|
||||||
# start with docker-compose
|
|
||||||
docker compose up -d --build
|
|
||||||
|
|
||||||
# Now API is accessible at localhost:8080
|
|
||||||
curl http://localhost:8080/v1/models
|
|
||||||
# {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}
|
|
||||||
|
|
||||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "your-model.bin",
|
|
||||||
"prompt": "A long time ago in a galaxy far, far away",
|
|
||||||
"temperature": 0.7
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
## Helm Chart Installation (run LocalAI in Kubernetes)
|
|
||||||
The local-ai Helm chart supports two options for the LocalAI server's models directory:
|
|
||||||
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
|
|
||||||
|
|
||||||
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
|
|
||||||
|
|
||||||
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
|
|
||||||
|
|
||||||
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
|
|
||||||
```bash
|
|
||||||
helm install local-ai charts/local-ai -n local-ai --create-namespace
|
|
||||||
```
|
|
||||||
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
|
|
||||||
|
|
||||||
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
|
|
||||||
```bash
|
|
||||||
helm upgrade local-ai -n local-ai charts/local-ai
|
|
||||||
```
|
|
||||||
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
|
|
||||||
|
|
||||||
## Prompt templates
|
|
||||||
|
|
||||||
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
|
||||||
|
|
||||||
<details>
|
|
||||||
You can use a default template for every model present in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as a default prompt. For example, this can be used with alpaca:
|
|
||||||
|
|
||||||
```
|
|
||||||
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
||||||
|
|
||||||
### Instruction:
|
|
||||||
{{.Input}}
|
|
||||||
|
|
||||||
### Response:
|
|
||||||
```
|
|
||||||
|
|
||||||
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for most popular models.
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
## API
|
|
||||||
|
|
||||||
`LocalAI` provides an API for running text generation as a service, that follows the OpenAI reference and can be used as a drop-in. The models once loaded the first time will be kept in memory.
|
|
||||||
|
|
||||||
<details>
|
|
||||||
Example of starting the API with `docker`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
|
|
||||||
```
|
|
||||||
|
|
||||||
And you'll see:
|
|
||||||
```
|
|
||||||
┌───────────────────────────────────────────────────┐
|
|
||||||
│ Fiber v2.42.0 │
|
|
||||||
│ http://127.0.0.1:8080 │
|
|
||||||
│ (bound on host 0.0.0.0 and port 8080) │
|
|
||||||
│ │
|
|
||||||
│ Handlers ............. 1 Processes ........... 1 │
|
|
||||||
│ Prefork ....... Disabled PID ................. 1 │
|
|
||||||
└───────────────────────────────────────────────────┘
|
|
||||||
```
|
|
||||||
|
|
||||||
You can control the API server options with command line arguments:
|
|
||||||
|
|
||||||
```
|
|
||||||
local-ai --models-path <model_path> [--address <address>] [--threads <num_threads>] [--context-size <num_tokens>]
|
|
||||||
```
|
|
||||||
|
|
||||||
The API takes the following parameters:
|
|
||||||
|
|
||||||
| Parameter | Environment Variable | Default Value | Description |
|
|
||||||
| ------------ | -------------------- | ------------- | -------------------------------------- |
|
|
||||||
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
|
|
||||||
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
|
|
||||||
| address | ADDRESS | :8080 | The address and port to listen on. |
|
|
||||||
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
|
|
||||||
|
|
||||||
Once the server is running, you can start making requests to it using HTTP, using the OpenAI API.
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
### Supported OpenAI API endpoints
|
|
||||||
|
|
||||||
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
|
|
||||||
|
|
||||||
The following endpoints and parameters are supported.
|
|
||||||
|
|
||||||
#### Chat completions
|
|
||||||
|
|
||||||
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
|
|
||||||
|
|
||||||
```
|
|
||||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
|
||||||
"messages": [{"role": "user", "content": "Say this is a test!"}],
|
|
||||||
"temperature": 0.7
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
|
||||||
|
|
||||||
#### Completions
|
|
||||||
|
|
||||||
For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
|
|
||||||
```
|
|
||||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
|
||||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
|
||||||
"prompt": "A long time ago in a galaxy far, far away",
|
|
||||||
"temperature": 0.7
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
|
||||||
|
|
||||||
#### List models
|
|
||||||
|
|
||||||
You can list all the models available with:
|
|
||||||
|
|
||||||
```
|
|
||||||
curl http://localhost:8080/v1/models
|
|
||||||
```
|
|
||||||
|
|
||||||
## Using other models
|
|
||||||
|
|
||||||
gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
|
|
||||||
mkdir models
|
|
||||||
cp gpt4all.. models/
|
|
||||||
git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
|
|
||||||
pip install sentencepiece
|
|
||||||
python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
|
|
||||||
# There will be a new model with the ".tmp" extension, you have to use that one!
|
|
||||||
```
|
|
||||||
|
|
||||||
### Windows compatibility
|
|
||||||
|
|
||||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
|
||||||
|
|
||||||
### Build locally
|
|
||||||
|
|
||||||
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
|
|
||||||
|
|
||||||
In order to build the `LocalAI` container image locally you can use `docker`:
|
|
||||||
|
|
||||||
```
|
|
||||||
# build the image
|
|
||||||
docker build -t localai .
|
|
||||||
docker run localai
|
|
||||||
```
|
|
||||||
|
|
||||||
Or build the binary with `make`:
|
|
||||||
|
|
||||||
```
|
|
||||||
make build
|
|
||||||
```
|
|
||||||
|
|
||||||
## Short-term roadmap
|
|
||||||
|
|
||||||
- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
|
|
||||||
- Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
|
|
||||||
- Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
|
|
||||||
- [x] Multi-model support
|
|
||||||
- Have a webUI!
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
MIT
|
|
||||||
|
|
||||||
## Acknowledgements
|
|
||||||
|
|
||||||
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
|
|
||||||
- https://github.com/tatsu-lab/stanford_alpaca
|
|
||||||
- https://github.com/cornelk/llama-go for the initial ideas
|
|
||||||
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)
|
|
||||||
423
api/api.go
423
api/api.go
@@ -1,423 +0,0 @@
|
|||||||
package api
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
|
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
|
||||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
|
||||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
|
||||||
llama "github.com/go-skynet/go-llama.cpp"
|
|
||||||
"github.com/gofiber/fiber/v2"
|
|
||||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
|
||||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
|
||||||
"github.com/rs/zerolog"
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
)
|
|
||||||
|
|
||||||
type OpenAIResponse struct {
|
|
||||||
Created int `json:"created,omitempty"`
|
|
||||||
Object string `json:"chat.completion,omitempty"`
|
|
||||||
ID string `json:"id,omitempty"`
|
|
||||||
Model string `json:"model,omitempty"`
|
|
||||||
Choices []Choice `json:"choices,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Choice struct {
|
|
||||||
Index int `json:"index,omitempty"`
|
|
||||||
FinishReason string `json:"finish_reason,omitempty"`
|
|
||||||
Message *Message `json:"message,omitempty"`
|
|
||||||
Text string `json:"text,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Message struct {
|
|
||||||
Role string `json:"role,omitempty"`
|
|
||||||
Content string `json:"content,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type OpenAIModel struct {
|
|
||||||
ID string `json:"id"`
|
|
||||||
Object string `json:"object"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type OpenAIRequest struct {
|
|
||||||
Model string `json:"model"`
|
|
||||||
|
|
||||||
// Prompt is read only by completion API calls
|
|
||||||
Prompt string `json:"prompt"`
|
|
||||||
|
|
||||||
Stop string `json:"stop"`
|
|
||||||
|
|
||||||
// Messages is read only by chat/completion API calls
|
|
||||||
Messages []Message `json:"messages"`
|
|
||||||
|
|
||||||
Echo bool `json:"echo"`
|
|
||||||
// Common options between all the API calls
|
|
||||||
TopP float64 `json:"top_p"`
|
|
||||||
TopK int `json:"top_k"`
|
|
||||||
Temperature float64 `json:"temperature"`
|
|
||||||
Maxtokens int `json:"max_tokens"`
|
|
||||||
|
|
||||||
N int `json:"n"`
|
|
||||||
|
|
||||||
// Custom parameters - not present in the OpenAI API
|
|
||||||
Batch int `json:"batch"`
|
|
||||||
F16 bool `json:"f16kv"`
|
|
||||||
IgnoreEOS bool `json:"ignore_eos"`
|
|
||||||
RepeatPenalty float64 `json:"repeat_penalty"`
|
|
||||||
Keep int `json:"n_keep"`
|
|
||||||
|
|
||||||
Seed int `json:"seed"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://platform.openai.com/docs/api-reference/completions
|
|
||||||
func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
|
||||||
return func(c *fiber.Ctx) error {
|
|
||||||
var err error
|
|
||||||
var model *llama.LLama
|
|
||||||
var gptModel *gptj.GPTJ
|
|
||||||
var gpt2Model *gpt2.GPT2
|
|
||||||
var stableLMModel *gpt2.StableLM
|
|
||||||
|
|
||||||
input := new(OpenAIRequest)
|
|
||||||
// Get input data from the request body
|
|
||||||
if err := c.BodyParser(input); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
modelFile := input.Model
|
|
||||||
received, _ := json.Marshal(input)
|
|
||||||
|
|
||||||
log.Debug().Msgf("Request received: %s", string(received))
|
|
||||||
|
|
||||||
// Set model from bearer token, if available
|
|
||||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
|
||||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
|
||||||
|
|
||||||
// If no model was specified, take the first available
|
|
||||||
if modelFile == "" {
|
|
||||||
models, _ := loader.ListModels()
|
|
||||||
if len(models) > 0 {
|
|
||||||
modelFile = models[0]
|
|
||||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If no model is found or specified, we bail out
|
|
||||||
if modelFile == "" && !bearerExists {
|
|
||||||
return fmt.Errorf("no model specified")
|
|
||||||
}
|
|
||||||
|
|
||||||
// If a model is found in bearer token takes precedence
|
|
||||||
if bearerExists {
|
|
||||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
|
||||||
modelFile = bearer
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to load the model
|
|
||||||
var llamaerr, gpt2err, gptjerr, stableerr error
|
|
||||||
llamaOpts := []llama.ModelOption{}
|
|
||||||
if ctx != 0 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.SetContext(ctx))
|
|
||||||
}
|
|
||||||
if f16 {
|
|
||||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
|
|
||||||
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
|
||||||
if llamaerr != nil {
|
|
||||||
gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
|
|
||||||
if gptjerr != nil {
|
|
||||||
gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
|
|
||||||
if gpt2err != nil {
|
|
||||||
stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
|
|
||||||
if stableerr != nil {
|
|
||||||
return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
|
||||||
mutexMap.Lock()
|
|
||||||
l, ok := mutexes[modelFile]
|
|
||||||
if !ok {
|
|
||||||
m := &sync.Mutex{}
|
|
||||||
mutexes[modelFile] = m
|
|
||||||
l = m
|
|
||||||
}
|
|
||||||
mutexMap.Unlock()
|
|
||||||
l.Lock()
|
|
||||||
defer l.Unlock()
|
|
||||||
|
|
||||||
// Set the parameters for the language model prediction
|
|
||||||
topP := input.TopP
|
|
||||||
if topP == 0 {
|
|
||||||
topP = 0.7
|
|
||||||
}
|
|
||||||
topK := input.TopK
|
|
||||||
if topK == 0 {
|
|
||||||
topK = 80
|
|
||||||
}
|
|
||||||
|
|
||||||
temperature := input.Temperature
|
|
||||||
if temperature == 0 {
|
|
||||||
temperature = 0.9
|
|
||||||
}
|
|
||||||
|
|
||||||
tokens := input.Maxtokens
|
|
||||||
if tokens == 0 {
|
|
||||||
tokens = 512
|
|
||||||
}
|
|
||||||
|
|
||||||
predInput := input.Prompt
|
|
||||||
if chat {
|
|
||||||
mess := []string{}
|
|
||||||
// TODO: encode roles
|
|
||||||
for _, i := range input.Messages {
|
|
||||||
mess = append(mess, i.Content)
|
|
||||||
}
|
|
||||||
|
|
||||||
predInput = strings.Join(mess, "\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
|
|
||||||
templatedInput, err := loader.TemplatePrefix(modelFile, struct {
|
|
||||||
Input string
|
|
||||||
}{Input: predInput})
|
|
||||||
if err == nil {
|
|
||||||
predInput = templatedInput
|
|
||||||
log.Debug().Msgf("Template found, input modified to: %s", predInput)
|
|
||||||
}
|
|
||||||
|
|
||||||
result := []Choice{}
|
|
||||||
|
|
||||||
n := input.N
|
|
||||||
|
|
||||||
if input.N == 0 {
|
|
||||||
n = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
var predFunc func() (string, error)
|
|
||||||
switch {
|
|
||||||
case stableLMModel != nil:
|
|
||||||
predFunc = func() (string, error) {
|
|
||||||
// Generate the prediction using the language model
|
|
||||||
predictOptions := []gpt2.PredictOption{
|
|
||||||
gpt2.SetTemperature(temperature),
|
|
||||||
gpt2.SetTopP(topP),
|
|
||||||
gpt2.SetTopK(topK),
|
|
||||||
gpt2.SetTokens(tokens),
|
|
||||||
gpt2.SetThreads(threads),
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Seed != 0 {
|
|
||||||
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
|
|
||||||
}
|
|
||||||
|
|
||||||
return stableLMModel.Predict(
|
|
||||||
predInput,
|
|
||||||
predictOptions...,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
case gpt2Model != nil:
|
|
||||||
predFunc = func() (string, error) {
|
|
||||||
// Generate the prediction using the language model
|
|
||||||
predictOptions := []gpt2.PredictOption{
|
|
||||||
gpt2.SetTemperature(temperature),
|
|
||||||
gpt2.SetTopP(topP),
|
|
||||||
gpt2.SetTopK(topK),
|
|
||||||
gpt2.SetTokens(tokens),
|
|
||||||
gpt2.SetThreads(threads),
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Seed != 0 {
|
|
||||||
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
|
|
||||||
}
|
|
||||||
|
|
||||||
return gpt2Model.Predict(
|
|
||||||
predInput,
|
|
||||||
predictOptions...,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
case gptModel != nil:
|
|
||||||
predFunc = func() (string, error) {
|
|
||||||
// Generate the prediction using the language model
|
|
||||||
predictOptions := []gptj.PredictOption{
|
|
||||||
gptj.SetTemperature(temperature),
|
|
||||||
gptj.SetTopP(topP),
|
|
||||||
gptj.SetTopK(topK),
|
|
||||||
gptj.SetTokens(tokens),
|
|
||||||
gptj.SetThreads(threads),
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, gptj.SetBatch(input.Batch))
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Seed != 0 {
|
|
||||||
predictOptions = append(predictOptions, gptj.SetSeed(input.Seed))
|
|
||||||
}
|
|
||||||
|
|
||||||
return gptModel.Predict(
|
|
||||||
predInput,
|
|
||||||
predictOptions...,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
case model != nil:
|
|
||||||
predFunc = func() (string, error) {
|
|
||||||
// Generate the prediction using the language model
|
|
||||||
predictOptions := []llama.PredictOption{
|
|
||||||
llama.SetTemperature(temperature),
|
|
||||||
llama.SetTopP(topP),
|
|
||||||
llama.SetTopK(topK),
|
|
||||||
llama.SetTokens(tokens),
|
|
||||||
llama.SetThreads(threads),
|
|
||||||
}
|
|
||||||
|
|
||||||
if debug {
|
|
||||||
predictOptions = append(predictOptions, llama.Debug)
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Stop != "" {
|
|
||||||
predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.RepeatPenalty != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Keep != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Batch != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.F16 {
|
|
||||||
predictOptions = append(predictOptions, llama.EnableF16KV)
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.IgnoreEOS {
|
|
||||||
predictOptions = append(predictOptions, llama.IgnoreEOS)
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Seed != 0 {
|
|
||||||
predictOptions = append(predictOptions, llama.SetSeed(input.Seed))
|
|
||||||
}
|
|
||||||
|
|
||||||
return model.Predict(
|
|
||||||
predInput,
|
|
||||||
predictOptions...,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
prediction, err := predFunc()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.Echo {
|
|
||||||
prediction = predInput + prediction
|
|
||||||
}
|
|
||||||
|
|
||||||
if chat {
|
|
||||||
result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
|
|
||||||
} else {
|
|
||||||
result = append(result, Choice{Text: prediction})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
jsonResult, _ := json.Marshal(result)
|
|
||||||
log.Debug().Msgf("Response: %s", jsonResult)
|
|
||||||
|
|
||||||
// Return the prediction in the response body
|
|
||||||
return c.JSON(OpenAIResponse{
|
|
||||||
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
|
|
||||||
Choices: result,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
|
|
||||||
return func(c *fiber.Ctx) error {
|
|
||||||
models, err := loader.ListModels()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
dataModels := []OpenAIModel{}
|
|
||||||
for _, m := range models {
|
|
||||||
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
|
|
||||||
}
|
|
||||||
return c.JSON(struct {
|
|
||||||
Object string `json:"object"`
|
|
||||||
Data []OpenAIModel `json:"data"`
|
|
||||||
}{
|
|
||||||
Object: "list",
|
|
||||||
Data: dataModels,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
|
||||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
|
||||||
if debug {
|
|
||||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return errors as JSON responses
|
|
||||||
app := fiber.New(fiber.Config{
|
|
||||||
DisableStartupMessage: disableMessage,
|
|
||||||
// Override default error handler
|
|
||||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
|
||||||
// Status code defaults to 500
|
|
||||||
code := fiber.StatusInternalServerError
|
|
||||||
|
|
||||||
// Retrieve the custom status code if it's a *fiber.Error
|
|
||||||
var e *fiber.Error
|
|
||||||
if errors.As(err, &e) {
|
|
||||||
code = e.Code
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send custom error page
|
|
||||||
return ctx.Status(code).JSON(struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
}{Error: err.Error()})
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
// Default middleware config
|
|
||||||
app.Use(recover.New())
|
|
||||||
app.Use(cors.New())
|
|
||||||
|
|
||||||
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
|
|
||||||
mu := map[string]*sync.Mutex{}
|
|
||||||
var mumutex = &sync.Mutex{}
|
|
||||||
|
|
||||||
// openAI compatible API endpoint
|
|
||||||
app.Post("/v1/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
|
||||||
app.Post("/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
|
||||||
|
|
||||||
app.Post("/v1/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
|
||||||
app.Post("/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
|
||||||
|
|
||||||
app.Get("/v1/models", listModels(loader))
|
|
||||||
app.Get("/models", listModels(loader))
|
|
||||||
|
|
||||||
return app
|
|
||||||
}
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
package api_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
. "github.com/go-skynet/LocalAI/api"
|
|
||||||
"github.com/go-skynet/LocalAI/pkg/model"
|
|
||||||
"github.com/gofiber/fiber/v2"
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
|
|
||||||
"github.com/sashabaranov/go-openai"
|
|
||||||
)
|
|
||||||
|
|
||||||
var _ = Describe("API test", func() {
|
|
||||||
|
|
||||||
var app *fiber.App
|
|
||||||
var modelLoader *model.ModelLoader
|
|
||||||
var client *openai.Client
|
|
||||||
Context("API query", func() {
|
|
||||||
BeforeEach(func() {
|
|
||||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
|
||||||
app = App(modelLoader, 1, 512, false, false, true)
|
|
||||||
go app.Listen("127.0.0.1:9090")
|
|
||||||
|
|
||||||
defaultConfig := openai.DefaultConfig("")
|
|
||||||
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
|
||||||
|
|
||||||
// Wait for API to be ready
|
|
||||||
client = openai.NewClientWithConfig(defaultConfig)
|
|
||||||
Eventually(func() error {
|
|
||||||
_, err := client.ListModels(context.TODO())
|
|
||||||
return err
|
|
||||||
}, "2m").ShouldNot(HaveOccurred())
|
|
||||||
})
|
|
||||||
AfterEach(func() {
|
|
||||||
app.Shutdown()
|
|
||||||
})
|
|
||||||
It("returns the models list", func() {
|
|
||||||
models, err := client.ListModels(context.TODO())
|
|
||||||
Expect(err).ToNot(HaveOccurred())
|
|
||||||
Expect(len(models.Models)).To(Equal(1))
|
|
||||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
|
||||||
})
|
|
||||||
It("can generate completions", func() {
|
|
||||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
|
|
||||||
Expect(err).ToNot(HaveOccurred())
|
|
||||||
Expect(len(resp.Choices)).To(Equal(1))
|
|
||||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
package api_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo/v2"
|
|
||||||
. "github.com/onsi/gomega"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestLocalAI(t *testing.T) {
|
|
||||||
RegisterFailHandler(Fail)
|
|
||||||
RunSpecs(t, "LocalAI test suite")
|
|
||||||
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
# Helm chart metadata for LocalAI.
apiVersion: v2
name: local-ai
description: A Helm chart for LocalAI
type: application
# Version of the chart itself.
version: 1.0.0
# Version of the packaged application.
appVersion: 0.1.0
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
{{/*
Short name of the chart: .Values.nameOverride when set, otherwise the chart
name, truncated to 63 characters with any trailing "-" removed.
*/}}
{{- define "local-ai.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
|
|
||||||
|
|
||||||
{{/*
Fully qualified application name, used to name the rendered resources.
Resolution order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name
     (or .Values.nameOverride);
  3. "<release name>-<chart name or nameOverride>".
The result is cut to 63 characters because some Kubernetes name fields are
limited to this (by the DNS naming spec); a trailing "-" is dropped.
*/}}
{{- define "local-ai.fullname" -}}
{{- if .Values.fullnameOverride }}
  {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
  {{- $base := .Values.nameOverride | default .Chart.Name }}
  {{- if contains $base .Release.Name }}
    {{- .Release.Name | trunc 63 | trimSuffix "-" }}
  {{- else }}
    {{- printf "%s-%s" .Release.Name $base | trunc 63 | trimSuffix "-" }}
  {{- end }}
{{- end }}
{{- end }}
|
|
||||||
|
|
||||||
{{/*
Value of the helm.sh/chart label: "<chart name>-<chart version>" with every
"+" replaced by "_", truncated to 63 characters, trailing "-" removed.
*/}}
{{- define "local-ai.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
|
|
||||||
|
|
||||||
{{/*
Common labels attached to the metadata of the rendered resources.
app.kubernetes.io/version is only emitted when the chart declares an
appVersion.
*/}}
{{- define "local-ai.labels" -}}
helm.sh/chart: {{ include "local-ai.chart" . }}
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ . | quote }}
{{- end }}
{{- end }}
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
{{- /*
Optional CDI DataVolume that imports a model archive into a PVC, plus the
optional Secret holding the credentials for the source.
Rendered only when .Values.dataVolume.enabled is true. Both resources are
named with "local-ai.fullname".
*/ -}}
{{- if .Values.dataVolume.enabled }}
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
spec:
  contentType: archive
  source:
    {{ .Values.dataVolume.source.type }}:
      url: {{ .Values.dataVolume.source.url | quote }}
      {{- /* Only reference the Secret when this chart actually creates it. */}}
      {{- if .Values.dataVolume.secret.enabled }}
      secretRef: {{ template "local-ai.fullname" . }}
      {{- end }}
      {{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
      secretExtraHeaders:
        {{- toYaml .Values.dataVolume.source.secretExtraHeaders | nindent 8 }}
      {{- end }}
      {{- /* The CDI source field is "certConfigMap"; the values key keeps its existing name. */}}
      {{- if .Values.dataVolume.source.caCertConfigMap }}
      certConfigMap: {{ .Values.dataVolume.source.caCertConfigMap | quote }}
      {{- end }}
  pvc:
    {{- /* toYaml renders a proper YAML list, also for more than one access mode. */}}
    accessModes:
      {{- toYaml .Values.dataVolume.pvc.accessModes | nindent 6 }}
    resources:
      requests:
        storage: {{ .Values.dataVolume.pvc.size }}
---
{{- if .Values.dataVolume.secret.enabled }}
apiVersion: v1
kind: Secret
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
# Values are placed under "data" as-is, so they must already be base64 encoded.
data:
  accessKeyId: {{ .Values.dataVolume.secret.username | quote }}
  secretKey: {{ .Values.dataVolume.secret.password | quote }}
{{- end }}
{{- end }}
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
# Single-replica Deployment running the LocalAI container.
# THREADS, CONTEXT_SIZE and MODELS_PATH are taken from .Values.deployment.env.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "local-ai.name" . }}
      # Quoted so that an all-numeric release name stays a string.
      app.kubernetes.io/instance: {{ .Release.Name | quote }}
  replicas: 1
  template:
    metadata:
      name: {{ template "local-ai.fullname" . }}
      labels:
        app.kubernetes.io/name: {{ include "local-ai.name" . }}
        app.kubernetes.io/instance: {{ .Release.Name | quote }}
    spec:
      containers:
        - name: {{ template "local-ai.fullname" . }}
          image: {{ .Values.deployment.image }}
          env:
            - name: THREADS
              value: {{ .Values.deployment.env.threads | quote }}
            - name: CONTEXT_SIZE
              value: {{ .Values.deployment.env.contextSize | quote }}
            - name: MODELS_PATH
              value: {{ .Values.deployment.env.modelsPath | quote }}
          {{- if .Values.deployment.volume.enabled }}
          # Mount the models PVC at the path the server reads models from.
          volumeMounts:
            - mountPath: {{ .Values.deployment.env.modelsPath }}
              name: models
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: {{ template "local-ai.fullname" . }}
      {{- end }}
|
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
# Service exposing the LocalAI pods on TCP port 8080.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
  {{- if .Values.service.annotations }}
  annotations:
{{ toYaml .Values.service.annotations | indent 4 }}
  {{- end }}
spec:
  selector:
    app.kubernetes.io/name: {{ include "local-ai.name" . }}
    # Also match the release, so that two releases of this chart in the same
    # namespace do not route traffic to each other's pods.
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
  type: "{{ .Values.service.type }}"
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
# Settings consumed by the Deployment template.
deployment:
  image: quay.io/go-skynet/local-ai:latest
  # Passed to the container as THREADS, CONTEXT_SIZE and MODELS_PATH.
  env:
    threads: 14
    contextSize: 512
    modelsPath: "/models"
  volume:
    # When true, the PVC named after the release is mounted at env.modelsPath.
    enabled: false

# Settings consumed by the Service template.
service:
  type: ClusterIP
  annotations: {}
  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"

# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
# (requires https://github.com/kubevirt/containerized-data-importer)
dataVolume:
  enabled: false
  source:
    type: "http" # Source type. One of: [ http | s3 ]
    url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar

    # CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
    # and a base64 encoded pem certificate
    caCertConfigMap: ""

    # SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
    # that may include sensitive information. Only applicable for the http source type.
    secretExtraHeaders: []
  pvc:
    accessModes:
      - ReadWriteOnce
    size: 5Gi
  # Credentials for the source; rendered into a Secret when enabled.
  secret:
    enabled: false
    username: "" # base64 encoded
    password: "" # base64 encoded
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
# Compose file for running the LocalAI API locally.
version: '3.6'

services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    # Build from the Dockerfile in this directory.
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - 8080:8080
    # Settings are read from .env.
    env_file:
      - .env
    # Host ./models is mounted at /models in the container.
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
|
|
||||||
54
go.mod
54
go.mod
@@ -1,54 +0,0 @@
|
|||||||
module github.com/go-skynet/LocalAI
|
|
||||||
|
|
||||||
go 1.19
|
|
||||||
|
|
||||||
require (
|
|
||||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4
|
|
||||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
|
|
||||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead
|
|
||||||
github.com/gofiber/fiber/v2 v2.42.0
|
|
||||||
github.com/jaypipes/ghw v0.10.0
|
|
||||||
github.com/onsi/ginkgo/v2 v2.9.2
|
|
||||||
github.com/onsi/gomega v1.27.6
|
|
||||||
github.com/rs/zerolog v1.29.1
|
|
||||||
github.com/sashabaranov/go-openai v1.9.0
|
|
||||||
github.com/urfave/cli/v2 v2.25.0
|
|
||||||
)
|
|
||||||
|
|
||||||
require (
|
|
||||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
|
||||||
github.com/andybalholm/brotli v1.0.4 // indirect
|
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
|
||||||
github.com/ghodss/yaml v1.0.0 // indirect
|
|
||||||
github.com/go-logr/logr v1.2.3 // indirect
|
|
||||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
|
||||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
|
||||||
github.com/google/go-cmp v0.5.9 // indirect
|
|
||||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
|
||||||
github.com/google/uuid v1.3.0 // indirect
|
|
||||||
github.com/jaypipes/pcidb v1.0.0 // indirect
|
|
||||||
github.com/klauspost/compress v1.15.9 // indirect
|
|
||||||
github.com/kr/text v0.2.0 // indirect
|
|
||||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
|
||||||
github.com/mattn/go-isatty v0.0.17 // indirect
|
|
||||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
|
||||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
|
||||||
github.com/philhofer/fwd v1.1.1 // indirect
|
|
||||||
github.com/pkg/errors v0.9.1 // indirect
|
|
||||||
github.com/rivo/uniseg v0.2.0 // indirect
|
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
|
||||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
|
|
||||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d // indirect
|
|
||||||
github.com/tinylib/msgp v1.1.6 // indirect
|
|
||||||
github.com/valyala/bytebufferpool v1.0.0 // indirect
|
|
||||||
github.com/valyala/fasthttp v1.44.0 // indirect
|
|
||||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
|
||||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
|
||||||
golang.org/x/net v0.8.0 // indirect
|
|
||||||
golang.org/x/sys v0.6.0 // indirect
|
|
||||||
golang.org/x/text v0.8.0 // indirect
|
|
||||||
golang.org/x/tools v0.7.0 // indirect
|
|
||||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
|
||||||
howett.net/plist v1.0.0 // indirect
|
|
||||||
)
|
|
||||||
155
go.sum
155
go.sum
@@ -1,155 +0,0 @@
|
|||||||
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
|
|
||||||
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
|
|
||||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
|
||||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
|
||||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
|
||||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
|
||||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
|
||||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
|
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
|
||||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
|
||||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
|
||||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
|
||||||
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
|
|
||||||
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
|
||||||
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
|
||||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
|
||||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
|
||||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
|
|
||||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
|
||||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
|
|
||||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
|
||||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
|
|
||||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
|
||||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
|
||||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
|
||||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
|
||||||
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
|
|
||||||
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
|
|
||||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
|
||||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
|
||||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
|
||||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
|
|
||||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
|
||||||
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
|
||||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
|
||||||
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
|
||||||
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
|
|
||||||
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
|
|
||||||
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
|
|
||||||
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
|
|
||||||
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
|
|
||||||
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
|
|
||||||
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
|
|
||||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
|
||||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
|
||||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
|
||||||
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
|
|
||||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
|
||||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
|
||||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
|
||||||
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
|
||||||
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
|
|
||||||
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
|
||||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
|
||||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
|
||||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
|
||||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
|
||||||
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
|
|
||||||
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
|
|
||||||
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
|
|
||||||
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
|
|
||||||
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
|
|
||||||
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
|
|
||||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
|
||||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
|
||||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
|
||||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
|
||||||
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
|
||||||
github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
|
|
||||||
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
|
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
|
||||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
|
||||||
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
|
|
||||||
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
|
||||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
|
|
||||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
|
|
||||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
|
|
||||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
|
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
|
||||||
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
|
|
||||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
|
||||||
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
|
|
||||||
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
|
|
||||||
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
|
|
||||||
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
|
|
||||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
|
||||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
|
||||||
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
|
|
||||||
github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seBFc2uWtgiY=
|
|
||||||
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
|
|
||||||
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
|
|
||||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
|
|
||||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
|
|
||||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
|
||||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
|
||||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
|
||||||
golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
|
||||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
|
||||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
|
||||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
|
||||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
|
||||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
|
||||||
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
|
|
||||||
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
|
|
||||||
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
|
|
||||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
|
||||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
|
||||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
||||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
||||||
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
||||||
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
||||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
||||||
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
|
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
|
||||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
|
||||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
|
||||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
|
||||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
|
||||||
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
|
|
||||||
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
|
||||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
|
||||||
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
|
||||||
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
|
|
||||||
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
|
|
||||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
|
||||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
|
||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
|
||||||
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
|
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
|
||||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
|
||||||
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
|
|
||||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
|
||||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
|
||||||
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
|
|
||||||
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
|
|
||||||
92
main.go
92
main.go
@@ -1,92 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
|
|
||||||
api "github.com/go-skynet/LocalAI/api"
|
|
||||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
|
||||||
"github.com/jaypipes/ghw"
|
|
||||||
"github.com/rs/zerolog"
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
"github.com/urfave/cli/v2"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
|
|
||||||
|
|
||||||
path, err := os.Getwd()
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Msgf("error: %s", err.Error())
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
threads := 4
|
|
||||||
cpu, err := ghw.CPU()
|
|
||||||
if err == nil {
|
|
||||||
threads = int(cpu.TotalCores)
|
|
||||||
}
|
|
||||||
|
|
||||||
app := &cli.App{
|
|
||||||
Name: "LocalAI",
|
|
||||||
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
|
|
||||||
Flags: []cli.Flag{
|
|
||||||
&cli.BoolFlag{
|
|
||||||
Name: "f16",
|
|
||||||
EnvVars: []string{"F16"},
|
|
||||||
},
|
|
||||||
&cli.BoolFlag{
|
|
||||||
Name: "debug",
|
|
||||||
EnvVars: []string{"DEBUG"},
|
|
||||||
},
|
|
||||||
&cli.IntFlag{
|
|
||||||
Name: "threads",
|
|
||||||
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
|
|
||||||
EnvVars: []string{"THREADS"},
|
|
||||||
Value: threads,
|
|
||||||
},
|
|
||||||
&cli.StringFlag{
|
|
||||||
Name: "models-path",
|
|
||||||
DefaultText: "Path containing models used for inferencing",
|
|
||||||
EnvVars: []string{"MODELS_PATH"},
|
|
||||||
Value: path,
|
|
||||||
},
|
|
||||||
&cli.StringFlag{
|
|
||||||
Name: "address",
|
|
||||||
DefaultText: "Bind address for the API server.",
|
|
||||||
EnvVars: []string{"ADDRESS"},
|
|
||||||
Value: ":8080",
|
|
||||||
},
|
|
||||||
&cli.IntFlag{
|
|
||||||
Name: "context-size",
|
|
||||||
DefaultText: "Default context size of the model",
|
|
||||||
EnvVars: []string{"CONTEXT_SIZE"},
|
|
||||||
Value: 512,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Description: `
|
|
||||||
LocalAI is a drop-in replacement OpenAI API which runs inference locally.
|
|
||||||
|
|
||||||
Some of the models compatible are:
|
|
||||||
- Vicuna
|
|
||||||
- Koala
|
|
||||||
- GPT4ALL
|
|
||||||
- GPT4ALL-J
|
|
||||||
- Cerebras
|
|
||||||
- Alpaca
|
|
||||||
- StableLM (ggml quantized)
|
|
||||||
|
|
||||||
It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
|
|
||||||
`,
|
|
||||||
UsageText: `local-ai [options]`,
|
|
||||||
Copyright: "go-skynet authors",
|
|
||||||
Action: func(ctx *cli.Context) error {
|
|
||||||
return api.App(model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
err = app.Run(os.Args)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Msgf("error: %s", err.Error())
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,274 +0,0 @@
|
|||||||
package model
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"text/template"
|
|
||||||
|
|
||||||
"github.com/rs/zerolog/log"
|
|
||||||
|
|
||||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
|
||||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
|
||||||
llama "github.com/go-skynet/go-llama.cpp"
|
|
||||||
)
|
|
||||||
|
|
||||||
type ModelLoader struct {
|
|
||||||
modelPath string
|
|
||||||
mu sync.Mutex
|
|
||||||
|
|
||||||
models map[string]*llama.LLama
|
|
||||||
gptmodels map[string]*gptj.GPTJ
|
|
||||||
gpt2models map[string]*gpt2.GPT2
|
|
||||||
gptstablelmmodels map[string]*gpt2.StableLM
|
|
||||||
|
|
||||||
promptsTemplates map[string]*template.Template
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewModelLoader(modelPath string) *ModelLoader {
|
|
||||||
return &ModelLoader{
|
|
||||||
modelPath: modelPath,
|
|
||||||
gpt2models: make(map[string]*gpt2.GPT2),
|
|
||||||
gptmodels: make(map[string]*gptj.GPTJ),
|
|
||||||
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
|
||||||
models: make(map[string]*llama.LLama),
|
|
||||||
promptsTemplates: make(map[string]*template.Template),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
|
|
||||||
_, err := os.Stat(filepath.Join(ml.modelPath, s))
|
|
||||||
return err == nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) ListModels() ([]string, error) {
|
|
||||||
files, err := ioutil.ReadDir(ml.modelPath)
|
|
||||||
if err != nil {
|
|
||||||
return []string{}, err
|
|
||||||
}
|
|
||||||
|
|
||||||
models := []string{}
|
|
||||||
for _, file := range files {
|
|
||||||
// Skip templates, YAML and .keep files
|
|
||||||
if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
models = append(models, file.Name())
|
|
||||||
}
|
|
||||||
|
|
||||||
return models, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) {
|
|
||||||
ml.mu.Lock()
|
|
||||||
defer ml.mu.Unlock()
|
|
||||||
|
|
||||||
m, ok := ml.promptsTemplates[modelName]
|
|
||||||
if !ok {
|
|
||||||
return "", fmt.Errorf("no prompt template available")
|
|
||||||
}
|
|
||||||
|
|
||||||
var buf bytes.Buffer
|
|
||||||
|
|
||||||
if err := m.Execute(&buf, in); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return buf.String(), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
|
|
||||||
// Check if the template was already loaded
|
|
||||||
if _, ok := ml.promptsTemplates[modelName]; ok {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if the model path exists
|
|
||||||
// skip any error here - we run anyway if a template is not exist
|
|
||||||
modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)
|
|
||||||
|
|
||||||
if !ml.ExistsInModelPath(modelTemplateFile) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
dat, err := os.ReadFile(filepath.Join(ml.modelPath, modelTemplateFile))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse the template
|
|
||||||
tmpl, err := template.New("prompt").Parse(string(dat))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
ml.promptsTemplates[modelName] = tmpl
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
|
|
||||||
ml.mu.Lock()
|
|
||||||
defer ml.mu.Unlock()
|
|
||||||
|
|
||||||
// Check if we already have a loaded model
|
|
||||||
if !ml.ExistsInModelPath(modelName) {
|
|
||||||
return nil, fmt.Errorf("model does not exist")
|
|
||||||
}
|
|
||||||
|
|
||||||
if m, ok := ml.gptstablelmmodels[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
||||||
return m, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load the model and keep it in memory for later use
|
|
||||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
|
||||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
||||||
|
|
||||||
model, err := gpt2.NewStableLM(modelFile)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there is a prompt template, load it
|
|
||||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
ml.gptstablelmmodels[modelName] = model
|
|
||||||
return model, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
|
||||||
ml.mu.Lock()
|
|
||||||
defer ml.mu.Unlock()
|
|
||||||
|
|
||||||
// Check if we already have a loaded model
|
|
||||||
if !ml.ExistsInModelPath(modelName) {
|
|
||||||
return nil, fmt.Errorf("model does not exist")
|
|
||||||
}
|
|
||||||
|
|
||||||
if m, ok := ml.gpt2models[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
||||||
return m, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: This needs refactoring, it's really bad to have it in here
|
|
||||||
// Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
|
|
||||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
|
||||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load the model and keep it in memory for later use
|
|
||||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
|
||||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
||||||
|
|
||||||
model, err := gpt2.New(modelFile)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there is a prompt template, load it
|
|
||||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
ml.gpt2models[modelName] = model
|
|
||||||
return model, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
|
||||||
ml.mu.Lock()
|
|
||||||
defer ml.mu.Unlock()
|
|
||||||
|
|
||||||
// Check if we already have a loaded model
|
|
||||||
if !ml.ExistsInModelPath(modelName) {
|
|
||||||
return nil, fmt.Errorf("model does not exist")
|
|
||||||
}
|
|
||||||
|
|
||||||
if m, ok := ml.gptmodels[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
||||||
return m, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: This needs refactoring, it's really bad to have it in here
|
|
||||||
// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
|
|
||||||
if _, ok := ml.gpt2models[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model is GPT2: %s", modelName)
|
|
||||||
return nil, fmt.Errorf("this model is a GPT2 one")
|
|
||||||
}
|
|
||||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
|
||||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load the model and keep it in memory for later use
|
|
||||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
|
||||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
||||||
|
|
||||||
model, err := gptj.New(modelFile)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there is a prompt template, load it
|
|
||||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
ml.gptmodels[modelName] = model
|
|
||||||
return model, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
|
|
||||||
ml.mu.Lock()
|
|
||||||
defer ml.mu.Unlock()
|
|
||||||
|
|
||||||
log.Debug().Msgf("Loading model name: %s", modelName)
|
|
||||||
|
|
||||||
// Check if we already have a loaded model
|
|
||||||
if !ml.ExistsInModelPath(modelName) {
|
|
||||||
return nil, fmt.Errorf("model does not exist")
|
|
||||||
}
|
|
||||||
|
|
||||||
if m, ok := ml.models[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
||||||
return m, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: This needs refactoring, it's really bad to have it in here
|
|
||||||
// Check if we have a GPTJ model loaded instead - if we do we return an error so the API tries with GPTJ
|
|
||||||
if _, ok := ml.gptmodels[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model is GPTJ: %s", modelName)
|
|
||||||
return nil, fmt.Errorf("this model is a GPTJ one")
|
|
||||||
}
|
|
||||||
if _, ok := ml.gpt2models[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model is GPT2: %s", modelName)
|
|
||||||
return nil, fmt.Errorf("this model is a GPT2 one")
|
|
||||||
}
|
|
||||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
|
||||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
|
||||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load the model and keep it in memory for later use
|
|
||||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
|
||||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
||||||
|
|
||||||
model, err := llama.New(modelFile, opts...)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there is a prompt template, load it
|
|
||||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
ml.models[modelName] = model
|
|
||||||
return model, err
|
|
||||||
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
||||||
|
|
||||||
### Instruction:
|
|
||||||
{{.Input}}
|
|
||||||
|
|
||||||
### Response:
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
|
||||||
### Prompt:
|
|
||||||
{{.Input}}
|
|
||||||
### Response:
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
BEGINNING OF CONVERSATION: USER: {{.Input}} GPT:
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
|
||||||
|
|
||||||
### Instruction:
|
|
||||||
{{.Input}}
|
|
||||||
|
|
||||||
### Response:
|
|
||||||
Reference in New Issue
Block a user