mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
12d83a4184 | ||
|
|
045412e8dd | ||
|
|
9896a9a58b | ||
|
|
b9011bda59 | ||
|
|
2b2f5fa36a | ||
|
|
43c557dc5c | ||
|
|
7abb2c9bd7 | ||
|
|
7a9ea4480a | ||
|
|
31bcc558de | ||
|
|
676e15f785 | ||
|
|
3e71c90949 | ||
|
|
550ae9c968 | ||
|
|
1c872ec326 | ||
|
|
05f35b182c | ||
|
|
79791438fe | ||
|
|
bf20cc34f6 | ||
|
|
5cba71de70 | ||
|
|
4b7e83056d | ||
|
|
ed954d66c3 | ||
|
|
f816dfae65 | ||
|
|
142bcd66ca | ||
|
|
1c4fbaae20 |
3
.devcontainer/Dockerfile
Normal file
3
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,3 @@
|
||||
ARG GO_VERSION=1.20
|
||||
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
46
.devcontainer/devcontainer.json
Normal file
46
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
|
||||
{
|
||||
"name": "Existing Docker Compose (Extend)",
|
||||
|
||||
// Update the 'dockerComposeFile' list if you have more compose files or use different names.
|
||||
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
|
||||
"dockerComposeFile": [
|
||||
"../docker-compose.yaml",
|
||||
"docker-compose.yml"
|
||||
],
|
||||
|
||||
// The 'service' property is the name of the service for the container that VS Code should
|
||||
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
|
||||
"service": "api",
|
||||
|
||||
// The optional 'workspaceFolder' property is the path VS Code should open by default when
|
||||
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
|
||||
"workspaceFolder": "/workspace",
|
||||
|
||||
"features": {
|
||||
"ghcr.io/devcontainers/features/go:1": {},
|
||||
"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
|
||||
},
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Uncomment the next line if you want start specific services in your Docker Compose config.
|
||||
// "runServices": [],
|
||||
|
||||
// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
|
||||
// "shutdownAction": "none",
|
||||
|
||||
// Uncomment the next line to run commands after the container is created.
|
||||
"postCreateCommand": "make prepare"
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
||||
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "devcontainer"
|
||||
}
|
||||
26
.devcontainer/docker-compose.yml
Normal file
26
.devcontainer/docker-compose.yml
Normal file
@@ -0,0 +1,26 @@
|
||||
version: '3.6'
|
||||
services:
|
||||
# Update this to the name of the service you want to work with in your docker-compose.yml file
|
||||
api:
|
||||
# Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
|
||||
# folder. Note that the path of the Dockerfile and context is relative to the *primary*
|
||||
# docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
|
||||
# array). The sample below assumes your primary file is in the root of your project.
|
||||
#
|
||||
build:
|
||||
context: .
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
|
||||
volumes:
|
||||
# Update this to wherever you want VS Code to mount the folder of your project
|
||||
- .:/workspace:cached
|
||||
|
||||
# Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
|
||||
# cap_add:
|
||||
# - SYS_PTRACE
|
||||
# security_opt:
|
||||
# - seccomp:unconfined
|
||||
|
||||
# Overrides default command so things don't shut down after the process ends.
|
||||
command: /bin/sh -c "while sleep 1000; do :; done"
|
||||
|
||||
7
.env
7
.env
@@ -1,4 +1,5 @@
|
||||
THREADS=14
|
||||
CONTEXT_SIZE=512
|
||||
# THREADS=14
|
||||
# CONTEXT_SIZE=512
|
||||
MODELS_PATH=/models
|
||||
# DEBUG=true
|
||||
# DEBUG=true
|
||||
# BUILD_TYPE=generic
|
||||
|
||||
44
.github/workflows/test.yml
vendored
Normal file
44
.github/workflows/test.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
---
|
||||
name: 'tests'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
|
||||
macOS-latest:
|
||||
runs-on: macOS-latest
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew update
|
||||
brew install sdl2
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -1,11 +1,15 @@
|
||||
# go-llama build artifacts
|
||||
go-llama
|
||||
go-gpt4all-j
|
||||
go-gpt2
|
||||
|
||||
# LocalAI build binary
|
||||
LocalAI
|
||||
local-ai
|
||||
# prevent above rules from omitting the helm chart
|
||||
!charts/*
|
||||
|
||||
# Ignore models
|
||||
models/*.bin
|
||||
models/ggml-*
|
||||
models/ggml-*
|
||||
test-models/
|
||||
28
.vscode/launch.json
vendored
28
.vscode/launch.json
vendored
@@ -1,16 +1,20 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "Launch Go",
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "debug",
|
||||
"program": "${workspaceFolder}/main.go",
|
||||
"args": [
|
||||
"api"
|
||||
]
|
||||
}
|
||||
{
|
||||
"name": "Launch Go",
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "debug",
|
||||
"program": "${workspaceFolder}/main.go",
|
||||
"args": [
|
||||
"api"
|
||||
],
|
||||
"env": {
|
||||
"C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
|
||||
"LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
|
||||
"DEBUG": "true"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,11 +1,12 @@
|
||||
ARG GO_VERSION=1.20
|
||||
ARG DEBIAN_VERSION=11
|
||||
ARG BUILD_TYPE=
|
||||
|
||||
FROM golang:$GO_VERSION as builder
|
||||
WORKDIR /build
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
COPY . .
|
||||
ARG BUILD_TYPE=
|
||||
RUN make build${BUILD_TYPE}
|
||||
RUN make build
|
||||
|
||||
FROM debian:$DEBIAN_VERSION
|
||||
COPY --from=builder /build/local-ai /usr/bin/local-ai
|
||||
|
||||
82
Makefile
82
Makefile
@@ -2,7 +2,12 @@ GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
GOLLAMA_VERSION?=llama.cpp-5ecff35
|
||||
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
|
||||
GOLLAMA_VERSION?=llama.cpp-25d7abb
|
||||
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
|
||||
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
|
||||
|
||||
GREEN := $(shell tput -Txterm setaf 2)
|
||||
YELLOW := $(shell tput -Txterm setaf 3)
|
||||
@@ -10,6 +15,20 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
|
||||
|
||||
# Use this if you want to set the default behavior
|
||||
ifndef BUILD_TYPE
|
||||
BUILD_TYPE:=default
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE), "generic")
|
||||
GENERIC_PREFIX:=generic-
|
||||
else
|
||||
GENERIC_PREFIX:=
|
||||
endif
|
||||
|
||||
.PHONY: all test build vendor
|
||||
|
||||
all: help
|
||||
@@ -17,54 +36,75 @@ all: help
|
||||
## Build:
|
||||
|
||||
build: prepare ## Build the project
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
|
||||
buildgeneric: prepare-generic ## Build the project
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
generic-build: ## Build the project using generic
|
||||
BUILD_TYPE="generic" $(MAKE) build
|
||||
|
||||
## GPT4ALL-J
|
||||
go-gpt4all-j:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
|
||||
go-gpt4all-j/libgptj.a: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j libgptj.a
|
||||
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
|
||||
|
||||
go-gpt4all-j/libgptj.a-generic: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j generic-libgptj.a
|
||||
# CEREBRAS GPT
|
||||
go-gpt2:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
|
||||
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
|
||||
|
||||
go-gpt2/libgpt2.a: go-gpt2
|
||||
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
|
||||
|
||||
|
||||
go-llama:
|
||||
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
$(MAKE) -C go-llama libbinding.a
|
||||
|
||||
go-llama-generic:
|
||||
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
$(MAKE) -C go-llama generic-libbinding.a
|
||||
go-llama/libbinding.a: go-llama
|
||||
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
|
||||
|
||||
prepare: go-llama go-gpt4all-j/libgptj.a
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
|
||||
|
||||
prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
|
||||
|
||||
prepare-generic: go-llama-generic go-gpt4all-j/libgptj.a-generic
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
|
||||
|
||||
clean: ## Remove build related file
|
||||
rm -fr ./go-llama
|
||||
rm -rf ./go-gpt4all-j
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf $(BINARY_NAME)
|
||||
|
||||
## Run:
|
||||
run: prepare
|
||||
$(GOCMD) run ./ api
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
|
||||
|
||||
## Test:
|
||||
test: ## Run the tests of the project
|
||||
$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
|
||||
test-models/testmodel:
|
||||
mkdir test-models
|
||||
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
|
||||
|
||||
test: prepare test-models/testmodel
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v ./...
|
||||
|
||||
## Help:
|
||||
help: ## Show this help.
|
||||
|
||||
175
README.md
175
README.md
@@ -7,18 +7,34 @@
|
||||
|
||||
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast drop-in OpenAI API rather on the CLI interface. We think that there are already many projects that can be used as a CLI interface already, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you are were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
|
||||
|
||||
LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.
|
||||
|
||||
[](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)
|
||||
|
||||
[](https://discord.gg/uJAeKSAGDy)
|
||||
|
||||
**LocalAI** is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is Apache 2.0 Licensed and can be used for commercial purposes.
|
||||
|
||||
- OpenAI compatible API
|
||||
- Supports multiple-models
|
||||
- Once loaded the first time, it keep models loaded in memory for faster inference
|
||||
- Provides a simple command line interface that allows text generation directly from the terminal
|
||||
- Support for prompt templates
|
||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
|
||||
|
||||
Reddit post: https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/
|
||||
|
||||
## Model compatibility
|
||||
|
||||
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) and also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all).
|
||||
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) supports also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
|
||||
|
||||
Tested with:
|
||||
- Vicuna
|
||||
- Alpaca
|
||||
- [GPT4ALL](https://github.com/nomic-ai/gpt4all)
|
||||
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
|
||||
- Koala
|
||||
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
|
||||
|
||||
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
|
||||
|
||||
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
|
||||
|
||||
@@ -41,7 +57,7 @@ cp your-model.bin models/
|
||||
# vim .env
|
||||
|
||||
# start with docker-compose
|
||||
docker compose up -d --build
|
||||
docker-compose up -d --build
|
||||
|
||||
# Now API is accessible at localhost:8080
|
||||
curl http://localhost:8080/v1/models
|
||||
@@ -54,6 +70,42 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
|
||||
}'
|
||||
```
|
||||
|
||||
### Example: Use GPT4ALL-J model
|
||||
|
||||
<details>
|
||||
|
||||
```bash
|
||||
# Clone LocalAI
|
||||
git clone https://github.com/go-skynet/LocalAI
|
||||
|
||||
cd LocalAI
|
||||
|
||||
# Download gpt4all-j to models/
|
||||
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
|
||||
|
||||
# Use a template from the examples
|
||||
cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
|
||||
|
||||
# (optional) Edit the .env file to set things like context size and threads
|
||||
# vim .env
|
||||
|
||||
# start with docker-compose
|
||||
docker-compose up -d --build
|
||||
|
||||
# Now API is accessible at localhost:8080
|
||||
curl http://localhost:8080/v1/models
|
||||
# {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
|
||||
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-gpt4all-j",
|
||||
"messages": [{"role": "user", "content": "How are you?"}],
|
||||
"temperature": 0.9
|
||||
}'
|
||||
|
||||
# {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
|
||||
```
|
||||
</details>
|
||||
|
||||
## Prompt templates
|
||||
|
||||
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
||||
@@ -97,8 +149,6 @@ And you'll see:
|
||||
└───────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Note: Models have to end up with `.bin` so can be listed by the `/models` endpoint.
|
||||
|
||||
You can control the API server options with command line arguments:
|
||||
|
||||
```
|
||||
@@ -110,9 +160,10 @@ The API takes takes the following parameters:
|
||||
| Parameter | Environment Variable | Default Value | Description |
|
||||
| ------------ | -------------------- | ------------- | -------------------------------------- |
|
||||
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
|
||||
| threads | THREADS | CPU cores | The number of threads to use for text generation. |
|
||||
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
|
||||
| address | ADDRESS | :8080 | The address and port to listen on. |
|
||||
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
|
||||
| debug | DEBUG | false | Enable debug mode. |
|
||||
|
||||
Once the server is running, you can start making requests to it using HTTP, using the OpenAI API.
|
||||
|
||||
@@ -122,10 +173,16 @@ Once the server is running, you can start making requests to it using HTTP, usin
|
||||
|
||||
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
|
||||
|
||||
Following the list of endpoints/parameters supported.
|
||||
Following the list of endpoints/parameters supported.
|
||||
|
||||
Note:
|
||||
|
||||
- You can also specify the model a part of the OpenAI token.
|
||||
- If only one model is available, the API will use it for all the requests.
|
||||
|
||||
#### Chat completions
|
||||
|
||||
<details>
|
||||
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
|
||||
|
||||
```
|
||||
@@ -137,10 +194,12 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
|
||||
```
|
||||
|
||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
||||
</details>
|
||||
|
||||
#### Completions
|
||||
|
||||
For example, to generate a comletion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
|
||||
<details>
|
||||
For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
|
||||
```
|
||||
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "ggml-koala-7b-model-q4_0-r2.bin",
|
||||
@@ -151,14 +210,19 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
|
||||
|
||||
Available additional parameters: `top_p`, `top_k`, `max_tokens`
|
||||
|
||||
</details>
|
||||
|
||||
#### List models
|
||||
|
||||
<details>
|
||||
You can list all the models available with:
|
||||
|
||||
```
|
||||
curl http://localhost:8080/v1/models
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Using other models
|
||||
|
||||
gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
|
||||
@@ -173,15 +237,32 @@ python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
|
||||
# There will be a new model with the ".tmp" extension, you have to use that one!
|
||||
```
|
||||
|
||||
### Windows compatibility
|
||||
|
||||
## Helm Chart Installation (run LocalAI in Kubernetes)
|
||||
The local-ai Helm chart supports two options for the LocalAI server's models directory:
|
||||
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
|
||||
|
||||
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
|
||||
|
||||
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
|
||||
|
||||
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
|
||||
```bash
|
||||
helm install local-ai charts/local-ai -n local-ai --create-namespace
|
||||
```
|
||||
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
|
||||
|
||||
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
|
||||
```bash
|
||||
helm upgrade local-ai -n local-ai charts/local-ai
|
||||
```
|
||||
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
|
||||
|
||||
## Windows compatibility
|
||||
|
||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||
|
||||
### Kubernetes
|
||||
|
||||
You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
|
||||
|
||||
### Build locally
|
||||
## Build locally
|
||||
|
||||
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
|
||||
|
||||
@@ -199,13 +280,71 @@ Or build the binary with `make`:
|
||||
make build
|
||||
```
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
Here are answers to some of the most common questions.
|
||||
|
||||
|
||||
### How do I get models?
|
||||
|
||||
<details>
|
||||
|
||||
Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in lama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml, or models from gpt4all should also work: https://github.com/nomic-ai/gpt4all.
|
||||
|
||||
</details>
|
||||
|
||||
### What's the difference with Serge, or XXX?
|
||||
|
||||
|
||||
<details>
|
||||
|
||||
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, easy to set up locally and deploy to Kubernetes.
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
### Can I use it with a Discord bot, or XXX?
|
||||
|
||||
<details>
|
||||
|
||||
Yes! If the client uses OpenAI and supports setting a different base URL to send requests to, you can use the LocalAI endpoint. This allows to use this with every application that was supposed to work with OpenAI, but without changing the application!
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
### Can this leverage GPUs?
|
||||
|
||||
<details>
|
||||
|
||||
Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/llama.cpp/discussions/915.
|
||||
|
||||
</details>
|
||||
|
||||
### Where is the webUI?
|
||||
|
||||
<details>
|
||||
We are working on to have a good out of the box experience - however as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
|
||||
|
||||
</details>
|
||||
|
||||
### Does it work with AutoGPT?
|
||||
|
||||
<details>
|
||||
|
||||
AutoGPT currently doesn't allow to set a different API URL, but there is a PR open for it, so this should be possible soon!
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
## Short-term roadmap
|
||||
|
||||
- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
|
||||
- Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
|
||||
- Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
|
||||
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
|
||||
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
|
||||
- [x] Multi-model support
|
||||
- Have a webUI!
|
||||
- [ ] Have a webUI!
|
||||
- [ ] Allow configuration of defaults for models.
|
||||
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models.
|
||||
|
||||
## License
|
||||
|
||||
|
||||
151
api/api.go
151
api/api.go
@@ -8,14 +8,28 @@ import (
|
||||
"sync"
|
||||
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// APIError provides error information returned by the OpenAI API.
|
||||
type APIError struct {
|
||||
Code any `json:"code,omitempty"`
|
||||
Message string `json:"message"`
|
||||
Param *string `json:"param,omitempty"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type ErrorResponse struct {
|
||||
Error *APIError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
type OpenAIResponse struct {
|
||||
Created int `json:"created,omitempty"`
|
||||
Object string `json:"chat.completion,omitempty"`
|
||||
@@ -47,6 +61,8 @@ type OpenAIRequest struct {
|
||||
// Prompt is read only by completion API calls
|
||||
Prompt string `json:"prompt"`
|
||||
|
||||
Stop string `json:"stop"`
|
||||
|
||||
// Messages is read only by chat/completion API calls
|
||||
Messages []Message `json:"messages"`
|
||||
|
||||
@@ -60,19 +76,23 @@ type OpenAIRequest struct {
|
||||
N int `json:"n"`
|
||||
|
||||
// Custom parameters - not present in the OpenAI API
|
||||
Batch int `json:"batch"`
|
||||
F16 bool `json:"f16kv"`
|
||||
IgnoreEOS bool `json:"ignore_eos"`
|
||||
Batch int `json:"batch"`
|
||||
F16 bool `json:"f16kv"`
|
||||
IgnoreEOS bool `json:"ignore_eos"`
|
||||
RepeatPenalty float64 `json:"repeat_penalty"`
|
||||
Keep int `json:"n_keep"`
|
||||
|
||||
Seed int `json:"seed"`
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
||||
func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
var err error
|
||||
var model *llama.LLama
|
||||
var gptModel *gptj.GPTJ
|
||||
var gpt2Model *gpt2.GPT2
|
||||
var stableLMModel *gpt2.StableLM
|
||||
|
||||
input := new(OpenAIRequest)
|
||||
// Get input data from the request body
|
||||
@@ -87,17 +107,29 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelFile == "" {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelFile = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
||||
}
|
||||
}
|
||||
|
||||
// If no model is found or specified, we bail out
|
||||
if modelFile == "" && !bearerExists {
|
||||
return fmt.Errorf("no model specified")
|
||||
}
|
||||
|
||||
if bearerExists { // model specified in bearer token takes precedence
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelFile = bearer
|
||||
}
|
||||
|
||||
// Try to load the model with both
|
||||
var llamaerr error
|
||||
// Try to load the model
|
||||
var llamaerr, gpt2err, gptjerr, stableerr error
|
||||
llamaOpts := []llama.ModelOption{}
|
||||
if ctx != 0 {
|
||||
llamaOpts = append(llamaOpts, llama.SetContext(ctx))
|
||||
@@ -106,11 +138,18 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
|
||||
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
|
||||
}
|
||||
|
||||
// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
|
||||
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
|
||||
if llamaerr != nil {
|
||||
gptModel, err = loader.LoadGPTJModel(modelFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("llama: %s gpt: %s", llamaerr.Error(), err.Error()) // llama failed first, so we want to catch both errors
|
||||
gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
|
||||
if gptjerr != nil {
|
||||
gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
|
||||
if gpt2err != nil {
|
||||
stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
|
||||
if stableerr != nil {
|
||||
return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -176,6 +215,54 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
|
||||
|
||||
var predFunc func() (string, error)
|
||||
switch {
|
||||
case stableLMModel != nil:
|
||||
predFunc = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(temperature),
|
||||
gpt2.SetTopP(topP),
|
||||
gpt2.SetTopK(topK),
|
||||
gpt2.SetTokens(tokens),
|
||||
gpt2.SetThreads(threads),
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
|
||||
}
|
||||
|
||||
return stableLMModel.Predict(
|
||||
predInput,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case gpt2Model != nil:
|
||||
predFunc = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
predictOptions := []gpt2.PredictOption{
|
||||
gpt2.SetTemperature(temperature),
|
||||
gpt2.SetTopP(topP),
|
||||
gpt2.SetTopK(topK),
|
||||
gpt2.SetTokens(tokens),
|
||||
gpt2.SetThreads(threads),
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
|
||||
}
|
||||
|
||||
if input.Seed != 0 {
|
||||
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
|
||||
}
|
||||
|
||||
return gpt2Model.Predict(
|
||||
predInput,
|
||||
predictOptions...,
|
||||
)
|
||||
}
|
||||
case gptModel != nil:
|
||||
predFunc = func() (string, error) {
|
||||
// Generate the prediction using the language model
|
||||
@@ -211,6 +298,22 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
|
||||
llama.SetThreads(threads),
|
||||
}
|
||||
|
||||
if debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
if input.Stop != "" {
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
|
||||
}
|
||||
@@ -283,9 +386,15 @@ func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
|
||||
}
|
||||
}
|
||||
|
||||
func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f16 bool) error {
|
||||
func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
}
|
||||
|
||||
// Return errors as JSON responses
|
||||
app := fiber.New(fiber.Config{
|
||||
DisableStartupMessage: disableMessage,
|
||||
// Override default error handler
|
||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
||||
// Status code defaults to 500
|
||||
@@ -298,9 +407,11 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
|
||||
}
|
||||
|
||||
// Send custom error page
|
||||
return ctx.Status(code).JSON(struct {
|
||||
Error string `json:"error"`
|
||||
}{Error: err.Error()})
|
||||
return ctx.Status(code).JSON(
|
||||
ErrorResponse{
|
||||
Error: &APIError{Message: err.Error(), Code: code},
|
||||
},
|
||||
)
|
||||
},
|
||||
})
|
||||
|
||||
@@ -313,16 +424,14 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
|
||||
var mumutex = &sync.Mutex{}
|
||||
|
||||
// openAI compatible API endpoint
|
||||
app.Post("/v1/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/v1/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
|
||||
app.Post("/v1/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/v1/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
|
||||
app.Get("/v1/models", listModels(loader))
|
||||
app.Get("/models", listModels(loader))
|
||||
|
||||
// Start the server
|
||||
app.Listen(listenAddr)
|
||||
return nil
|
||||
return app
|
||||
}
|
||||
|
||||
58
api/api_test.go
Normal file
58
api/api_test.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package api_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
. "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/sashabaranov/go-openai"
|
||||
)
|
||||
|
||||
var _ = Describe("API test", func() {
|
||||
|
||||
var app *fiber.App
|
||||
var modelLoader *model.ModelLoader
|
||||
var client *openai.Client
|
||||
Context("API query", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App(modelLoader, 1, 512, false, false, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
||||
|
||||
// Wait for API to be ready
|
||||
client = openai.NewClientWithConfig(defaultConfig)
|
||||
Eventually(func() error {
|
||||
_, err := client.ListModels(context.TODO())
|
||||
return err
|
||||
}, "2m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
AfterEach(func() {
|
||||
app.Shutdown()
|
||||
})
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(1))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
})
|
||||
It("can generate completions", func() {
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||
})
|
||||
It("returns errors", func() {
|
||||
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
|
||||
Expect(err).To(HaveOccurred())
|
||||
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
|
||||
})
|
||||
})
|
||||
})
|
||||
13
api/apt_suite_test.go
Normal file
13
api/apt_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package api_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestLocalAI(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "LocalAI test suite")
|
||||
}
|
||||
6
charts/local-ai/Chart.yaml
Normal file
6
charts/local-ai/Chart.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
apiVersion: v2
|
||||
appVersion: 0.1.0
|
||||
description: A Helm chart for LocalAI
|
||||
name: local-ai
|
||||
type: application
|
||||
version: 1.0.0
|
||||
44
charts/local-ai/templates/_helpers.tpl
Normal file
44
charts/local-ai/templates/_helpers.tpl
Normal file
@@ -0,0 +1,44 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "local-ai.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "local-ai.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "local-ai.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "local-ai.labels" -}}
|
||||
helm.sh/chart: {{ include "local-ai.chart" . }}
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: "{{ .Release.Name }}"
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
39
charts/local-ai/templates/data-volume.yaml
Normal file
39
charts/local-ai/templates/data-volume.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
{{- if .Values.dataVolume.enabled }}
|
||||
apiVersion: cdi.kubevirt.io/v1beta1
|
||||
kind: DataVolume
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
contentType: archive
|
||||
source:
|
||||
{{ .Values.dataVolume.source.type }}:
|
||||
url: {{ .Values.dataVolume.source.url }}
|
||||
secretRef: {{ template "local-ai.fullname" . }}
|
||||
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
|
||||
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
|
||||
{{- end }}
|
||||
{{- if .Values.dataVolume.source.caCertConfigMap }}
|
||||
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
|
||||
{{- end }}
|
||||
pvc:
|
||||
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.dataVolume.pvc.size }}
|
||||
---
|
||||
{{- if .Values.dataVolume.secret.enabled }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
data:
|
||||
accessKeyId: {{ .Values.dataVolume.secret.username }}
|
||||
secretKey: {{ .Values.dataVolume.secret.password }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
39
charts/local-ai/templates/deployment.yaml
Normal file
39
charts/local-ai/templates/deployment.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
spec:
|
||||
containers:
|
||||
- name: {{ template "local-ai.fullname" . }}
|
||||
image: {{ .Values.deployment.image }}
|
||||
env:
|
||||
- name: THREADS
|
||||
value: {{ .Values.deployment.env.threads | quote }}
|
||||
- name: CONTEXT_SIZE
|
||||
value: {{ .Values.deployment.env.contextSize | quote }}
|
||||
- name: MODELS_PATH
|
||||
value: {{ .Values.deployment.env.modelsPath }}
|
||||
{{- if .Values.deployment.volume.enabled }}
|
||||
volumeMounts:
|
||||
- mountPath: {{ .Values.deployment.env.modelsPath }}
|
||||
name: models
|
||||
volumes:
|
||||
- name: models
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ template "local-ai.fullname" . }}
|
||||
{{- end }}
|
||||
19
charts/local-ai/templates/service.yaml
Normal file
19
charts/local-ai/templates/service.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
{{- if .Values.service.annotations }}
|
||||
annotations:
|
||||
{{ toYaml .Values.service.annotations | indent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
type: "{{ .Values.service.type }}"
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
38
charts/local-ai/values.yaml
Normal file
38
charts/local-ai/values.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
deployment:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
threads: 14
|
||||
contextSize: 512
|
||||
modelsPath: "/models"
|
||||
volume:
|
||||
enabled: false
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
annotations: {}
|
||||
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
|
||||
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
|
||||
# (requires https://github.com/kubevirt/containerized-data-importer)
|
||||
dataVolume:
|
||||
enabled: false
|
||||
source:
|
||||
type: "http" # Source type. One of: [ http | s3 ]
|
||||
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
|
||||
|
||||
# CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
|
||||
# and a base64 encoded pem certificate
|
||||
caCertConfigMap: ""
|
||||
|
||||
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
|
||||
# that may include sensitive information. Only applicable for the http source type.
|
||||
secretExtraHeaders: []
|
||||
pvc:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
size: 5Gi
|
||||
secret:
|
||||
enabled: false
|
||||
username: "" # base64 encoded
|
||||
password: "" # base64 encoded
|
||||
@@ -6,14 +6,10 @@ services:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
# args:
|
||||
# BUILD_TYPE: generic # Uncomment to build CPU generic code that works on most HW
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- MODELS_PATH=$MODELS_PATH
|
||||
- CONTEXT_SIZE=$CONTEXT_SIZE
|
||||
- THREADS=$THREADS
|
||||
- DEBUG=$DEBUG
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
28
go.mod
28
go.mod
@@ -3,22 +3,38 @@ module github.com/go-skynet/LocalAI
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04
|
||||
github.com/gofiber/fiber/v2 v2.42.0
|
||||
github.com/jaypipes/ghw v0.10.0
|
||||
github.com/onsi/ginkgo/v2 v2.9.2
|
||||
github.com/onsi/gomega v1.27.6
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/urfave/cli/v2 v2.25.0
|
||||
github.com/sashabaranov/go-openai v1.9.0
|
||||
github.com/urfave/cli/v2 v2.25.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/StackExchange/wmi v1.2.1 // indirect
|
||||
github.com/andybalholm/brotli v1.0.4 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.3 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/jaypipes/pcidb v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.15.9 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.17 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.14 // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/philhofer/fwd v1.1.1 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 // indirect
|
||||
@@ -28,5 +44,11 @@ require (
|
||||
github.com/valyala/fasthttp v1.44.0 // indirect
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
golang.org/x/net v0.8.0 // indirect
|
||||
golang.org/x/sys v0.6.0 // indirect
|
||||
golang.org/x/text v0.8.0 // indirect
|
||||
golang.org/x/tools v0.7.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
howett.net/plist v1.0.0 // indirect
|
||||
)
|
||||
|
||||
69
go.sum
69
go.sum
@@ -1,23 +1,59 @@
|
||||
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
|
||||
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
|
||||
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDFKlCsT6Bo8sdY00A7rFXCzfU1nBOSS4ZnYM=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04 h1:NPiv7mcIjU71MhEv3v4RRWKSBNXnnCyu6VX4CaaHz2I=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230424120713-e45cebe33c04/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
|
||||
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
||||
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
|
||||
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
|
||||
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
|
||||
github.com/jaypipes/pcidb v1.0.0/go.mod h1:TnYUvqhPBzCKnH34KrIX22kAeEbDCSRJ9cqLRCuNDfk=
|
||||
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
|
||||
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
|
||||
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
|
||||
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||
@@ -27,11 +63,18 @@ github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPn
|
||||
github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
|
||||
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
|
||||
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
|
||||
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
|
||||
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
|
||||
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
@@ -39,14 +82,21 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
|
||||
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
|
||||
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
|
||||
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
|
||||
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
|
||||
github.com/urfave/cli/v2 v2.25.0/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
|
||||
github.com/urfave/cli/v2 v2.25.1 h1:zw8dSP7ghX0Gmm8vugrs6q9Ku0wzweqPyshy+syu9Gw=
|
||||
github.com/urfave/cli/v2 v2.25.1/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
|
||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasthttp v1.44.0 h1:R+gLUhldIsfg1HokMuQjdQ5bh9nuXHPIfvkYUu9eR5Q=
|
||||
@@ -67,10 +117,13 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
|
||||
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
|
||||
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@@ -88,11 +141,23 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
|
||||
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
|
||||
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
|
||||
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
|
||||
# (requires https://github.com/kubevirt/containerized-data-importer)
|
||||
apiVersion: cdi.kubevirt.io/v1beta1
|
||||
kind: DataVolume
|
||||
metadata:
|
||||
name: models
|
||||
namespace: local-ai
|
||||
spec:
|
||||
contentType: archive
|
||||
source:
|
||||
http:
|
||||
url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
|
||||
secretRef: model-secret
|
||||
pvc:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: model-secret
|
||||
namespace: local-ai
|
||||
data:
|
||||
accessKeyId: <model_server_username_base64_encoded>
|
||||
secretKey: <model_server_password_base64_encoded>
|
||||
@@ -1,57 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: local-ai
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: local-ai
|
||||
namespace: local-ai
|
||||
labels:
|
||||
app: local-ai
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: local-ai
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: local-ai
|
||||
name: local-ai
|
||||
spec:
|
||||
containers:
|
||||
- name: local-ai
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
- name: THREADS
|
||||
value: "14"
|
||||
- name: CONTEXT_SIZE
|
||||
value: "512"
|
||||
- name: MODELS_PATH
|
||||
value: /models
|
||||
volumeMounts:
|
||||
- mountPath: /models
|
||||
name: models
|
||||
volumes:
|
||||
- name: models
|
||||
persistentVolumeClaim:
|
||||
claimName: models
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: local-ai
|
||||
namespace: local-ai
|
||||
# If using AWS, you'll need to override the default 60s load balancer idle timeout
|
||||
# annotations:
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
spec:
|
||||
selector:
|
||||
app: local-ai
|
||||
type: LoadBalancer
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
21
main.go
21
main.go
@@ -2,13 +2,12 @@ package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
api "github.com/go-skynet/LocalAI/api"
|
||||
model "github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/jaypipes/ghw"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
@@ -21,6 +20,12 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
threads := 4
|
||||
cpu, err := ghw.CPU()
|
||||
if err == nil {
|
||||
threads = int(cpu.TotalCores)
|
||||
}
|
||||
|
||||
app := &cli.App{
|
||||
Name: "LocalAI",
|
||||
Usage: "OpenAI compatible API for running LLaMA/GPT models locally on CPU with consumer grade hardware.",
|
||||
@@ -37,7 +42,7 @@ func main() {
|
||||
Name: "threads",
|
||||
DefaultText: "Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested.",
|
||||
EnvVars: []string{"THREADS"},
|
||||
Value: runtime.NumCPU(),
|
||||
Value: threads,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "models-path",
|
||||
@@ -66,18 +71,16 @@ Some of the models compatible are:
|
||||
- Koala
|
||||
- GPT4ALL
|
||||
- GPT4ALL-J
|
||||
- Cerebras
|
||||
- Alpaca
|
||||
- StableLM (ggml quantized)
|
||||
|
||||
It uses llama.cpp and gpt4all as backend, supporting all the models supported by both.
|
||||
It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
|
||||
`,
|
||||
UsageText: `local-ai [options]`,
|
||||
Copyright: "go-skynet authors",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if ctx.Bool("debug") {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
}
|
||||
return api.Start(model.NewModelLoader(ctx.String("models-path")), ctx.String("address"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"))
|
||||
return api.App(model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -12,20 +12,32 @@ import (
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
||||
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
|
||||
llama "github.com/go-skynet/go-llama.cpp"
|
||||
)
|
||||
|
||||
type ModelLoader struct {
|
||||
modelPath string
|
||||
mu sync.Mutex
|
||||
models map[string]*llama.LLama
|
||||
gptmodels map[string]*gptj.GPTJ
|
||||
modelPath string
|
||||
mu sync.Mutex
|
||||
|
||||
models map[string]*llama.LLama
|
||||
gptmodels map[string]*gptj.GPTJ
|
||||
gpt2models map[string]*gpt2.GPT2
|
||||
gptstablelmmodels map[string]*gpt2.StableLM
|
||||
|
||||
promptsTemplates map[string]*template.Template
|
||||
}
|
||||
|
||||
func NewModelLoader(modelPath string) *ModelLoader {
|
||||
return &ModelLoader{modelPath: modelPath, gptmodels: make(map[string]*gptj.GPTJ), models: make(map[string]*llama.LLama), promptsTemplates: make(map[string]*template.Template)}
|
||||
return &ModelLoader{
|
||||
modelPath: modelPath,
|
||||
gpt2models: make(map[string]*gpt2.GPT2),
|
||||
gptmodels: make(map[string]*gptj.GPTJ),
|
||||
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
||||
models: make(map[string]*llama.LLama),
|
||||
promptsTemplates: make(map[string]*template.Template),
|
||||
}
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
|
||||
@@ -98,6 +110,77 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.NewStableLM(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gptstablelmmodels[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
|
||||
// Check if we already have a loaded model
|
||||
if !ml.ExistsInModelPath(modelName) {
|
||||
return nil, fmt.Errorf("model does not exist")
|
||||
}
|
||||
|
||||
if m, ok := ml.gpt2models[modelName]; ok {
|
||||
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// TODO: This needs refactoring, it's really bad to have it in here
|
||||
// Check if we have a GPTStable model loaded instead - if we do we return an error so the API tries with StableLM
|
||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
|
||||
model, err := gpt2.New(modelFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a prompt template, load it
|
||||
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ml.gpt2models[modelName] = model
|
||||
return model, err
|
||||
}
|
||||
|
||||
func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
ml.mu.Lock()
|
||||
defer ml.mu.Unlock()
|
||||
@@ -112,6 +195,17 @@ func (ml *ModelLoader) LoadGPTJModel(modelName string) (*gptj.GPTJ, error) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// TODO: This needs refactoring, it's really bad to have it in here
|
||||
// Check if we have a GPT2 model loaded instead - if we do we return an error so the API tries with GPT2
|
||||
if _, ok := ml.gpt2models[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPT2: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPT2 one")
|
||||
}
|
||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||
@@ -152,6 +246,14 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
|
||||
log.Debug().Msgf("Model is GPTJ: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTJ one")
|
||||
}
|
||||
if _, ok := ml.gpt2models[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPT2: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPT2 one")
|
||||
}
|
||||
if _, ok := ml.gptstablelmmodels[modelName]; ok {
|
||||
log.Debug().Msgf("Model is GPTStableLM: %s", modelName)
|
||||
return nil, fmt.Errorf("this model is a GPTStableLM one")
|
||||
}
|
||||
|
||||
// Load the model and keep it in memory for later use
|
||||
modelFile := filepath.Join(ml.modelPath, modelName)
|
||||
|
||||
17
renovate.json
Normal file
17
renovate.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||
"extends": [
|
||||
"config:base"
|
||||
],
|
||||
"regexManagers": [
|
||||
{
|
||||
"fileMatch": [
|
||||
"^Makefile$"
|
||||
],
|
||||
"matchStrings": [
|
||||
"#\\s*renovate:\\s*datasource=(?<datasource>.*?) depName=(?<depName>.*?)( datasourceTemplate=(?<datasourceTemplate>.*?))?( packageNameTemplate=(?<packageNameTemplate>.*?))?( depNameTemplate=(?<depNameTemplate>.*?))?( valueTemplate=(?<currentValueTemplate>.*?))?( versioning=(?<versioning>.*?))?\\s+.+_VERSION=(?<currentValue>.*?)\\s"
|
||||
],
|
||||
"versioningTemplate": "{{#if versioning}}{{versioning}}{{/if}}"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user