mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 11:13:31 -05:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
676e15f785 | ||
|
|
3e71c90949 | ||
|
|
550ae9c968 | ||
|
|
1c872ec326 | ||
|
|
05f35b182c | ||
|
|
79791438fe | ||
|
|
bf20cc34f6 | ||
|
|
5cba71de70 | ||
|
|
4b7e83056d |
3
.devcontainer/Dockerfile
Normal file
3
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,3 @@
|
||||
ARG GO_VERSION=1.20
|
||||
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
46
.devcontainer/devcontainer.json
Normal file
46
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,46 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
|
||||
{
|
||||
"name": "Existing Docker Compose (Extend)",
|
||||
|
||||
// Update the 'dockerComposeFile' list if you have more compose files or use different names.
|
||||
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
|
||||
"dockerComposeFile": [
|
||||
"../docker-compose.yaml",
|
||||
"docker-compose.yml"
|
||||
],
|
||||
|
||||
// The 'service' property is the name of the service for the container that VS Code should
|
||||
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
|
||||
"service": "api",
|
||||
|
||||
// The optional 'workspaceFolder' property is the path VS Code should open by default when
|
||||
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
|
||||
"workspaceFolder": "/workspace",
|
||||
|
||||
"features": {
|
||||
"ghcr.io/devcontainers/features/go:1": {},
|
||||
"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
|
||||
},
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
// "features": {},
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Uncomment the next line if you want start specific services in your Docker Compose config.
|
||||
// "runServices": [],
|
||||
|
||||
// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
|
||||
// "shutdownAction": "none",
|
||||
|
||||
// Uncomment the next line to run commands after the container is created.
|
||||
"postCreateCommand": "make prepare"
|
||||
|
||||
// Configure tool-specific properties.
|
||||
// "customizations": {},
|
||||
|
||||
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "devcontainer"
|
||||
}
|
||||
26
.devcontainer/docker-compose.yml
Normal file
26
.devcontainer/docker-compose.yml
Normal file
@@ -0,0 +1,26 @@
|
||||
version: '3.6'
|
||||
services:
|
||||
# Update this to the name of the service you want to work with in your docker-compose.yml file
|
||||
api:
|
||||
# Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
|
||||
# folder. Note that the path of the Dockerfile and context is relative to the *primary*
|
||||
# docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
|
||||
# array). The sample below assumes your primary file is in the root of your project.
|
||||
#
|
||||
build:
|
||||
context: .
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
|
||||
volumes:
|
||||
# Update this to wherever you want VS Code to mount the folder of your project
|
||||
- .:/workspace:cached
|
||||
|
||||
# Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
|
||||
# cap_add:
|
||||
# - SYS_PTRACE
|
||||
# security_opt:
|
||||
# - seccomp:unconfined
|
||||
|
||||
# Overrides default command so things don't shut down after the process ends.
|
||||
command: /bin/sh -c "while sleep 1000; do :; done"
|
||||
|
||||
7
.env
7
.env
@@ -1,4 +1,5 @@
|
||||
THREADS=14
|
||||
CONTEXT_SIZE=512
|
||||
# THREADS=14
|
||||
# CONTEXT_SIZE=512
|
||||
MODELS_PATH=/models
|
||||
# DEBUG=true
|
||||
# DEBUG=true
|
||||
# BUILD_TYPE=generic
|
||||
|
||||
44
.github/workflows/test.yml
vendored
Normal file
44
.github/workflows/test.yml
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
---
|
||||
name: 'tests'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
jobs:
|
||||
ubuntu-latest:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
submodules: true
|
||||
- name: Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
|
||||
macOS-latest:
|
||||
runs-on: macOS-latest
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Dependencies
|
||||
run: |
|
||||
brew update
|
||||
brew install sdl2
|
||||
- name: Test
|
||||
run: |
|
||||
make test
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -1,11 +1,15 @@
|
||||
# go-llama build artifacts
|
||||
go-llama
|
||||
go-gpt4all-j
|
||||
go-gpt2
|
||||
|
||||
# LocalAI build binary
|
||||
LocalAI
|
||||
local-ai
|
||||
# prevent above rules from omitting the helm chart
|
||||
!charts/*
|
||||
|
||||
# Ignore models
|
||||
models/*.bin
|
||||
models/ggml-*
|
||||
models/ggml-*
|
||||
test-models/
|
||||
28
.vscode/launch.json
vendored
28
.vscode/launch.json
vendored
@@ -1,16 +1,20 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "Launch Go",
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "debug",
|
||||
"program": "${workspaceFolder}/main.go",
|
||||
"args": [
|
||||
"api"
|
||||
]
|
||||
}
|
||||
{
|
||||
"name": "Launch Go",
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "debug",
|
||||
"program": "${workspaceFolder}/main.go",
|
||||
"args": [
|
||||
"api"
|
||||
],
|
||||
"env": {
|
||||
"C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
|
||||
"LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
|
||||
"DEBUG": "true"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,11 +1,12 @@
|
||||
ARG GO_VERSION=1.20
|
||||
ARG DEBIAN_VERSION=11
|
||||
ARG BUILD_TYPE=
|
||||
|
||||
FROM golang:$GO_VERSION as builder
|
||||
WORKDIR /build
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
COPY . .
|
||||
ARG BUILD_TYPE=
|
||||
RUN make build${BUILD_TYPE}
|
||||
RUN make build
|
||||
|
||||
FROM debian:$DEBIAN_VERSION
|
||||
COPY --from=builder /build/local-ai /usr/bin/local-ai
|
||||
|
||||
90
Makefile
90
Makefile
@@ -2,9 +2,9 @@ GOCMD=go
|
||||
GOTEST=$(GOCMD) test
|
||||
GOVET=$(GOCMD) vet
|
||||
BINARY_NAME=local-ai
|
||||
GOLLAMA_VERSION?=llama.cpp-5ecff35
|
||||
GOGPT4ALLJ_VERSION?=1f548782d80d48b9a0fac33aae6f129358787bc0
|
||||
GOGPT2_VERSION?=1c24f5b86ac428cd5e81dae1f1427b1463bd2b06
|
||||
GOLLAMA_VERSION?=llama.cpp-8687c1f
|
||||
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
|
||||
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
|
||||
|
||||
GREEN := $(shell tput -Txterm setaf 2)
|
||||
YELLOW := $(shell tput -Txterm setaf 3)
|
||||
@@ -12,6 +12,20 @@ WHITE := $(shell tput -Txterm setaf 7)
|
||||
CYAN := $(shell tput -Txterm setaf 6)
|
||||
RESET := $(shell tput -Txterm sgr0)
|
||||
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
|
||||
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
|
||||
|
||||
# Use this if you want to set the default behavior
|
||||
ifndef BUILD_TYPE
|
||||
BUILD_TYPE:=default
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_TYPE), "generic")
|
||||
GENERIC_PREFIX:=generic-
|
||||
else
|
||||
GENERIC_PREFIX:=
|
||||
endif
|
||||
|
||||
.PHONY: all test build vendor
|
||||
|
||||
all: help
|
||||
@@ -19,15 +33,18 @@ all: help
|
||||
## Build:
|
||||
|
||||
build: prepare ## Build the project
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
$(info ${GREEN}I local-ai build info:${RESET})
|
||||
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
|
||||
buildgeneric: prepare-generic ## Build the project
|
||||
C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2.cpp $(GOCMD) build -o $(BINARY_NAME) ./
|
||||
generic-build: ## Build the project using generic
|
||||
BUILD_TYPE="generic" $(MAKE) build
|
||||
|
||||
## GPT4ALL-J
|
||||
go-gpt4all-j:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j && cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
|
||||
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
|
||||
@@ -38,58 +55,53 @@ go-gpt4all-j:
|
||||
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
|
||||
|
||||
go-gpt4all-j/libgptj.a: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j libgptj.a
|
||||
|
||||
go-gpt4all-j/libgptj.a-generic: go-gpt4all-j
|
||||
$(MAKE) -C go-gpt4all-j generic-libgptj.a
|
||||
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
|
||||
|
||||
# CEREBRAS GPT
|
||||
go-gpt2.cpp:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2.cpp && cd go-gpt2.cpp && git checkout -b build $(GOGPT2_VERSION)
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt2.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2.cpp -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
|
||||
go-gpt2:
|
||||
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
|
||||
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
|
||||
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
|
||||
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
|
||||
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
|
||||
|
||||
go-gpt2.cpp/libgpt2.a: go-gpt2.cpp
|
||||
$(MAKE) -C go-gpt2.cpp libgpt2.a
|
||||
|
||||
go-gpt2.cpp/libgpt2.a-generic: go-gpt2.cpp
|
||||
$(MAKE) -C go-gpt2.cpp generic-libgpt2.a
|
||||
go-gpt2/libgpt2.a: go-gpt2
|
||||
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
|
||||
|
||||
|
||||
go-llama:
|
||||
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
$(MAKE) -C go-llama libbinding.a
|
||||
|
||||
go-llama-generic:
|
||||
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
|
||||
$(MAKE) -C go-llama generic-libbinding.a
|
||||
go-llama/libbinding.a: go-llama
|
||||
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
|
||||
|
||||
replace:
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2.cpp
|
||||
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
|
||||
|
||||
prepare: go-llama go-gpt4all-j/libgptj.a go-gpt2.cpp/libgpt2.a replace
|
||||
|
||||
prepare-generic: go-llama-generic go-gpt4all-j/libgptj.a-generic go-gpt2.cpp/libgpt2.a-generic replace
|
||||
prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
|
||||
|
||||
clean: ## Remove build related file
|
||||
rm -fr ./go-llama
|
||||
rm -rf ./go-gpt4all-j
|
||||
rm -rf ./go-gpt2.cpp
|
||||
rm -rf ./go-gpt2
|
||||
rm -rf $(BINARY_NAME)
|
||||
|
||||
## Run:
|
||||
run: prepare
|
||||
$(GOCMD) run ./ api
|
||||
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
|
||||
|
||||
## Test:
|
||||
test: ## Run the tests of the project
|
||||
$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
|
||||
test-models/testmodel:
|
||||
mkdir test-models
|
||||
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
|
||||
|
||||
test: prepare test-models/testmodel
|
||||
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v ./...
|
||||
|
||||
## Help:
|
||||
help: ## Show this help.
|
||||
|
||||
26
README.md
26
README.md
@@ -15,6 +15,8 @@ LocalAI is a straightforward, drop-in replacement API compatible with OpenAI for
|
||||
- Support for prompt templates
|
||||
- Doesn't shell-out, but uses C bindings for a faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
|
||||
|
||||
Discord channel: [Discord](https://discord.gg/uJAeKSAGDy)
|
||||
|
||||
## Model compatibility
|
||||
|
||||
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) supports also [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
|
||||
@@ -63,6 +65,26 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
|
||||
}'
|
||||
```
|
||||
|
||||
## Helm Chart Installation (run LocalAI in Kubernetes)
|
||||
The local-ai Helm chart supports two options for the LocalAI server's models directory:
|
||||
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
|
||||
|
||||
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
|
||||
|
||||
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
|
||||
|
||||
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
|
||||
```bash
|
||||
helm install local-ai charts/local-ai -n local-ai --create-namespace
|
||||
```
|
||||
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
|
||||
|
||||
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
|
||||
```bash
|
||||
helm upgrade local-ai -n local-ai charts/local-ai
|
||||
```
|
||||
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
|
||||
|
||||
## Prompt templates
|
||||
|
||||
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
||||
@@ -184,10 +206,6 @@ python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
|
||||
|
||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||
|
||||
### Kubernetes
|
||||
|
||||
You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
|
||||
|
||||
### Build locally
|
||||
|
||||
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
|
||||
|
||||
65
api/api.go
65
api/api.go
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
"github.com/gofiber/fiber/v2/middleware/recover"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
@@ -48,6 +49,8 @@ type OpenAIRequest struct {
|
||||
// Prompt is read only by completion API calls
|
||||
Prompt string `json:"prompt"`
|
||||
|
||||
Stop string `json:"stop"`
|
||||
|
||||
// Messages is read only by chat/completion API calls
|
||||
Messages []Message `json:"messages"`
|
||||
|
||||
@@ -61,15 +64,17 @@ type OpenAIRequest struct {
|
||||
N int `json:"n"`
|
||||
|
||||
// Custom parameters - not present in the OpenAI API
|
||||
Batch int `json:"batch"`
|
||||
F16 bool `json:"f16kv"`
|
||||
IgnoreEOS bool `json:"ignore_eos"`
|
||||
Batch int `json:"batch"`
|
||||
F16 bool `json:"f16kv"`
|
||||
IgnoreEOS bool `json:"ignore_eos"`
|
||||
RepeatPenalty float64 `json:"repeat_penalty"`
|
||||
Keep int `json:"n_keep"`
|
||||
|
||||
Seed int `json:"seed"`
|
||||
}
|
||||
|
||||
// https://platform.openai.com/docs/api-reference/completions
|
||||
func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
||||
func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
|
||||
return func(c *fiber.Ctx) error {
|
||||
var err error
|
||||
var model *llama.LLama
|
||||
@@ -90,16 +95,28 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
|
||||
// Set model from bearer token, if available
|
||||
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
|
||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||
|
||||
// If no model was specified, take the first available
|
||||
if modelFile == "" {
|
||||
models, _ := loader.ListModels()
|
||||
if len(models) > 0 {
|
||||
modelFile = models[0]
|
||||
log.Debug().Msgf("No model specified, using: %s", modelFile)
|
||||
}
|
||||
}
|
||||
|
||||
// If no model is found or specified, we bail out
|
||||
if modelFile == "" && !bearerExists {
|
||||
return fmt.Errorf("no model specified")
|
||||
}
|
||||
|
||||
if bearerExists { // model specified in bearer token takes precedence
|
||||
// If a model is found in bearer token takes precedence
|
||||
if bearerExists {
|
||||
log.Debug().Msgf("Using model from bearer token: %s", bearer)
|
||||
modelFile = bearer
|
||||
}
|
||||
|
||||
// Try to load the model with both
|
||||
// Try to load the model
|
||||
var llamaerr, gpt2err, gptjerr, stableerr error
|
||||
llamaOpts := []llama.ModelOption{}
|
||||
if ctx != 0 {
|
||||
@@ -269,6 +286,22 @@ func openAIEndpoint(chat bool, loader *model.ModelLoader, threads, ctx int, f16
|
||||
llama.SetThreads(threads),
|
||||
}
|
||||
|
||||
if debug {
|
||||
predictOptions = append(predictOptions, llama.Debug)
|
||||
}
|
||||
|
||||
if input.Stop != "" {
|
||||
predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
|
||||
}
|
||||
|
||||
if input.RepeatPenalty != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
|
||||
}
|
||||
|
||||
if input.Keep != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
|
||||
}
|
||||
|
||||
if input.Batch != 0 {
|
||||
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
|
||||
}
|
||||
@@ -341,9 +374,15 @@ func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
|
||||
}
|
||||
}
|
||||
|
||||
func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f16 bool) error {
|
||||
func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if debug {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
}
|
||||
|
||||
// Return errors as JSON responses
|
||||
app := fiber.New(fiber.Config{
|
||||
DisableStartupMessage: disableMessage,
|
||||
// Override default error handler
|
||||
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
|
||||
// Status code defaults to 500
|
||||
@@ -371,16 +410,14 @@ func Start(loader *model.ModelLoader, listenAddr string, threads, ctxSize int, f
|
||||
var mumutex = &sync.Mutex{}
|
||||
|
||||
// openAI compatible API endpoint
|
||||
app.Post("/v1/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/chat/completions", openAIEndpoint(true, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/v1/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
|
||||
app.Post("/v1/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/completions", openAIEndpoint(false, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/v1/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
app.Post("/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
|
||||
|
||||
app.Get("/v1/models", listModels(loader))
|
||||
app.Get("/models", listModels(loader))
|
||||
|
||||
// Start the server
|
||||
app.Listen(listenAddr)
|
||||
return nil
|
||||
return app
|
||||
}
|
||||
|
||||
53
api/api_test.go
Normal file
53
api/api_test.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package api_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
. "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"github.com/sashabaranov/go-openai"
|
||||
)
|
||||
|
||||
var _ = Describe("API test", func() {
|
||||
|
||||
var app *fiber.App
|
||||
var modelLoader *model.ModelLoader
|
||||
var client *openai.Client
|
||||
Context("API query", func() {
|
||||
BeforeEach(func() {
|
||||
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
|
||||
app = App(modelLoader, 1, 512, false, false, true)
|
||||
go app.Listen("127.0.0.1:9090")
|
||||
|
||||
defaultConfig := openai.DefaultConfig("")
|
||||
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
|
||||
|
||||
// Wait for API to be ready
|
||||
client = openai.NewClientWithConfig(defaultConfig)
|
||||
Eventually(func() error {
|
||||
_, err := client.ListModels(context.TODO())
|
||||
return err
|
||||
}, "2m").ShouldNot(HaveOccurred())
|
||||
})
|
||||
AfterEach(func() {
|
||||
app.Shutdown()
|
||||
})
|
||||
It("returns the models list", func() {
|
||||
models, err := client.ListModels(context.TODO())
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(models.Models)).To(Equal(1))
|
||||
Expect(models.Models[0].ID).To(Equal("testmodel"))
|
||||
})
|
||||
It("can generate completions", func() {
|
||||
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(len(resp.Choices)).To(Equal(1))
|
||||
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
|
||||
})
|
||||
})
|
||||
})
|
||||
13
api/apt_suite_test.go
Normal file
13
api/apt_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package api_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestLocalAI(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "LocalAI test suite")
|
||||
}
|
||||
6
charts/local-ai/Chart.yaml
Normal file
6
charts/local-ai/Chart.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
apiVersion: v2
|
||||
appVersion: 0.1.0
|
||||
description: A Helm chart for LocalAI
|
||||
name: local-ai
|
||||
type: application
|
||||
version: 1.0.0
|
||||
44
charts/local-ai/templates/_helpers.tpl
Normal file
44
charts/local-ai/templates/_helpers.tpl
Normal file
@@ -0,0 +1,44 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "local-ai.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "local-ai.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "local-ai.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "local-ai.labels" -}}
|
||||
helm.sh/chart: {{ include "local-ai.chart" . }}
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: "{{ .Release.Name }}"
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
39
charts/local-ai/templates/data-volume.yaml
Normal file
39
charts/local-ai/templates/data-volume.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
{{- if .Values.dataVolume.enabled }}
|
||||
apiVersion: cdi.kubevirt.io/v1beta1
|
||||
kind: DataVolume
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
contentType: archive
|
||||
source:
|
||||
{{ .Values.dataVolume.source.type }}:
|
||||
url: {{ .Values.dataVolume.source.url }}
|
||||
secretRef: {{ template "local-ai.fullname" . }}
|
||||
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
|
||||
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
|
||||
{{- end }}
|
||||
{{- if .Values.dataVolume.source.caCertConfigMap }}
|
||||
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
|
||||
{{- end }}
|
||||
pvc:
|
||||
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.dataVolume.pvc.size }}
|
||||
---
|
||||
{{- if .Values.dataVolume.secret.enabled }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
data:
|
||||
accessKeyId: {{ .Values.dataVolume.secret.username }}
|
||||
secretKey: {{ .Values.dataVolume.secret.password }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
39
charts/local-ai/templates/deployment.yaml
Normal file
39
charts/local-ai/templates/deployment.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
spec:
|
||||
containers:
|
||||
- name: {{ template "local-ai.fullname" . }}
|
||||
image: {{ .Values.deployment.image }}
|
||||
env:
|
||||
- name: THREADS
|
||||
value: {{ .Values.deployment.env.threads | quote }}
|
||||
- name: CONTEXT_SIZE
|
||||
value: {{ .Values.deployment.env.contextSize | quote }}
|
||||
- name: MODELS_PATH
|
||||
value: {{ .Values.deployment.env.modelsPath }}
|
||||
{{- if .Values.deployment.volume.enabled }}
|
||||
volumeMounts:
|
||||
- mountPath: {{ .Values.deployment.env.modelsPath }}
|
||||
name: models
|
||||
volumes:
|
||||
- name: models
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ template "local-ai.fullname" . }}
|
||||
{{- end }}
|
||||
19
charts/local-ai/templates/service.yaml
Normal file
19
charts/local-ai/templates/service.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ template "local-ai.fullname" . }}
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
labels:
|
||||
{{- include "local-ai.labels" . | nindent 4 }}
|
||||
{{- if .Values.service.annotations }}
|
||||
annotations:
|
||||
{{ toYaml .Values.service.annotations | indent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/name: {{ include "local-ai.name" . }}
|
||||
type: "{{ .Values.service.type }}"
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
38
charts/local-ai/values.yaml
Normal file
38
charts/local-ai/values.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
deployment:
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
threads: 14
|
||||
contextSize: 512
|
||||
modelsPath: "/models"
|
||||
volume:
|
||||
enabled: false
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
annotations: {}
|
||||
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
|
||||
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
|
||||
# (requires https://github.com/kubevirt/containerized-data-importer)
|
||||
dataVolume:
|
||||
enabled: false
|
||||
source:
|
||||
type: "http" # Source type. One of: [ http | s3 ]
|
||||
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
|
||||
|
||||
# CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
|
||||
# and a base64 encoded pem certificate
|
||||
caCertConfigMap: ""
|
||||
|
||||
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
|
||||
# that may include sensitive information. Only applicable for the http source type.
|
||||
secretExtraHeaders: []
|
||||
pvc:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
size: 5Gi
|
||||
secret:
|
||||
enabled: false
|
||||
username: "" # base64 encoded
|
||||
password: "" # base64 encoded
|
||||
@@ -6,14 +6,10 @@ services:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
# args:
|
||||
# BUILD_TYPE: generic # Uncomment to build CPU generic code that works on most HW
|
||||
ports:
|
||||
- 8080:8080
|
||||
environment:
|
||||
- MODELS_PATH=$MODELS_PATH
|
||||
- CONTEXT_SIZE=$CONTEXT_SIZE
|
||||
- THREADS=$THREADS
|
||||
- DEBUG=$DEBUG
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./models:/models:cached
|
||||
- ./models:/models:cached
|
||||
command: ["/usr/bin/local-ai" ]
|
||||
13
go.mod
13
go.mod
@@ -5,10 +5,13 @@ go 1.19
|
||||
require (
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead
|
||||
github.com/gofiber/fiber/v2 v2.42.0
|
||||
github.com/jaypipes/ghw v0.10.0
|
||||
github.com/onsi/ginkgo/v2 v2.9.2
|
||||
github.com/onsi/gomega v1.27.6
|
||||
github.com/rs/zerolog v1.29.1
|
||||
github.com/sashabaranov/go-openai v1.9.0
|
||||
github.com/urfave/cli/v2 v2.25.0
|
||||
)
|
||||
|
||||
@@ -17,7 +20,11 @@ require (
|
||||
github.com/andybalholm/brotli v1.0.4 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-logr/logr v1.2.3 // indirect
|
||||
github.com/go-ole/go-ole v1.2.6 // indirect
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/jaypipes/pcidb v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.15.9 // indirect
|
||||
@@ -37,7 +44,11 @@ require (
|
||||
github.com/valyala/fasthttp v1.44.0 // indirect
|
||||
github.com/valyala/tcplisten v1.0.0 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
golang.org/x/net v0.8.0 // indirect
|
||||
golang.org/x/sys v0.6.0 // indirect
|
||||
golang.org/x/text v0.8.0 // indirect
|
||||
golang.org/x/tools v0.7.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
howett.net/plist v1.0.0 // indirect
|
||||
)
|
||||
|
||||
32
go.sum
32
go.sum
@@ -2,13 +2,20 @@ github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDO
|
||||
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
|
||||
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
|
||||
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
|
||||
@@ -16,16 +23,21 @@ github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4 h1:GkGuqnhDF
|
||||
github.com/go-skynet/go-gpt2.cpp v0.0.0-20230420213900-1c24f5b86ac4/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94 h1:rtrrMvlIq+g0/ltXjDdLeNtz0uc4wJ4Qs15GFU4ba4c=
|
||||
github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230419091210-303cf2a59a94/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead h1:C+lcH1srw+c0qPDx1WF8zjGiiOqoPxVICt7bI1sj5cM=
|
||||
github.com/go-skynet/go-llama.cpp v0.0.0-20230421172644-351a5a40eead/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
|
||||
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
|
||||
github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
|
||||
github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
|
||||
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
|
||||
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/jaypipes/ghw v0.10.0 h1:UHu9UX08Py315iPojADFPOkmjTsNzHj4g4adsNKKteY=
|
||||
github.com/jaypipes/ghw v0.10.0/go.mod h1:jeJGbkRB2lL3/gxYzNYzEDETV1ZJ56OKr+CSeSEym+g=
|
||||
github.com/jaypipes/pcidb v1.0.0 h1:vtZIfkiCUE42oYbJS0TAq9XSfSmcsgo9IdxSm9qzYU8=
|
||||
@@ -48,11 +60,15 @@ github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU=
|
||||
github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts=
|
||||
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
|
||||
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
|
||||
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
|
||||
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
|
||||
@@ -60,10 +76,15 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
|
||||
github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
|
||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
|
||||
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
|
||||
github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d h1:Q+gqLBOPkFGHyCJxXMRqtUgUbTjI8/Ze8vu8GGyNFwo=
|
||||
github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
|
||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/tinylib/msgp v1.1.6 h1:i+SbKraHhnrf9M5MYmvQhFnbLhAXSDWF8WWsuyRdocw=
|
||||
github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
|
||||
github.com/urfave/cli/v2 v2.25.0 h1:ykdZKuQey2zq0yin/l7JOm9Mh+pg72ngYMeB0ABn6q8=
|
||||
@@ -88,11 +109,13 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
|
||||
golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ=
|
||||
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@@ -110,18 +133,23 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
|
||||
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/tools v0.7.0 h1:W4OVu8VVOaIO0yzWMNdepAulS7YfoS3Zabrm8DOXXU4=
|
||||
golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM=
|
||||
howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g=
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
|
||||
# (requires https://github.com/kubevirt/containerized-data-importer)
|
||||
apiVersion: cdi.kubevirt.io/v1beta1
|
||||
kind: DataVolume
|
||||
metadata:
|
||||
name: models
|
||||
namespace: local-ai
|
||||
spec:
|
||||
contentType: archive
|
||||
source:
|
||||
http:
|
||||
url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
|
||||
secretRef: model-secret
|
||||
pvc:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: model-secret
|
||||
namespace: local-ai
|
||||
data:
|
||||
accessKeyId: <model_server_username_base64_encoded>
|
||||
secretKey: <model_server_password_base64_encoded>
|
||||
@@ -1,57 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: local-ai
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: local-ai
|
||||
namespace: local-ai
|
||||
labels:
|
||||
app: local-ai
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: local-ai
|
||||
replicas: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: local-ai
|
||||
name: local-ai
|
||||
spec:
|
||||
containers:
|
||||
- name: local-ai
|
||||
image: quay.io/go-skynet/local-ai:latest
|
||||
env:
|
||||
- name: THREADS
|
||||
value: "14"
|
||||
- name: CONTEXT_SIZE
|
||||
value: "512"
|
||||
- name: MODELS_PATH
|
||||
value: /models
|
||||
volumeMounts:
|
||||
- mountPath: /models
|
||||
name: models
|
||||
volumes:
|
||||
- name: models
|
||||
persistentVolumeClaim:
|
||||
claimName: models
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: local-ai
|
||||
namespace: local-ai
|
||||
# If using AWS, you'll need to override the default 60s load balancer idle timeout
|
||||
# annotations:
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
spec:
|
||||
selector:
|
||||
app: local-ai
|
||||
type: LoadBalancer
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
6
main.go
6
main.go
@@ -80,11 +80,7 @@ It uses llama.cpp, ggml and gpt4all as backend with golang c bindings.
|
||||
UsageText: `local-ai [options]`,
|
||||
Copyright: "go-skynet authors",
|
||||
Action: func(ctx *cli.Context) error {
|
||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||
if ctx.Bool("debug") {
|
||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||
}
|
||||
return api.Start(model.NewModelLoader(ctx.String("models-path")), ctx.String("address"), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"))
|
||||
return api.App(model.NewModelLoader(ctx.String("models-path")), ctx.Int("threads"), ctx.Int("context-size"), ctx.Bool("f16"), ctx.Bool("debug"), false).Listen(ctx.String("address"))
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user