Compare commits


1 Commit

Author | SHA1 | Message | Date
Marc R Kellerman | 325bc78acc | add chatbot-ui example | 2023-04-25 08:48:43 -07:00
139 changed files with 1330 additions and 7679 deletions

.devcontainer/Dockerfile (new file)

@@ -0,0 +1,3 @@
ARG GO_VERSION=1.20
FROM mcr.microsoft.com/devcontainers/go:0-$GO_VERSION-bullseye
RUN apt-get update && apt-get install -y cmake

.devcontainer/devcontainer.json (new file)

@@ -0,0 +1,46 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
"name": "Existing Docker Compose (Extend)",
// Update the 'dockerComposeFile' list if you have more compose files or use different names.
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
"dockerComposeFile": [
"../docker-compose.yaml",
"docker-compose.yml"
],
// The 'service' property is the name of the service for the container that VS Code should
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
"service": "api",
// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/workspace",
"features": {
"ghcr.io/devcontainers/features/go:1": {},
"ghcr.io/azutake/devcontainer-features/go-packages-install:0": {}
},
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Uncomment the next line if you want start specific services in your Docker Compose config.
// "runServices": [],
// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
// "shutdownAction": "none",
// Uncomment the next line to run commands after the container is created.
"postCreateCommand": "make prepare"
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "devcontainer"
}

.devcontainer/docker-compose.yml (new file)

@@ -0,0 +1,26 @@
version: '3.6'
services:
# Update this to the name of the service you want to work with in your docker-compose.yml file
api:
# Uncomment if you want to override the service's Dockerfile to one in the .devcontainer
# folder. Note that the path of the Dockerfile and context is relative to the *primary*
# docker-compose.yml file (the first in the devcontainer.json "dockerComposeFile"
# array). The sample below assumes your primary file is in the root of your project.
#
build:
context: .
dockerfile: .devcontainer/Dockerfile
volumes:
# Update this to wherever you want VS Code to mount the folder of your project
- .:/workspace:cached
# Uncomment the next four lines if you will use a ptrace-based debugger like C++, Go, and Rust.
# cap_add:
# - SYS_PTRACE
# security_opt:
# - seccomp:unconfined
# Overrides default command so things don't shut down after the process ends.
command: /bin/sh -c "while sleep 1000; do :; done"

.dockerignore

@@ -1,4 +1 @@
models
examples/chatbot-ui/models
examples/rwkv/models
examples/**/models

.env

@@ -1,30 +1,5 @@
## Set number of threads.
## Note: prefer the number of physical cores. Overbooking the CPU degrades performance notably.
# THREADS=14
## Specify a different bind address (defaults to ":8080")
# ADDRESS=127.0.0.1:8080
## Default models context size
# CONTEXT_SIZE=512
## Default path for models
MODELS_PATH=/models
## Enable debug mode
# DEBUG=true
## Specify a build type. Available: cublas, openblas.
# BUILD_TYPE=openblas
## Uncomment and set to false to disable rebuilding from source
# REBUILD=false
## Enable image generation with stablediffusion (requires REBUILD=true)
# GO_TAGS=stablediffusion
## Path where to store generated images
# IMAGE_PATH=/tmp
## Specify a default upload limit in MB (whisper)
# UPLOAD_LIMIT
# BUILD_TYPE=generic

.github/ISSUE_TEMPLATE/bug_report.md

@@ -1,31 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: mudler
---
<!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
**LocalAI version:**
<!-- Container Image or LocalAI tag/commit -->
**Environment, CPU architecture, OS, and Version:**
<!-- Provide the output from "uname -a", HW specs, if it's a VM -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**To Reproduce**
<!-- Steps to reproduce the behavior, including the LocalAI command used, if any -->
**Expected behavior**
<!-- A clear and concise description of what you expected to happen. -->
**Logs**
<!-- If applicable, add logs while running LocalAI in debug mode (`--debug` or `DEBUG=true`) to help explain your problem. -->
**Additional context**
<!-- Add any other context about the problem here. -->

.github/ISSUE_TEMPLATE/config.yml

@@ -1,8 +0,0 @@
blank_issues_enabled: false
contact_links:
- name: Community Support
url: https://github.com/go-skynet/LocalAI/discussions
about: Please ask and answer questions here.
- name: Discord
url: https://discord.gg/uJAeKSAGDy
about: Join our community on Discord!

.github/ISSUE_TEMPLATE/feature_request.md

@@ -1,22 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: mudler
---
<!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
**Describe the solution you'd like**
<!-- A clear and concise description of what you want to happen. -->
**Describe alternatives you've considered**
<!-- A clear and concise description of any alternative solutions or features you've considered. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

.github/PULL_REQUEST_TEMPLATE.md

@@ -1,23 +0,0 @@
**Description**
This PR fixes #
**Notes for Reviewers**
**[Signed commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [ ] Yes, I signed my commits.
<!--
Thank you for contributing to LocalAI!
Contributing Conventions:
1. Include descriptive PR titles with [<component-name>] prepended.
2. Build and test your changes before submitting a PR.
3. Sign your commits
By following the community's contribution conventions upfront, the review process will
be accelerated and your PR merged more quickly.
-->

.github/bump_deps.sh

@@ -1,9 +0,0 @@
#!/bin/bash
set -xe
REPO=$1
BRANCH=$2
VAR=$3
LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
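The script resolves the latest commit SHA of the given branch through the GitHub API and pins the corresponding Makefile variable to it. A hedged usage sketch, mirroring how the bump workflow below invokes it (repository, branch, and variable come from that workflow's matrix):

```bash
# Pin GOLLAMA_VERSION in the Makefile to the latest commit on go-llama.cpp's master branch
bash .github/bump_deps.sh go-skynet/go-llama.cpp master GOLLAMA_VERSION
```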

.github/release.yml

@@ -1,24 +0,0 @@
# .github/release.yml
changelog:
exclude:
labels:
- ignore-for-release
categories:
- title: Breaking Changes 🛠
labels:
- Semver-Major
- breaking-change
- title: "Bug fixes :bug:"
labels:
- bug
- title: Exciting New Features 🎉
labels:
- Semver-Minor
- enhancement
- title: 👒 Dependencies
labels:
- dependencies
- title: Other Changes
labels:
- "*"

.github/stale.yml

@@ -1,18 +0,0 @@
# Number of days of inactivity before an issue becomes stale
daysUntilStale: 45
# Number of days of inactivity before a stale issue is closed
daysUntilClose: 10
# Issues with these labels will never be considered stale
exemptLabels:
- issue/willfix
# Label to use when marking an issue as stale
staleLabel: issue/stale
# Comment to post when marking an issue as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when closing a stale issue. Set to `false` to disable
closeComment: >
This issue is being automatically closed due to inactivity.
However, you may choose to reopen this issue.

.github/workflows/bump_deps.yaml

@@ -1,51 +0,0 @@
name: Bump dependencies
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
bump:
strategy:
fail-fast: false
matrix:
include:
- repository: "go-skynet/go-llama.cpp"
variable: "GOLLAMA_VERSION"
branch: "master"
- repository: "go-skynet/go-ggml-transformers.cpp"
variable: "GOGGMLTRANSFORMERS_VERSION"
branch: "master"
- repository: "donomii/go-rwkv.cpp"
variable: "RWKV_VERSION"
branch: "main"
- repository: "ggerganov/whisper.cpp"
variable: "WHISPER_CPP_VERSION"
branch: "master"
- repository: "go-skynet/go-bert.cpp"
variable: "BERT_VERSION"
branch: "master"
- repository: "go-skynet/bloomz.cpp"
variable: "BLOOMZ_VERSION"
branch: "main"
- repository: "nomic-ai/gpt4all"
variable: "GPT4ALL_VERSION"
branch: "main"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Bump dependencies 🔧
run: |
bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Update ${{ matrix.repository }}'
title: ':arrow_up: Update ${{ matrix.repository }}'
branch: "update/${{ matrix.variable }}"
body: Bump of ${{ matrix.repository }} version
signoff: true

.github/workflows/image.yml

@@ -9,10 +9,6 @@ on:
tags:
- '*'
concurrency:
group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
docker:
runs-on: ubuntu-latest
@@ -58,8 +54,8 @@ jobs:
uses: docker/login-action@v2
with:
registry: quay.io
username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
username: ${{ secrets.QUAY_USERNAME }}
password: ${{ secrets.QUAY_PASSWORD }}
- name: Build
if: github.event_name != 'pull_request'
uses: docker/build-push-action@v4


@@ -1,84 +0,0 @@
name: Build and Release
on: push
permissions:
contents: write
jobs:
build-linux:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v3
with:
submodules: true
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
make dist
- uses: actions/upload-artifact@v3
with:
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*
build-macOS:
strategy:
matrix:
include:
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
runs-on: macOS-latest
steps:
- name: Clone
uses: actions/checkout@v3
with:
submodules: true
- name: Dependencies
run: |
brew update
brew install sdl2 ffmpeg
- name: Build
id: build
env:
CMAKE_ARGS: "${{ matrix.defines }}"
BUILD_ID: "${{ matrix.build }}"
run: |
make dist
- uses: actions/upload-artifact@v3
with:
name: ${{ matrix.build }}
path: release/
- name: Release
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
with:
files: |
release/*

.github/workflows/release.yml.disabled (new file)

@@ -0,0 +1,26 @@
name: goreleaser
on:
push:
tags:
- 'v*'
jobs:
goreleaser:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Go
uses: actions/setup-go@v3
with:
go-version: 1.18
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v4
with:
version: latest
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/test.yml

@@ -9,10 +9,6 @@ on:
tags:
- '*'
concurrency:
group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
cancel-in-progress: true
jobs:
ubuntu-latest:
runs-on: ubuntu-latest
@@ -25,7 +21,7 @@ jobs:
- name: Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential ffmpeg
sudo apt-get install build-essential
- name: Test
run: |
make test
@@ -42,7 +38,7 @@ jobs:
- name: Dependencies
run: |
brew update
brew install sdl2 ffmpeg
brew install sdl2
- name: Test
run: |
make test

.gitignore

@@ -1,13 +1,7 @@
# go-llama build artifacts
go-llama
gpt4all
go-stable-diffusion
go-ggml-transformers
go-gpt4all-j
go-gpt2
go-rwkv
whisper.cpp
bloomz
go-bert
# LocalAI build binary
LocalAI
@@ -16,11 +10,6 @@ local-ai
!charts/*
# Ignore models
models/*
test-models/
test-dir/
release/
# just in case
.DS_Store
models/*.bin
models/ggml-*
test-models/

.goreleaser.yaml (new file)

@@ -0,0 +1,15 @@
# Make sure to check the documentation at http://goreleaser.com
project_name: local-ai
builds:
- ldflags:
- -w -s
env:
- CGO_ENABLED=0
goos:
- linux
- darwin
- windows
goarch:
- amd64
- arm64
binary: '{{ .ProjectName }}'
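With this configuration, a snapshot build can be produced locally without pushing a tag; a minimal sketch, mirroring the `release --clean` invocation used by the disabled release workflow above (`--snapshot` skips tag validation and publishing):

```bash
# Build local-ai archives for all goos/goarch targets defined above, without publishing
goreleaser release --snapshot --clean
```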

.vscode/launch.json

@@ -2,20 +2,7 @@
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${workspaceFolder}/examples/langchain-chroma",
"env": {
"OPENAI_API_BASE": "http://localhost:8080/v1",
"OPENAI_API_KEY": "abc"
}
},
{
"name": "Launch LocalAI API",
"name": "Launch Go",
"type": "go",
"request": "launch",
"mode": "debug",
@@ -24,8 +11,8 @@
"api"
],
"env": {
"C_INCLUDE_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz",
"C_INCLUDE_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"LIBRARY_PATH": "/workspace/go-llama:/workspace/go-gpt4all-j:/workspace/go-gpt2",
"DEBUG": "true"
}
}

Dockerfile

@@ -1,94 +1,13 @@
ARG GO_VERSION=1.20
ARG DEBIAN_VERSION=11
ARG BUILD_TYPE=
FROM golang:$GO_VERSION as builder
ARG BUILD_TYPE=
ARG GO_TAGS=
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ENV BUILD_TYPE=${BUILD_TYPE}
ENV GO_TAGS=${GO_TAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV REBUILD=true
WORKDIR /build
RUN apt-get update && \
apt-get install -y ca-certificates cmake curl
# CuBLAS requirements
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get install -y software-properties-common && \
apt-add-repository contrib && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm -f cuda-keyring_1.0-1_all.deb && \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}
# OpenBLAS requirements
RUN apt-get install -y libopenblas-dev
# Stable Diffusion requirements
RUN apt-get install -y libopencv-dev && \
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
RUN apt-get update && apt-get install -y cmake
COPY . .
RUN make build
FROM golang:$GO_VERSION
ARG BUILD_TYPE=
ARG GO_TAGS=
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ENV BUILD_TYPE=${BUILD_TYPE}
ENV GO_TAGS=${GO_TAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
ENV REBUILD=true
WORKDIR /build
RUN apt-get update && \
apt-get install -y ca-certificates cmake curl
# CuBLAS requirements
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get install -y software-properties-common && \
apt-add-repository contrib && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm -f cuda-keyring_1.0-1_all.deb && \
apt-get update && \
apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}
# OpenBLAS requirements
RUN apt-get install -y libopenblas-dev
# Stable Diffusion requirements
RUN apt-get install -y libopencv-dev && \
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
COPY . .
RUN make prepare-sources
COPY --from=builder /build/local-ai ./
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]
FROM debian:$DEBIAN_VERSION
COPY --from=builder /build/local-ai /usr/bin/local-ai
ENTRYPOINT [ "/usr/bin/local-ai" ]

LICENSE

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023 Ettore Di Giacinto
Copyright (c) 2023 go-skynet authors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

Makefile

@@ -2,240 +2,109 @@ GOCMD=go
GOTEST=$(GOCMD) test
GOVET=$(GOCMD) vet
BINARY_NAME=local-ai
# renovate: datasource=github-tags depName=go-skynet/go-llama.cpp
GOLLAMA_VERSION?=llama.cpp-25d7abb
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt4all-j.cpp currentValueTemplate=master depNameTemplate=go-gpt4all-j.cpp
GOGPT4ALLJ_VERSION?=1f7bff57f66cb7062e40d0ac3abd2217815e5109
# renovate: datasource=git-refs packageNameTemplate=https://github.com/go-skynet/go-gpt2.cpp currentValueTemplate=master depNameTemplate=go-gpt2.cpp
GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
GOLLAMA_VERSION?=4bd3910005a593a6db237bc82c506d6d9fb81b18
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b
GOGGMLTRANSFORMERS_VERSION?=695f97befe14f0107d8da1c11f5b84912e0754b6
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2
WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573
BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551
BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
BUILD_TYPE?=
CGO_LDFLAGS?=
CUDA_LIBPATH?=/usr/local/cuda/lib64/
STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc
GO_TAGS?=
BUILD_ID?=git
LD_FLAGS?=
OPTIONAL_TARGETS?=
OS := $(shell uname -s)
ARCH := $(shell uname -m)
GREEN := $(shell tput -Txterm setaf 2)
YELLOW := $(shell tput -Txterm setaf 3)
WHITE := $(shell tput -Txterm setaf 7)
CYAN := $(shell tput -Txterm setaf 6)
RESET := $(shell tput -Txterm sgr0)
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2
ifeq ($(BUILD_TYPE),openblas)
CGO_LDFLAGS+=-lopenblas
# Use this if you want to set the default behavior
ifndef BUILD_TYPE
BUILD_TYPE:=default
endif
ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export LLAMA_CUBLAS=1
endif
ifeq ($(BUILD_TYPE),clblas)
CGO_LDFLAGS+=-lOpenCL -lclblast
endif
# glibc-static or glibc-devel-static required
ifeq ($(STATIC),true)
LD_FLAGS=-linkmode external -extldflags -static
endif
ifeq ($(GO_TAGS),stablediffusion)
OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
ifeq ($(BUILD_TYPE), "generic")
GENERIC_PREFIX:=generic-
else
GENERIC_PREFIX:=
endif
.PHONY: all test build vendor
all: help
## GPT4ALL
gpt4all:
git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/regex_escape/gpt4allregex_escape/g' {} +
mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
## BERT embeddings
go-bert:
git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
## stable diffusion
go-stable-diffusion:
git clone --recurse-submodules https://github.com/mudler/go-stable-diffusion go-stable-diffusion
cd go-stable-diffusion && git checkout -b build $(STABLEDIFFUSION_VERSION) && git submodule update --init --recursive --depth 1
go-stable-diffusion/libstablediffusion.a:
$(MAKE) -C go-stable-diffusion libstablediffusion.a
## RWKV
go-rwkv:
git clone --recurse-submodules $(RWKV_REPO) go-rwkv
cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
@find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
@find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
@find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
go-rwkv/librwkv.a: go-rwkv
cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
## bloomz
bloomz:
git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
@find ./bloomz -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_bloomz_replace/g' {} +
@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_bloomz_replace/g' {} +
bloomz/libbloomz.a: bloomz
cd bloomz && make libbloomz.a
go-bert/libgobert.a: go-bert
$(MAKE) -C go-bert libgobert.a
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
## CEREBRAS GPT
go-ggml-transformers:
git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
cd go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-ggml-transformers -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-ggml-transformers/libtransformers.a: go-ggml-transformers
$(MAKE) -C go-ggml-transformers libtransformers.a
whisper.cpp:
git clone https://github.com/ggerganov/whisper.cpp.git
cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
@find ./whisper.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
@find ./whisper.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
@find ./whisper.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
whisper.cpp/libwhisper.a: whisper.cpp
cd whisper.cpp && make libwhisper.a
go-llama:
git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
$(GOCMD) mod download
## GENERIC
rebuild: ## Rebuilds the project
$(MAKE) -C go-llama clean
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
$(MAKE) -C go-ggml-transformers clean
$(MAKE) -C go-rwkv clean
$(MAKE) -C whisper.cpp clean
$(MAKE) -C go-stable-diffusion clean
$(MAKE) -C go-bert clean
$(MAKE) -C bloomz clean
$(MAKE) build
prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a ## Prepares for building
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./gpt4all
rm -rf ./go-gpt2
rm -rf ./go-stable-diffusion
rm -rf ./go-ggml-transformers
rm -rf ./go-rwkv
rm -rf ./go-bert
rm -rf ./bloomz
rm -rf ./whisper.cpp
rm -rf $(BINARY_NAME)
rm -rf release/
## Build:
build: prepare ## Build the project
$(info ${GREEN}I local-ai build info:${RESET})
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
dist: build
mkdir -p release
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -o $(BINARY_NAME) ./
generic-build: ## Build the project using generic
BUILD_TYPE="generic" $(MAKE) build
## Run
run: prepare ## run local-ai
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
## GPT4ALL-J
go-gpt4all-j:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt4all-j.cpp go-gpt4all-j
cd go-gpt4all-j && git checkout -b build $(GOGPT4ALLJ_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt4all-j -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
go-gpt4all-j/libgptj.a: go-gpt4all-j
$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
# CEREBRAS GPT
go-gpt2:
git clone --recurse-submodules https://github.com/go-skynet/go-gpt2.cpp go-gpt2
cd go-gpt2 && git checkout -b build $(GOGPT2_VERSION)
# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
@find ./go-gpt2 -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt2_/g' {} +
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
go-gpt2/libgpt2.a: go-gpt2
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
go-llama:
git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
go-llama/libbinding.a: go-llama
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt4all-j.cpp=$(shell pwd)/go-gpt4all-j
$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
prepare: go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a replace
clean: ## Remove build related file
rm -fr ./go-llama
rm -rf ./go-gpt4all-j
rm -rf ./go-gpt2
rm -rf $(BINARY_NAME)
## Run:
run: prepare
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
test-models/testmodel:
mkdir test-models
mkdir test-dir
wget https://huggingface.co/nnakasato/ggml-model-test/resolve/main/ggml-model-q4.bin -O test-models/testmodel
wget https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O test-models/bert
wget https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
wget https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
cp tests/models_fixtures/* test-models
wget https://huggingface.co/concedo/cerebras-111M-ggml/resolve/main/cerberas-111m-q4_0.bin -O test-models/testmodel
test: prepare test-models/testmodel
cp tests/models_fixtures/* test-models
C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flakeAttempts 5 -v -r ./api ./pkg
@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} MODELS_PATH=$(abspath ./)/test-models $(GOCMD) test -v ./...
## Help:
help: ## Show this help.

README.md

@@ -5,54 +5,44 @@
<br>
</h1>
> :warning: This project has been renamed from `llama-cli` to `LocalAI` to reflect the fact that we are focusing on a fast, drop-in OpenAI API rather than on the CLI interface. We think there are already many projects that can be used as a CLI interface, for instance [llama.cpp](https://github.com/ggerganov/llama.cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). If you were using `llama-cli` for CLI interactions and want to keep using it, use older versions or please open up an issue - contributions are welcome!
[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)
[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy)
**LocalAI** is a drop-in replacement REST API that's compatible with the OpenAI API specifications for local inferencing. It allows you to run LLMs (and not only LLMs) locally or on-prem with consumer-grade hardware, supporting multiple model families that are compatible with the ggml format. It does not require a GPU.
**LocalAI** is a straightforward, drop-in replacement API compatible with OpenAI for local CPU inferencing, based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all) and [ggml](https://github.com/ggerganov/ggml), including support for GPT4ALL-J, which is Apache 2.0 licensed and can be used for commercial purposes.
For a list of the supported model families, please see [the model compatibility table](https://localai.io/model-compatibility/index.html#model-compatibility-table).
In a nutshell:
- Local, OpenAI drop-in alternative REST API. You own your data.
- NO GPU required. NO Internet access is required either. Optionally, GPU acceleration is available for `llama.cpp`-compatible LLMs. [See building instructions](https://localai.io/basics/build/index.html).
- Supports multiple models, audio transcription, text generation with GPTs, and image generation with Stable Diffusion (experimental)
- OpenAI compatible API
- Supports multiple models
- Once loaded the first time, it keeps models loaded in memory for faster inference
- Doesn't shell out, but uses C++ bindings for faster inference and better performance.
- Support for prompt templates
- Doesn't shell out, but uses C bindings for faster inference and better performance. Uses [go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) and [go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp).
LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making AI accessible to anyone. Any contribution, feedback, and PR is welcome!
Reddit post: https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/
| [ChatGPT OSS alternative](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) | [Image generation](https://localai.io/api-endpoints/index.html#image-generation) |
|------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
| ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png) | ![b6441997879](https://github.com/go-skynet/LocalAI/assets/2420543/d50af51c-51b7-4f39-b6c2-bf04c403894c) |
## Model compatibility
It is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp) and also supports [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
See the [Getting started](https://localai.io/basics/getting_started/index.html) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://github.com/go-skynet/model-gallery).
Tested with:
- Vicuna
- Alpaca
- [GPT4ALL](https://github.com/nomic-ai/gpt4all)
- [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin)
- Koala
- [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml)
## News
It should also be compatible with StableLM and GPTNeoX ggml models (untested)
- 29-05-2023: LocalAI now has a website, [https://localai.io](https://localai.io)! Check the news in the [dedicated section](https://localai.io/basics/news/index.html)!
For the latest news, also follow [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it) on Twitter.
## Contribute and help
To help the project you can:
- Upvote the [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
- [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project.
- If you have technical skills and want to contribute to development, have a look at the open issues. If you are new, check the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels.
- If you don't have technical skills, you can still help by improving the documentation, adding examples, or sharing your user stories with our community; any help and contribution is welcome!
Note: You might need to convert older models to the new format, see [here](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
## Usage
Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section. Here below you will find generic, quick instructions to get ready and use LocalAI.
> `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
The easiest way to run LocalAI is by using `docker-compose` (to build locally, see [building LocalAI](https://localai.io/basics/build/index.html)):
The easiest way to run LocalAI is by using `docker-compose`:
```bash
@@ -60,9 +50,6 @@ git clone https://github.com/go-skynet/LocalAI
cd LocalAI
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# copy your models to models/
cp your-model.bin models/
@@ -70,9 +57,7 @@ cp your-model.bin models/
# vim .env
# start with docker-compose
docker-compose up -d --pull always
# or you can build the images with:
# docker-compose up -d --build
docker-compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
@@ -95,9 +80,6 @@ git clone https://github.com/go-skynet/LocalAI
cd LocalAI
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
@@ -108,9 +90,8 @@ cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
# vim .env
# start with docker-compose
docker-compose up -d --pull always
# or you can build the images with:
# docker-compose up -d --build
docker-compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
@@ -125,92 +106,253 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
```
</details>
## Prompt templates
### Build locally
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
<details>
You can use a default template for every model present in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For example, this can be used with alpaca:
```
Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{{.Input}}
### Response:
```
See the [prompt-templates](https://github.com/go-skynet/LocalAI/tree/master/prompt-templates) directory in this repository for templates for most popular models.
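For example, a minimal sketch of reusing one of those templates as a model's default prompt (file names are illustrative):

```bash
# A model named foo.bin picks up foo.bin.tmpl as its default prompt template
cp prompt-templates/alpaca.tmpl models/foo.bin.tmpl
```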
</details>
## API
`LocalAI` provides an API for running text generation as a service that follows the OpenAI reference and can be used as a drop-in replacement. Models, once loaded for the first time, are kept in memory for faster subsequent inference.
<details>
Example of starting the API with `docker`:
```bash
docker run -p 8080:8080 -ti --rm quay.io/go-skynet/local-ai:latest --models-path /path/to/models --context-size 700 --threads 4
```
And you'll see:
```
┌───────────────────────────────────────────────────┐
│ Fiber v2.42.0 │
│ http://127.0.0.1:8080 │
│ (bound on host 0.0.0.0 and port 8080) │
│ │
│ Handlers ............. 1 Processes ........... 1 │
│ Prefork ....... Disabled PID ................. 1 │
└───────────────────────────────────────────────────┘
```
You can control the API server options with command line arguments:
```
local-api --models-path <model_path> [--address <address>] [--threads <num_threads>]
```
The API takes the following parameters:
| Parameter | Environment Variable | Default Value | Description |
| ------------ | -------------------- | ------------- | -------------------------------------- |
| models-path | MODELS_PATH | | The path where you have models (ending with `.bin`). |
| threads | THREADS | Number of Physical cores | The number of threads to use for text generation. |
| address | ADDRESS | :8080 | The address and port to listen on. |
| context-size | CONTEXT_SIZE | 512 | Default token context size. |
| debug | DEBUG | false | Enable debug mode. |
Once the server is running, you can start making requests to it over HTTP, using the OpenAI API format.
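The same options can also be set through the environment variables listed in the table above when running the container; a hedged sketch (paths and values are illustrative):

```bash
docker run -p 8080:8080 -ti --rm \
  -v $PWD/models:/models \
  -e MODELS_PATH=/models -e THREADS=4 -e CONTEXT_SIZE=700 -e DEBUG=true \
  quay.io/go-skynet/local-ai:latest
```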
</details>
### Supported OpenAI API endpoints
You can check out the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).
Below is the list of supported endpoints/parameters.
Note:
- You can also specify the model as part of the OpenAI token (see the example after this list).
- If only one model is available, the API will use it for all requests.
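A hedged sketch of selecting the model through the token instead of the request body (the model name is illustrative and must match a file in your models path):

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" \
  -H "Authorization: Bearer ggml-gpt4all-j" \
  -d '{"messages": [{"role": "user", "content": "How are you?"}], "temperature": 0.7}'
```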
#### Chat completions
<details>
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
#### Completions
<details>
For example, to generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as the request body:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
</details>
#### List models
<details>
You can list all the models available with:
```
curl http://localhost:8080/v1/models
```
</details>
## Using other models
gpt4all (https://github.com/nomic-ai/gpt4all) works as well; however, the original model needs to be converted (the same applies to old alpaca models, too):
```bash
wget -O tokenizer.model https://huggingface.co/decapoda-research/llama-30b-hf/resolve/main/tokenizer.model
mkdir models
cp gpt4all.. models/
git clone https://gist.github.com/eiz/828bddec6162a023114ce19146cb2b82
pip install sentencepiece
python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
# There will be a new model with the ".tmp" extension, you have to use that one!
```
## Helm Chart Installation (run LocalAI in Kubernetes)
The local-ai Helm chart supports two options for the LocalAI server's models directory:
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
Install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == false`.
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
First, install the chart with `.Values.deployment.volumes.enabled == false` and `.Values.dataVolume.enabled == true`:
```bash
helm install local-ai charts/local-ai -n local-ai --create-namespace
```
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volumes.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
```bash
helm upgrade local-ai -n local-ai charts/local-ai
```
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
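A hedged sketch of the two-phase flow using `--set` overrides; the exact value paths are assumptions based on the names quoted in this section:

```bash
# Phase 1: provision the models directory through a DataVolume (requires CDI)
helm install local-ai charts/local-ai -n local-ai --create-namespace \
  --set deployment.volumes.enabled=false --set dataVolume.enabled=true

# Phase 2: once the importer Pod has finished, mount the provisioned PV
helm upgrade local-ai charts/local-ai -n local-ai \
  --set deployment.volumes.enabled=true --set dataVolume.enabled=false
```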
## Windows compatibility
It should work; however, you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
## Build locally
Pre-built images should fit most modern hardware well; however, you can (and might need to) build the images manually.
In order to build the `LocalAI` container image locally you can use `docker`:
```
# build the image
docker build -t localai .
docker run localai
docker build -t LocalAI .
docker run LocalAI
```
Or you can build the binary with `make`:
Or build the binary with `make`:
```
make build
```
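To select an acceleration backend at build time, the Makefile's `BUILD_TYPE` variable can be passed to `make`; a hedged sketch using the backends listed in the Makefile and `.env` above:

```bash
# OpenBLAS-accelerated build; other values referenced above include cublas and clblas
make BUILD_TYPE=openblas build
```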
</details>
See the [build section](https://localai.io/basics/build/index.html) in our documentation for detailed instructions.
### Run LocalAI in Kubernetes
LocalAI can be installed inside Kubernetes with helm. See [installation instructions](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes).
## Supported API endpoints
See the [list of the supported API endpoints](https://localai.io/api-endpoints/index.html) and how to configure image generation and audio transcription.
## Frequently asked questions
See [the FAQ](https://localai.io/faq/index.html) section for a list of common questions.
Here are answers to some of the most common questions.
## Projects already using LocalAI to run local models
Feel free to open up a PR to get your project listed!
### How do I get models?
<details>
Most ggml-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open an issue. However, be cautious about downloading models from the internet directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=ggml, and models from gpt4all should also work: https://github.com/nomic-ai/gpt4all.
</details>
### What's the difference with Serge, or XXX?
<details>
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp); it handles all of these internally, which makes inference faster and keeps it easy to set up locally and to deploy to Kubernetes.
</details>
### Can I use it with a Discord bot, or XXX?
<details>
Yes! If the client uses the OpenAI API and supports setting a different base URL to send requests to, you can use the LocalAI endpoint. This allows you to use LocalAI with every application that was built to work with OpenAI, without changing the application!
</details>
### Can this leverage GPUs?
<details>
Not currently, as ggml doesn't support GPUs yet: https://github.com/ggerganov/llama.cpp/discussions/915.
</details>
### Where is the webUI?
<details>
We are working on providing a good out-of-the-box experience. However, as LocalAI is an API, you can already plug it into existing projects that provide UI interfaces to OpenAI's APIs. There are several already on GitHub, and they should be compatible with LocalAI (as it mimics the OpenAI API).
</details>
### Does it work with AutoGPT?
<details>
AutoGPT currently doesn't allow setting a different API URL, but there is a PR open for it, so this should be possible soon!
</details>
- [Kairos](https://github.com/kairos-io/kairos)
- [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)
- [Spark](https://github.com/cedriking/spark)
- [autogpt4all](https://github.com/aorumbayev/autogpt4all)
- [Mods](https://github.com/charmbracelet/mods)
## Short-term roadmap
- [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
- [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
- [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351)
- [x] Multi-model support
- [x] Have a webUI!
- [x] Allow configuration of defaults for models.
- [x] Support for embeddings
- [x] Support for audio transcription with https://github.com/ggerganov/whisper.cpp
- [ ] GPU/CUDA support ( https://github.com/go-skynet/LocalAI/issues/69 )
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models, directly from the webui.
## Star history
[![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)
- [ ] Have a webUI!
- [ ] Allow configuration of defaults for models.
- [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models.
## License
LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
MIT
## Author
Ettore Di Giacinto and others
## Acknowledgements
LocalAI couldn't have been built without the help of great software already available from the community. Thank you!
- [llama.cpp](https://github.com/ggerganov/llama.cpp)
- https://github.com/tatsu-lab/stanford_alpaca
- https://github.com/cornelk/llama-go for the initial ideas
- https://github.com/antimatter15/alpaca.cpp
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/ggerganov/whisper.cpp
- https://github.com/saharNooby/rwkv.cpp
## Contributors
<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
<img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
</a>
- https://github.com/antimatter15/alpaca.cpp for the light model version (this is compatible and tested only with that checkpoint model!)

api/api.go

@@ -1,28 +1,400 @@
package api
import (
"encoding/json"
"errors"
"fmt"
"strings"
"sync"
model "github.com/go-skynet/LocalAI/pkg/model"
gpt2 "github.com/go-skynet/go-gpt2.cpp"
gptj "github.com/go-skynet/go-gpt4all-j.cpp"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/cors"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/gofiber/fiber/v2/middleware/recover"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
)
func App(opts ...AppOption) (*fiber.App, error) {
options := newOptions(opts...)
// APIError provides error information returned by the OpenAI API.
type APIError struct {
Code any `json:"code,omitempty"`
Message string `json:"message"`
Param *string `json:"param,omitempty"`
Type string `json:"type"`
}
type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"chat.completion,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
}
type Choice struct {
Index int `json:"index,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
Message *Message `json:"message,omitempty"`
Text string `json:"text,omitempty"`
}
type Message struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
}
type OpenAIModel struct {
ID string `json:"id"`
Object string `json:"object"`
}
type OpenAIRequest struct {
Model string `json:"model"`
// Prompt is read only by completion API calls
Prompt string `json:"prompt"`
Stop string `json:"stop"`
// Messages is read only by chat/completion API calls
Messages []Message `json:"messages"`
Echo bool `json:"echo"`
// Common options between all the API calls
TopP float64 `json:"top_p"`
TopK int `json:"top_k"`
Temperature float64 `json:"temperature"`
Maxtokens int `json:"max_tokens"`
N int `json:"n"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch"`
F16 bool `json:"f16kv"`
IgnoreEOS bool `json:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty"`
Keep int `json:"n_keep"`
Seed int `json:"seed"`
}
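// Illustrative example of a JSON body this struct accepts (values are
// assumptions for demonstration; field names map to the json tags above):
//
//	{"model": "ggml-gpt4all-j",
//	 "messages": [{"role": "user", "content": "Hello"}],
//	 "temperature": 0.7, "top_p": 0.7, "top_k": 80,
//	 "max_tokens": 512, "seed": 42}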
// https://platform.openai.com/docs/api-reference/completions
func openAIEndpoint(chat, debug bool, loader *model.ModelLoader, threads, ctx int, f16 bool, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
var err error
var model *llama.LLama
var gptModel *gptj.GPTJ
var gpt2Model *gpt2.GPT2
var stableLMModel *gpt2.StableLM
input := new(OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
modelFile := input.Model
received, _ := json.Marshal(input)
log.Debug().Msgf("Request received: %s", string(received))
// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelFile == "" {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
}
}
// If no model is found or specified, we bail out
if modelFile == "" && !bearerExists {
return fmt.Errorf("no model specified")
}
// If a model is found in bearer token takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
// Try to load the model
var llamaerr, gpt2err, gptjerr, stableerr error
llamaOpts := []llama.ModelOption{}
if ctx != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(ctx))
}
if f16 {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
// TODO: this is ugly, better identifying the model somehow! however, it is a good stab for a first implementation..
model, llamaerr = loader.LoadLLaMAModel(modelFile, llamaOpts...)
if llamaerr != nil {
gptModel, gptjerr = loader.LoadGPTJModel(modelFile)
if gptjerr != nil {
gpt2Model, gpt2err = loader.LoadGPT2Model(modelFile)
if gpt2err != nil {
stableLMModel, stableerr = loader.LoadStableLMModel(modelFile)
if stableerr != nil {
return fmt.Errorf("llama: %s gpt: %s gpt2: %s stableLM: %s", llamaerr.Error(), gptjerr.Error(), gpt2err.Error(), stableerr.Error()) // llama failed first, so we want to catch both errors
}
}
}
}
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
// Set the parameters for the language model prediction
topP := input.TopP
if topP == 0 {
topP = 0.7
}
topK := input.TopK
if topK == 0 {
topK = 80
}
temperature := input.Temperature
if temperature == 0 {
temperature = 0.9
}
tokens := input.Maxtokens
if tokens == 0 {
tokens = 512
}
predInput := input.Prompt
if chat {
mess := []string{}
// TODO: encode roles
for _, i := range input.Messages {
mess = append(mess, i.Content)
}
predInput = strings.Join(mess, "\n")
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(modelFile, struct {
Input string
}{Input: predInput})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
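// For illustration, a hypothetical "file.bin.tmpl" placed next to the model
// could contain (assuming Go text/template syntax):
//
//	Below is an instruction. Write a response that appropriately completes the request.
//	### Instruction: {{.Input}}
//	### Response:
//
// in which case predInput above is replaced by the rendered template.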
result := []Choice{}
n := input.N
if input.N == 0 {
n = 1
}
var predFunc func() (string, error)
switch {
case stableLMModel != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(temperature),
gpt2.SetTopP(topP),
gpt2.SetTopK(topK),
gpt2.SetTokens(tokens),
gpt2.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
}
return stableLMModel.Predict(
predInput,
predictOptions...,
)
}
case gpt2Model != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gpt2.PredictOption{
gpt2.SetTemperature(temperature),
gpt2.SetTopP(topP),
gpt2.SetTopK(topK),
gpt2.SetTokens(tokens),
gpt2.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gpt2.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gpt2.SetSeed(input.Seed))
}
return gpt2Model.Predict(
predInput,
predictOptions...,
)
}
case gptModel != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []gptj.PredictOption{
gptj.SetTemperature(temperature),
gptj.SetTopP(topP),
gptj.SetTopK(topK),
gptj.SetTokens(tokens),
gptj.SetThreads(threads),
}
if input.Batch != 0 {
predictOptions = append(predictOptions, gptj.SetBatch(input.Batch))
}
if input.Seed != 0 {
predictOptions = append(predictOptions, gptj.SetSeed(input.Seed))
}
return gptModel.Predict(
predInput,
predictOptions...,
)
}
case model != nil:
predFunc = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []llama.PredictOption{
llama.SetTemperature(temperature),
llama.SetTopP(topP),
llama.SetTopK(topK),
llama.SetTokens(tokens),
llama.SetThreads(threads),
}
if debug {
predictOptions = append(predictOptions, llama.Debug)
}
if input.Stop != "" {
predictOptions = append(predictOptions, llama.SetStopWords(input.Stop))
}
if input.RepeatPenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(input.RepeatPenalty))
}
if input.Keep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(input.Keep))
}
if input.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(input.Batch))
}
if input.F16 {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if input.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if input.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(input.Seed))
}
return model.Predict(
predInput,
predictOptions...,
)
}
}
for i := 0; i < n; i++ {
prediction, err := predFunc()
if err != nil {
return err
}
if input.Echo {
prediction = predInput + prediction
}
if chat {
result = append(result, Choice{Message: &Message{Role: "assistant", Content: prediction}})
} else {
result = append(result, Choice{Text: prediction})
}
}
jsonResult, _ := json.Marshal(result)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
})
}
}
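// Illustrative use of this handler once it is mounted on /v1/chat/completions,
// assuming the server listens on the default :8080 and a hypothetical model file name:
//
//	curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" \
//	  -d '{"model": "ggml-gpt4all-j.bin", "messages": [{"role": "user", "content": "Hi"}]}'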
func listModels(loader *model.ModelLoader) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := loader.ListModels()
if err != nil {
return err
}
dataModels := []OpenAIModel{}
for _, m := range models {
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
}
return c.JSON(struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
}
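// The response mirrors OpenAI's model list format, e.g. (illustrative file name):
//
//	{"object": "list", "data": [{"id": "ggml-gpt4all-j.bin", "object": "model"}]}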
func App(loader *model.ModelLoader, threads, ctxSize int, f16 bool, debug, disableMessage bool) *fiber.App {
zerolog.SetGlobalLevel(zerolog.InfoLevel)
if options.debug {
if debug {
zerolog.SetGlobalLevel(zerolog.DebugLevel)
}
// Return errors as JSON responses
app := fiber.New(fiber.Config{
BodyLimit: options.uploadLimitMB * 1024 * 1024, // overrides Fiber's default body limit of 4MB
DisableStartupMessage: options.disableMessage,
DisableStartupMessage: disableMessage,
// Override default error handler
ErrorHandler: func(ctx *fiber.Ctx, err error) error {
// Status code defaults to 500
@@ -43,100 +415,23 @@ func App(opts ...AppOption) (*fiber.App, error) {
},
})
if options.debug {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
}
cm := NewConfigMerger()
if err := cm.LoadConfigs(options.loader.ModelPath); err != nil {
log.Error().Msgf("error loading config files: %s", err.Error())
}
if options.configFile != "" {
if err := cm.LoadConfigFile(options.configFile); err != nil {
log.Error().Msgf("error loading config file: %s", err.Error())
}
}
if options.debug {
for _, v := range cm.ListConfigs() {
cfg, _ := cm.GetConfig(v)
log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
}
}
// Default middleware config
app.Use(recover.New())
app.Use(cors.New())
if options.preloadJSONModels != "" {
if err := ApplyGalleryFromString(options.loader.ModelPath, options.preloadJSONModels, cm); err != nil {
return nil, err
}
}
if options.preloadModelsFromPath != "" {
if err := ApplyGalleryFromFile(options.loader.ModelPath, options.preloadModelsFromPath, cm); err != nil {
return nil, err
}
}
if options.cors {
if options.corsAllowOrigins == "" {
app.Use(cors.New())
} else {
app.Use(cors.New(cors.Config{
AllowOrigins: options.corsAllowOrigins,
}))
}
}
// LocalAI API endpoints
applier := newGalleryApplier(options.loader.ModelPath)
applier.start(options.context, cm)
app.Post("/models/apply", applyModelGallery(options.loader.ModelPath, cm, applier.C))
app.Get("/models/jobs/:uuid", getOpStatus(applier))
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mu := map[string]*sync.Mutex{}
var mumutex = &sync.Mutex{}
// openAI compatible API endpoint
app.Post("/v1/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/chat/completions", openAIEndpoint(true, debug, loader, threads, ctxSize, f16, mumutex, mu))
// chat
app.Post("/v1/chat/completions", chatEndpoint(cm, options))
app.Post("/chat/completions", chatEndpoint(cm, options))
app.Post("/v1/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
app.Post("/completions", openAIEndpoint(false, debug, loader, threads, ctxSize, f16, mumutex, mu))
// edit
app.Post("/v1/edits", editEndpoint(cm, options))
app.Post("/edits", editEndpoint(cm, options))
app.Get("/v1/models", listModels(loader))
app.Get("/models", listModels(loader))
// completion
app.Post("/v1/completions", completionEndpoint(cm, options))
app.Post("/completions", completionEndpoint(cm, options))
// embeddings
app.Post("/v1/embeddings", embeddingsEndpoint(cm, options))
app.Post("/embeddings", embeddingsEndpoint(cm, options))
app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, options))
// audio
app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, options))
// images
app.Post("/v1/images/generations", imageEndpoint(cm, options))
if options.imageDir != "" {
app.Static("/generated-images", options.imageDir)
}
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
// Kubernetes health checks
app.Get("/healthz", ok)
app.Get("/readyz", ok)
// models
app.Get("/v1/models", listModels(options.loader, cm))
app.Get("/models", listModels(options.loader, cm))
return app, nil
return app
}


@@ -1,215 +1,32 @@
package api_test
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"runtime"
. "github.com/go-skynet/LocalAI/api"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"gopkg.in/yaml.v3"
openaigo "github.com/otiai10/openaigo"
"github.com/sashabaranov/go-openai"
)
type modelApplyRequest struct {
URL string `json:"url"`
Name string `json:"name"`
Overrides map[string]string `json:"overrides"`
}
func getModelStatus(url string) (response map[string]interface{}) {
// Create the HTTP request
resp, err := http.Get(url)
if err != nil {
fmt.Println("Error creating request:", err)
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Println("Error reading response body:", err)
return
}
// Unmarshal the response into a map[string]interface{}
err = json.Unmarshal(body, &response)
if err != nil {
fmt.Println("Error unmarshaling JSON response:", err)
return
}
return
}
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
//url := "http://localhost:AI/models/apply"
// Create the request payload
payload, err := json.Marshal(request)
if err != nil {
fmt.Println("Error marshaling JSON:", err)
return
}
// Create the HTTP request
req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload))
if err != nil {
fmt.Println("Error creating request:", err)
return
}
req.Header.Set("Content-Type", "application/json")
// Make the request
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
fmt.Println("Error making request:", err)
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
fmt.Println("Error reading response body:", err)
return
}
// Unmarshal the response into a map[string]interface{}
err = json.Unmarshal(body, &response)
if err != nil {
fmt.Println("Error unmarshaling JSON response:", err)
return
}
return
}
var _ = Describe("API test", func() {
var app *fiber.App
var modelLoader *model.ModelLoader
var client *openai.Client
var client2 *openaigo.Client
var c context.Context
var cancel context.CancelFunc
var tmpdir string
Context("API with ephemeral models", func() {
BeforeEach(func() {
var err error
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
modelLoader = model.NewModelLoader(tmpdir)
c, cancel = context.WithCancel(context.Background())
app, err = App(WithContext(c), WithModelLoader(modelLoader))
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
cancel()
app.Shutdown()
os.RemoveAll(tmpdir)
})
Context("Applying models", func() {
It("overrides models", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Name: "bert",
Overrides: map[string]string{
"backend": "llama",
},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
fmt.Println(response)
return response["processed"].(bool)
}, "360s").Should(Equal(true))
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
Expect(err).ToNot(HaveOccurred())
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("llama"))
})
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
Name: "bert",
Overrides: map[string]string{},
})
Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
uuid := response["uuid"].(string)
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
fmt.Println(response)
return response["processed"].(bool)
}, "360s").Should(Equal(true))
dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
Expect(err).ToNot(HaveOccurred())
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
})
})
})
Context("API query", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
c, cancel = context.WithCancel(context.Background())
var err error
app, err = App(WithContext(c), WithModelLoader(modelLoader))
Expect(err).ToNot(HaveOccurred())
app = App(modelLoader, 1, 512, false, false, true)
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
@@ -218,13 +35,13 @@ var _ = Describe("API test", func() {
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
cancel()
app.Shutdown()
})
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(10))
Expect(len(models.Models)).To(Equal(1))
Expect(models.Models[0].ID).To(Equal("testmodel"))
})
It("can generate completions", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
@@ -232,141 +49,10 @@ var _ = Describe("API test", func() {
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions ", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "testmodel", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate completions from model configs", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: "abcdedfghikl"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
It("can generate chat completions from model configs", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("returns errors", func() {
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: llama: model does not exist"))
})
It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateTranscription(
context.Background(),
openai.AudioRequest{
Model: openai.Whisper1,
FilePath: filepath.Join(os.Getenv("TEST_DIR"), "audio.wav"),
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp.Text).To(ContainSubstring("This is the Micro Machine Man presenting"))
})
It("calculate embeddings", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun", "cat"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
context.Background(),
openai.EmbeddingRequest{
Model: openai.AdaEmbeddingV2,
Input: []string{"sun"},
},
)
Expect(err).ToNot(HaveOccurred())
Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
})
Context("backends", func() {
It("runs rwkv", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
}
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(resp.Choices[0].Text).To(Equal(" five."))
})
})
})
Context("Config file", func() {
BeforeEach(func() {
modelLoader = model.NewModelLoader(os.Getenv("MODELS_PATH"))
c, cancel = context.WithCancel(context.Background())
var err error
app, err = App(WithContext(c), WithModelLoader(modelLoader), WithConfigFile(os.Getenv("CONFIG_FILE")))
Expect(err).ToNot(HaveOccurred())
go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("")
client2.BaseURL = defaultConfig.BaseURL
// Wait for API to be ready
client = openai.NewClientWithConfig(defaultConfig)
Eventually(func() error {
_, err := client.ListModels(context.TODO())
return err
}, "2m").ShouldNot(HaveOccurred())
})
AfterEach(func() {
cancel()
app.Shutdown()
})
It("can generate chat completions from config file", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(12))
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate chat completions from config file", func() {
resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty())
})
It("can generate edit completions from config file", func() {
request := openaigo.EditCreateRequestBody{
Model: "list2",
Instruction: "foo",
Input: "bar",
}
resp, err := client2.CreateEdit(context.Background(), request)
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices)).To(Equal(1))
Expect(resp.Choices[0].Text).ToNot(BeEmpty())
})
})
})


@@ -1,333 +0,0 @@
package api
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"sync"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
type Config struct {
OpenAIRequest `yaml:"parameters"`
Name string `yaml:"name"`
StopWords []string `yaml:"stopwords"`
Cutstrings []string `yaml:"cutstrings"`
TrimSpace []string `yaml:"trimspace"`
ContextSize int `yaml:"context_size"`
F16 bool `yaml:"f16"`
Threads int `yaml:"threads"`
Debug bool `yaml:"debug"`
Roles map[string]string `yaml:"roles"`
Embeddings bool `yaml:"embeddings"`
Backend string `yaml:"backend"`
TemplateConfig TemplateConfig `yaml:"template"`
MirostatETA float64 `yaml:"mirostat_eta"`
MirostatTAU float64 `yaml:"mirostat_tau"`
Mirostat int `yaml:"mirostat"`
NGPULayers int `yaml:"gpu_layers"`
ImageGenerationAssets string `yaml:"asset_dir"`
PromptCachePath string `yaml:"prompt_cache_path"`
PromptCacheAll bool `yaml:"prompt_cache_all"`
PromptStrings, InputStrings []string
InputToken [][]int
}
type TemplateConfig struct {
Completion string `yaml:"completion"`
Chat string `yaml:"chat"`
Edit string `yaml:"edit"`
}
type ConfigMerger struct {
configs map[string]Config
sync.Mutex
}
func NewConfigMerger() *ConfigMerger {
return &ConfigMerger{
configs: make(map[string]Config),
}
}
func ReadConfigFile(file string) ([]*Config, error) {
c := &[]*Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return *c, nil
}
func ReadConfig(file string) (*Config, error) {
c := &Config{}
f, err := os.ReadFile(file)
if err != nil {
return nil, fmt.Errorf("cannot read config file: %w", err)
}
if err := yaml.Unmarshal(f, c); err != nil {
return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
}
return c, nil
}
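// Both helpers expect YAML shaped like the Config struct above. A minimal,
// illustrative entry (all values hypothetical) could look like:
//
//	name: gpt-3.5-turbo
//	parameters:
//	  model: ggml-gpt4all-j.bin
//	  temperature: 0.2
//	context_size: 512
//	threads: 4
//	stopwords:
//	- "HUMAN:"
//	template:
//	  chat: chat-template
//
// ReadConfigFile parses a YAML list of such entries, while ReadConfig parses a single one.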
func (cm ConfigMerger) LoadConfigFile(file string) error {
cm.Lock()
defer cm.Unlock()
c, err := ReadConfigFile(file)
if err != nil {
return fmt.Errorf("cannot load config file: %w", err)
}
for _, cc := range c {
cm.configs[cc.Name] = *cc
}
return nil
}
func (cm ConfigMerger) LoadConfig(file string) error {
cm.Lock()
defer cm.Unlock()
c, err := ReadConfig(file)
if err != nil {
return fmt.Errorf("cannot read config file: %w", err)
}
cm.configs[c.Name] = *c
return nil
}
func (cm ConfigMerger) GetConfig(m string) (Config, bool) {
cm.Lock()
defer cm.Unlock()
v, exists := cm.configs[m]
return v, exists
}
func (cm ConfigMerger) ListConfigs() []string {
cm.Lock()
defer cm.Unlock()
var res []string
for k := range cm.configs {
res = append(res, k)
}
return res
}
func (cm ConfigMerger) LoadConfigs(path string) error {
cm.Lock()
defer cm.Unlock()
files, err := ioutil.ReadDir(path)
if err != nil {
return err
}
for _, file := range files {
// Skip anything that is not a YAML config file (templates, .keep files, model binaries)
if !strings.Contains(file.Name(), ".yaml") {
continue
}
c, err := ReadConfig(filepath.Join(path, file.Name()))
if err == nil {
cm.configs[c.Name] = *c
}
}
return nil
}
func updateConfig(config *Config, input *OpenAIRequest) {
if input.Echo {
config.Echo = input.Echo
}
if input.TopK != 0 {
config.TopK = input.TopK
}
if input.TopP != 0 {
config.TopP = input.TopP
}
if input.Temperature != 0 {
config.Temperature = input.Temperature
}
if input.Maxtokens != 0 {
config.Maxtokens = input.Maxtokens
}
switch stop := input.Stop.(type) {
case string:
if stop != "" {
config.StopWords = append(config.StopWords, stop)
}
case []interface{}:
for _, pp := range stop {
if s, ok := pp.(string); ok {
config.StopWords = append(config.StopWords, s)
}
}
}
if input.RepeatPenalty != 0 {
config.RepeatPenalty = input.RepeatPenalty
}
if input.Keep != 0 {
config.Keep = input.Keep
}
if input.Batch != 0 {
config.Batch = input.Batch
}
if input.F16 {
config.F16 = input.F16
}
if input.IgnoreEOS {
config.IgnoreEOS = input.IgnoreEOS
}
if input.Seed != 0 {
config.Seed = input.Seed
}
if input.Mirostat != 0 {
config.Mirostat = input.Mirostat
}
if input.MirostatETA != 0 {
config.MirostatETA = input.MirostatETA
}
if input.MirostatTAU != 0 {
config.MirostatTAU = input.MirostatTAU
}
switch inputs := input.Input.(type) {
case string:
if inputs != "" {
config.InputStrings = append(config.InputStrings, inputs)
}
case []interface{}:
for _, pp := range inputs {
switch i := pp.(type) {
case string:
config.InputStrings = append(config.InputStrings, i)
case []interface{}:
tokens := []int{}
for _, ii := range i {
tokens = append(tokens, int(ii.(float64)))
}
config.InputToken = append(config.InputToken, tokens)
}
}
}
switch p := input.Prompt.(type) {
case string:
config.PromptStrings = append(config.PromptStrings, p)
case []interface{}:
for _, pp := range p {
if s, ok := pp.(string); ok {
config.PromptStrings = append(config.PromptStrings, s)
}
}
}
}
func readInput(c *fiber.Ctx, loader *model.ModelLoader, randomModel bool) (string, *OpenAIRequest, error) {
input := new(OpenAIRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return "", nil, err
}
modelFile := input.Model
if c.Params("model") != "" {
modelFile = c.Params("model")
}
received, _ := json.Marshal(input)
log.Debug().Msgf("Request received: %s", string(received))
// Set model from bearer token, if available
bearer := strings.TrimLeft(c.Get("authorization"), "Bearer ")
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
// If no model was specified, take the first available
if modelFile == "" && !bearerExists && randomModel {
models, _ := loader.ListModels()
if len(models) > 0 {
modelFile = models[0]
log.Debug().Msgf("No model specified, using: %s", modelFile)
} else {
log.Debug().Msgf("No model specified, returning error")
return "", nil, fmt.Errorf("no model specified")
}
}
// If a model is found in the bearer token, it takes precedence
if bearerExists {
log.Debug().Msgf("Using model from bearer token: %s", bearer)
modelFile = bearer
}
return modelFile, input, nil
}
func readConfig(modelFile string, input *OpenAIRequest, cm *ConfigMerger, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
// Load a config file if present after the model name
modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")
if _, err := os.Stat(modelConfig); err == nil {
if err := cm.LoadConfig(modelConfig); err != nil {
return nil, nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
}
}
var config *Config
cfg, exists := cm.GetConfig(modelFile)
if !exists {
config = &Config{
OpenAIRequest: defaultRequest(modelFile),
ContextSize: ctx,
Threads: threads,
F16: f16,
Debug: debug,
}
} else {
config = &cfg
}
// Set the parameters for the language model prediction
updateConfig(config, input)
// Don't allow 0 as setting
if config.Threads == 0 {
if threads != 0 {
config.Threads = threads
} else {
config.Threads = 4
}
}
// Enforce debug flag if passed from CLI
if debug {
config.Debug = true
}
return config, input, nil
}


@@ -1,27 +0,0 @@
package api
import (
"os"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("Test cases for config related functions", func() {
var (
configFile string
)
Context("Test Read configuration functions", func() {
configFile = os.Getenv("CONFIG_FILE")
It("Test ReadConfigFile", func() {
config, err := ReadConfigFile(configFile)
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
// two configs in config.yaml
Expect(len(config)).To(Equal(2))
})
})
})


@@ -1,233 +0,0 @@
package api
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"sync"
"github.com/go-skynet/LocalAI/pkg/gallery"
"github.com/gofiber/fiber/v2"
"github.com/google/uuid"
"gopkg.in/yaml.v3"
)
type galleryOp struct {
req ApplyGalleryModelRequest
id string
}
type galleryOpStatus struct {
Error error `json:"error"`
Processed bool `json:"processed"`
Message string `json:"message"`
}
type galleryApplier struct {
modelPath string
sync.Mutex
C chan galleryOp
statuses map[string]*galleryOpStatus
}
func newGalleryApplier(modelPath string) *galleryApplier {
return &galleryApplier{
modelPath: modelPath,
C: make(chan galleryOp),
statuses: make(map[string]*galleryOpStatus),
}
}
func applyGallery(modelPath string, req ApplyGalleryModelRequest, cm *ConfigMerger) error {
url, err := req.DecodeURL()
if err != nil {
return err
}
// Send a GET request to the URL
response, err := http.Get(url)
if err != nil {
return err
}
defer response.Body.Close()
// Read the response body
body, err := ioutil.ReadAll(response.Body)
if err != nil {
return err
}
// Unmarshal YAML data into a Config struct
var config gallery.Config
err = yaml.Unmarshal(body, &config)
if err != nil {
return err
}
config.Files = append(config.Files, req.AdditionalFiles...)
if err := gallery.Apply(modelPath, req.Name, &config, req.Overrides); err != nil {
return err
}
// Reload models
return cm.LoadConfigs(modelPath)
}
func (g *galleryApplier) updatestatus(s string, op *galleryOpStatus) {
g.Lock()
defer g.Unlock()
g.statuses[s] = op
}
func (g *galleryApplier) getstatus(s string) *galleryOpStatus {
g.Lock()
defer g.Unlock()
return g.statuses[s]
}
func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
go func() {
for {
select {
case <-c.Done():
return
case op := <-g.C:
g.updatestatus(op.id, &galleryOpStatus{Message: "processing"})
updateError := func(e error) {
g.updatestatus(op.id, &galleryOpStatus{Error: e, Processed: true})
}
if err := applyGallery(g.modelPath, op.req, cm); err != nil {
updateError(err)
continue
}
g.updatestatus(op.id, &galleryOpStatus{Processed: true, Message: "completed"})
}
}
}()
}
func ApplyGalleryFromFile(modelPath, s string, cm *ConfigMerger) error {
dat, err := os.ReadFile(s)
if err != nil {
return err
}
var requests []ApplyGalleryModelRequest
err = json.Unmarshal(dat, &requests)
if err != nil {
return err
}
for _, r := range requests {
if err := applyGallery(modelPath, r, cm); err != nil {
return err
}
}
return nil
}
func ApplyGalleryFromString(modelPath, s string, cm *ConfigMerger) error {
var requests []ApplyGalleryModelRequest
err := json.Unmarshal([]byte(s), &requests)
if err != nil {
return err
}
for _, r := range requests {
if err := applyGallery(modelPath, r, cm); err != nil {
return err
}
}
return nil
}
// endpoints
type ApplyGalleryModelRequest struct {
URL string `json:"url"`
Name string `json:"name"`
Overrides map[string]interface{} `json:"overrides"`
AdditionalFiles []gallery.File `json:"files"`
}
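// An illustrative request body for POST /models/apply (the gallery URL uses the
// format accepted by DecodeURL below; the name and overrides are user-chosen):
//
//	{"url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "bert", "overrides": {"backend": "llama"}}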
const (
githubURI = "github:"
)
func (request ApplyGalleryModelRequest) DecodeURL() (string, error) {
input := request.URL
var rawURL string
if strings.HasPrefix(input, githubURI) {
parts := strings.Split(input, ":")
repoParts := strings.Split(parts[1], "@")
branch := "main"
if len(repoParts) > 1 {
branch = repoParts[1]
}
repoPath := strings.Split(repoParts[0], "/")
org := repoPath[0]
project := repoPath[1]
projectPath := strings.Join(repoPath[2:], "/")
rawURL = fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
} else if strings.HasPrefix(input, "http://") || strings.HasPrefix(input, "https://") {
// Handle regular URLs
u, err := url.Parse(input)
if err != nil {
return "", fmt.Errorf("invalid URL: %w", err)
}
rawURL = u.String()
} else {
return "", fmt.Errorf("invalid URL format")
}
return rawURL, nil
}
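// For example, "github:go-skynet/model-gallery/gpt4all-j.yaml@main" decodes to
// "https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml",
// the same mapping exercised by the gallery API tests.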
func getOpStatus(g *galleryApplier) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
status := g.getstatus(c.Params("uuid"))
if status == nil {
return fmt.Errorf("could not find any status for ID")
}
return c.JSON(status)
}
}
func applyModelGallery(modelPath string, cm *ConfigMerger, g chan galleryOp) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
input := new(ApplyGalleryModelRequest)
// Get input data from the request body
if err := c.BodyParser(input); err != nil {
return err
}
uuid, err := uuid.NewUUID()
if err != nil {
return err
}
g <- galleryOp{
req: *input,
id: uuid.String(),
}
return c.JSON(struct {
ID string `json:"uuid"`
StatusURL string `json:"status"`
}{ID: uuid.String(), StatusURL: c.BaseURL() + "/models/jobs/" + uuid.String()})
}
}


@@ -1,30 +0,0 @@
package api_test
import (
. "github.com/go-skynet/LocalAI/api"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("Gallery API tests", func() {
Context("requests", func() {
It("parses github with a branch", func() {
req := ApplyGalleryModelRequest{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
str, err := req.DecodeURL()
Expect(err).ToNot(HaveOccurred())
Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
})
It("parses github without a branch", func() {
req := ApplyGalleryModelRequest{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml"}
str, err := req.DecodeURL()
Expect(err).ToNot(HaveOccurred())
Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
})
It("parses URLS", func() {
req := ApplyGalleryModelRequest{URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"}
str, err := req.DecodeURL()
Expect(err).ToNot(HaveOccurred())
Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
})
})
})


@@ -1,678 +0,0 @@
package api
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
model "github.com/go-skynet/LocalAI/pkg/model"
whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
llama "github.com/go-skynet/go-llama.cpp"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
"github.com/valyala/fasthttp"
)
// APIError provides error information returned by the OpenAI API.
type APIError struct {
Code any `json:"code,omitempty"`
Message string `json:"message"`
Param *string `json:"param,omitempty"`
Type string `json:"type"`
}
type ErrorResponse struct {
Error *APIError `json:"error,omitempty"`
}
type OpenAIUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type Item struct {
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
Object string `json:"object,omitempty"`
// Images
URL string `json:"url,omitempty"`
B64JSON string `json:"b64_json,omitempty"`
}
type OpenAIResponse struct {
Created int `json:"created,omitempty"`
Object string `json:"object,omitempty"`
ID string `json:"id,omitempty"`
Model string `json:"model,omitempty"`
Choices []Choice `json:"choices,omitempty"`
Data []Item `json:"data,omitempty"`
Usage OpenAIUsage `json:"usage"`
}
type Choice struct {
Index int `json:"index,omitempty"`
FinishReason string `json:"finish_reason,omitempty"`
Message *Message `json:"message,omitempty"`
Delta *Message `json:"delta,omitempty"`
Text string `json:"text,omitempty"`
}
type Message struct {
Role string `json:"role,omitempty" yaml:"role"`
Content string `json:"content,omitempty" yaml:"content"`
}
type OpenAIModel struct {
ID string `json:"id"`
Object string `json:"object"`
}
type OpenAIRequest struct {
Model string `json:"model" yaml:"model"`
// whisper
File string `json:"file" validate:"required"`
Language string `json:"language"`
// whisper/image
ResponseFormat string `json:"response_format"`
// image
Size string `json:"size"`
// Prompt is only read by completion/image API calls
Prompt interface{} `json:"prompt" yaml:"prompt"`
// Edit endpoint
Instruction string `json:"instruction" yaml:"instruction"`
Input interface{} `json:"input" yaml:"input"`
Stop interface{} `json:"stop" yaml:"stop"`
// Messages is only read by chat/completion API calls
Messages []Message `json:"messages" yaml:"messages"`
Stream bool `json:"stream"`
Echo bool `json:"echo"`
// Common options between all the API calls
TopP float64 `json:"top_p" yaml:"top_p"`
TopK int `json:"top_k" yaml:"top_k"`
Temperature float64 `json:"temperature" yaml:"temperature"`
Maxtokens int `json:"max_tokens" yaml:"max_tokens"`
N int `json:"n"`
// Custom parameters - not present in the OpenAI API
Batch int `json:"batch" yaml:"batch"`
F16 bool `json:"f16" yaml:"f16"`
IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
Keep int `json:"n_keep" yaml:"n_keep"`
MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
Mirostat int `json:"mirostat" yaml:"mirostat"`
Seed int `json:"seed" yaml:"seed"`
// Image (not supported by OpenAI)
Mode int `json:"mode"`
Step int `json:"step"`
}
func defaultRequest(modelFile string) OpenAIRequest {
return OpenAIRequest{
TopP: 0.7,
TopK: 80,
Maxtokens: 512,
Temperature: 0.9,
Model: modelFile,
}
}
// https://platform.openai.com/docs/api-reference/completions
func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := config.Model
if config.TemplateConfig.Completion != "" {
templateFile = config.TemplateConfig.Completion
}
var result []Choice
for _, i := range config.PromptStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: i})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
return err
}
result = append(result, r...)
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "text_completion",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
// https://platform.openai.com/docs/api-reference/embeddings
func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
items := []Item{}
for i, s := range config.InputToken {
// get the model function to call for the result
embedFn, err := ModelEmbedding("", s, o.loader, *config)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
for i, s := range config.InputStrings {
// get the model function to call for the result
embedFn, err := ModelEmbedding(s, []int{}, o.loader, *config)
if err != nil {
return err
}
embeddings, err := embedFn()
if err != nil {
return err
}
items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Data: items,
Object: "list",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
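// Illustrative request for this endpoint, assuming the default :8080 address and
// a hypothetical embeddings-capable model configuration named "bert-embeddings":
//
//	curl http://localhost:8080/v1/embeddings -H "Content-Type: application/json" \
//	  -d '{"model": "bert-embeddings", "input": "sun"}'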
func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
initialMessage := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Role: "assistant"}}},
Object: "chat.completion.chunk",
}
responses <- initialMessage
ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
resp := OpenAIResponse{
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{Delta: &Message{Content: s}}},
Object: "chat.completion.chunk",
}
log.Debug().Msgf("Sending goroutine: %s", s)
responses <- resp
return true
})
close(responses)
}
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
var predInput string
mess := []string{}
for _, i := range input.Messages {
var content string
r := config.Roles[i.Role]
if r != "" {
content = fmt.Sprint(r, " ", i.Content)
} else {
content = i.Content
}
mess = append(mess, content)
}
predInput = strings.Join(mess, "\n")
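// For illustration: with roles configured as, say, {"user": "### Human:", "assistant": "### Assistant:"}
// (hypothetical values), a two-message conversation is flattened to
//
//	### Human: Hi
//	### Assistant: Hello!
//
// before templating and inference.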
if input.Stream {
log.Debug().Msgf("Stream request received")
c.Context().SetContentType("text/event-stream")
//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
// c.Set("Content-Type", "text/event-stream")
c.Set("Cache-Control", "no-cache")
c.Set("Connection", "keep-alive")
c.Set("Transfer-Encoding", "chunked")
}
templateFile := config.Model
if config.TemplateConfig.Chat != "" {
templateFile = config.TemplateConfig.Chat
}
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
Input string
}{Input: predInput})
if err == nil {
predInput = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", predInput)
}
if input.Stream {
responses := make(chan OpenAIResponse)
go process(predInput, input, config, o.loader, responses)
c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
for ev := range responses {
var buf bytes.Buffer
enc := json.NewEncoder(&buf)
enc.Encode(ev)
log.Debug().Msgf("Sending chunk: %s", buf.String())
fmt.Fprintf(w, "data: %v\n", buf.String())
w.Flush()
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []Choice{{FinishReason: "stop"}},
}
respData, _ := json.Marshal(resp)
w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
w.WriteString("data: [DONE]\n\n")
w.Flush()
}))
return nil
}
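// Note: the stream above follows the server-sent-events convention used by the
// OpenAI API: one "data: {chunk}" line per generated fragment, a final chunk
// whose choice carries finish_reason "stop", and a closing "data: [DONE]" line.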
result, err := ComputeChoices(predInput, input, config, o.loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
}, nil)
if err != nil {
return err
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "chat.completion",
}
respData, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", respData)
// Return the prediction in the response body
return c.JSON(resp)
}
}
func editEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
model, input, err := readInput(c, o.loader, true)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
templateFile := config.Model
if config.TemplateConfig.Edit != "" {
templateFile = config.TemplateConfig.Edit
}
var result []Choice
for _, i := range config.InputStrings {
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
Input string
Instruction string
}{Input: i})
if err == nil {
i = templatedInput
log.Debug().Msgf("Template found, input modified to: %s", i)
}
r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) {
*c = append(*c, Choice{Text: s})
}, nil)
if err != nil {
return err
}
result = append(result, r...)
}
resp := &OpenAIResponse{
Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: result,
Object: "edit",
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
// https://platform.openai.com/docs/api-reference/images/create
/*
*
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "A cute baby sea otter",
"n": 1,
"size": "512x512"
}'
*
*/
func imageEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, o.loader, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
if m == "" {
m = model.StableDiffusionBackend
}
log.Debug().Msgf("Loading model: %+v", m)
config, input, err := readConfig(m, input, cm, o.loader, o.debug, 0, 0, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
log.Debug().Msgf("Parameter Config: %+v", config)
// XXX: Only stablediffusion is supported for now
if config.Backend == "" {
config.Backend = model.StableDiffusionBackend
}
sizeParts := strings.Split(input.Size, "x")
if len(sizeParts) != 2 {
return fmt.Errorf("Invalid value for 'size'")
}
width, err := strconv.Atoi(sizeParts[0])
if err != nil {
return fmt.Errorf("Invalid value for 'size'")
}
height, err := strconv.Atoi(sizeParts[1])
if err != nil {
return fmt.Errorf("Invalid value for 'size'")
}
b64JSON := false
if input.ResponseFormat == "b64_json" {
b64JSON = true
}
var result []Item
for _, i := range config.PromptStrings {
n := input.N
if input.N == 0 {
n = 1
}
for j := 0; j < n; j++ {
prompts := strings.Split(i, "|")
positive_prompt := prompts[0]
negative_prompt := ""
if len(prompts) > 1 {
negative_prompt = prompts[1]
}
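// For illustration (hypothetical prompt): "a cute baby sea otter|blurry, low quality"
// renders "a cute baby sea otter" while using "blurry, low quality" as the negative prompt.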
mode := 0
step := 15
if input.Mode != 0 {
mode = input.Mode
}
if input.Step != 0 {
step = input.Step
}
tempDir := ""
if !b64JSON {
tempDir = o.imageDir
}
// Create a temporary file
outputFile, err := ioutil.TempFile(tempDir, "b64")
if err != nil {
return err
}
outputFile.Close()
output := outputFile.Name() + ".png"
// Rename the temporary file
err = os.Rename(outputFile.Name(), output)
if err != nil {
return err
}
baseURL := c.BaseURL()
fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.loader, *config)
if err != nil {
return err
}
if err := fn(); err != nil {
return err
}
item := &Item{}
if b64JSON {
defer os.RemoveAll(output)
data, err := os.ReadFile(output)
if err != nil {
return err
}
item.B64JSON = base64.StdEncoding.EncodeToString(data)
} else {
base := filepath.Base(output)
item.URL = baseURL + "/generated-images/" + base
}
result = append(result, *item)
}
}
resp := &OpenAIResponse{
Data: result,
}
jsonResult, _ := json.Marshal(resp)
log.Debug().Msgf("Response: %s", jsonResult)
// Return the prediction in the response body
return c.JSON(resp)
}
}
// https://platform.openai.com/docs/api-reference/audio/create
func transcriptEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
m, input, err := readInput(c, o.loader, false)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
config, input, err := readConfig(m, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
if err != nil {
return fmt.Errorf("failed reading parameters from request:%w", err)
}
// retrieve the file data from the request
file, err := c.FormFile("file")
if err != nil {
return err
}
f, err := file.Open()
if err != nil {
return err
}
defer f.Close()
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return err
}
defer os.RemoveAll(dir)
dst := filepath.Join(dir, path.Base(file.Filename))
dstFile, err := os.Create(dst)
if err != nil {
return err
}
if _, err := io.Copy(dstFile, f); err != nil {
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
return err
}
log.Debug().Msgf("Audio file copied to: %+v", dst)
whisperModel, err := o.loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
if err != nil {
return err
}
if whisperModel == nil {
return fmt.Errorf("could not load whisper model")
}
w, ok := whisperModel.(whisper.Model)
if !ok {
return fmt.Errorf("loader returned non-whisper object")
}
tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
if err != nil {
return err
}
log.Debug().Msgf("Trascribed: %+v", tr)
// TODO: handle different outputs here
return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr})
}
}
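// Illustrative request, in the multipart form shape the OpenAI audio API uses
// (the model value is hypothetical):
//
//	curl http://localhost:8080/v1/audio/transcriptions \
//	  -F file="@/path/to/audio.wav" -F model="whisper-1"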
func listModels(loader *model.ModelLoader, cm *ConfigMerger) func(ctx *fiber.Ctx) error {
return func(c *fiber.Ctx) error {
models, err := loader.ListModels()
if err != nil {
return err
}
var mm map[string]interface{} = map[string]interface{}{}
dataModels := []OpenAIModel{}
for _, m := range models {
mm[m] = nil
dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
}
for _, k := range cm.ListConfigs() {
if _, exists := mm[k]; !exists {
dataModels = append(dataModels, OpenAIModel{ID: k, Object: "model"})
}
}
return c.JSON(struct {
Object string `json:"object"`
Data []OpenAIModel `json:"data"`
}{
Object: "list",
Data: dataModels,
})
}
}


@@ -1,121 +0,0 @@
package api
import (
"context"
model "github.com/go-skynet/LocalAI/pkg/model"
)
type Option struct {
context context.Context
configFile string
loader *model.ModelLoader
uploadLimitMB, threads, ctxSize int
f16 bool
debug, disableMessage bool
imageDir string
cors bool
preloadJSONModels string
preloadModelsFromPath string
corsAllowOrigins string
}
type AppOption func(*Option)
func newOptions(o ...AppOption) *Option {
opt := &Option{
context: context.Background(),
uploadLimitMB: 15,
threads: 1,
ctxSize: 512,
debug: true,
disableMessage: true,
}
for _, oo := range o {
oo(opt)
}
return opt
}
func WithCors(b bool) AppOption {
return func(o *Option) {
o.cors = b
}
}
func WithCorsAllowOrigins(b string) AppOption {
return func(o *Option) {
o.corsAllowOrigins = b
}
}
func WithContext(ctx context.Context) AppOption {
return func(o *Option) {
o.context = ctx
}
}
func WithYAMLConfigPreload(configFile string) AppOption {
return func(o *Option) {
o.preloadModelsFromPath = configFile
}
}
func WithJSONStringPreload(configFile string) AppOption {
return func(o *Option) {
o.preloadJSONModels = configFile
}
}
func WithConfigFile(configFile string) AppOption {
return func(o *Option) {
o.configFile = configFile
}
}
func WithModelLoader(loader *model.ModelLoader) AppOption {
return func(o *Option) {
o.loader = loader
}
}
func WithUploadLimitMB(limit int) AppOption {
return func(o *Option) {
o.uploadLimitMB = limit
}
}
func WithThreads(threads int) AppOption {
return func(o *Option) {
o.threads = threads
}
}
func WithContextSize(ctxSize int) AppOption {
return func(o *Option) {
o.ctxSize = ctxSize
}
}
func WithF16(f16 bool) AppOption {
return func(o *Option) {
o.f16 = f16
}
}
func WithDebug(debug bool) AppOption {
return func(o *Option) {
o.debug = debug
}
}
func WithDisableMessage(disableMessage bool) AppOption {
return func(o *Option) {
o.disableMessage = disableMessage
}
}
func WithImageDir(imageDir string) AppOption {
return func(o *Option) {
o.imageDir = imageDir
}
}
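// Typical wiring from a caller, mirroring how the API tests construct the
// application (the models path and thread count are placeholders):
//
//	loader := model.NewModelLoader("/path/to/models")
//	app, err := api.App(api.WithContext(ctx), api.WithModelLoader(loader), api.WithThreads(4))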


@@ -1,574 +0,0 @@
package api
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"github.com/donomii/go-rwkv.cpp"
model "github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/stablediffusion"
"github.com/go-skynet/bloomz.cpp"
bert "github.com/go-skynet/go-bert.cpp"
transformers "github.com/go-skynet/go-ggml-transformers.cpp"
llama "github.com/go-skynet/go-llama.cpp"
gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
)
// mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
var mutexMap sync.Mutex
var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
func defaultLLamaOpts(c Config) []llama.ModelOption {
llamaOpts := []llama.ModelOption{}
if c.ContextSize != 0 {
llamaOpts = append(llamaOpts, llama.SetContext(c.ContextSize))
}
if c.F16 {
llamaOpts = append(llamaOpts, llama.EnableF16Memory)
}
if c.Embeddings {
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
}
if c.NGPULayers != 0 {
llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
}
return llamaOpts
}
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config) (func() error, error) {
if c.Backend != model.StableDiffusionBackend {
return nil, fmt.Errorf("endpoint only working with stablediffusion models")
}
inferenceModel, err := loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads))
if err != nil {
return nil, err
}
var fn func() error
switch model := inferenceModel.(type) {
case *stablediffusion.StableDiffusion:
fn = func() error {
return model.GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst)
}
default:
fn = func() error {
return fmt.Errorf("creation of images not supported by the backend")
}
}
return func() error {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[c.Backend]
if !ok {
m := &sync.Mutex{}
mutexes[c.Backend] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
return fn()
}, nil
}
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
if !c.Embeddings {
return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
}
modelFile := c.Model
llamaOpts := defaultLLamaOpts(c)
var inferenceModel interface{}
var err error
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
} else {
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
}
if err != nil {
return nil, err
}
var fn func() ([]float32, error)
switch model := inferenceModel.(type) {
case *llama.LLama:
fn = func() ([]float32, error) {
predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
if len(tokens) > 0 {
return model.TokenEmbeddings(tokens, predictOptions...)
}
return model.Embeddings(s, predictOptions...)
}
// bert embeddings
case *bert.Bert:
fn = func() ([]float32, error) {
if len(tokens) > 0 {
return model.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
}
return model.Embeddings(s, bert.SetThreads(c.Threads))
}
default:
fn = func() ([]float32, error) {
return nil, fmt.Errorf("embeddings not supported by the backend")
}
}
return func() ([]float32, error) {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
embeds, err := fn()
if err != nil {
return embeds, err
}
// Remove trailing 0s
for i := len(embeds) - 1; i >= 0; i-- {
if embeds[i] == 0.0 {
embeds = embeds[:i]
} else {
break
}
}
return embeds, nil
}, nil
}
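// Note: the closure returned above serializes embedding calls per model file via
// the shared mutex map and trims trailing zero values from the vector before
// returning it.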
func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption {
// Generate the prediction using the language model
predictOptions := []llama.PredictOption{
llama.SetTemperature(c.Temperature),
llama.SetTopP(c.TopP),
llama.SetTopK(c.TopK),
llama.SetTokens(c.Maxtokens),
llama.SetThreads(c.Threads),
}
if c.PromptCacheAll {
predictOptions = append(predictOptions, llama.EnablePromptCacheAll)
}
if c.PromptCachePath != "" {
// Create parent directory
p := filepath.Join(modelPath, c.PromptCachePath)
os.MkdirAll(filepath.Dir(p), 0755)
predictOptions = append(predictOptions, llama.SetPathPromptCache(p))
}
if c.Mirostat != 0 {
predictOptions = append(predictOptions, llama.SetMirostat(c.Mirostat))
}
if c.MirostatETA != 0 {
predictOptions = append(predictOptions, llama.SetMirostatETA(c.MirostatETA))
}
if c.MirostatTAU != 0 {
predictOptions = append(predictOptions, llama.SetMirostatTAU(c.MirostatTAU))
}
if c.Debug {
predictOptions = append(predictOptions, llama.Debug)
}
predictOptions = append(predictOptions, llama.SetStopWords(c.StopWords...))
if c.RepeatPenalty != 0 {
predictOptions = append(predictOptions, llama.SetPenalty(c.RepeatPenalty))
}
if c.Keep != 0 {
predictOptions = append(predictOptions, llama.SetNKeep(c.Keep))
}
if c.Batch != 0 {
predictOptions = append(predictOptions, llama.SetBatch(c.Batch))
}
if c.F16 {
predictOptions = append(predictOptions, llama.EnableF16KV)
}
if c.IgnoreEOS {
predictOptions = append(predictOptions, llama.IgnoreEOS)
}
if c.Seed != 0 {
predictOptions = append(predictOptions, llama.SetSeed(c.Seed))
}
return predictOptions
}
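// ModelInference returns a function that runs a single prediction for the
// given prompt against the configured backend. When tokenCallback is set,
// streaming backends invoke it per token; non-streaming backends invoke it
// once with the complete result (see the wrapper returned below).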
func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
supportStreams := false
modelFile := c.Model
llamaOpts := defaultLLamaOpts(c)
var inferenceModel interface{}
var err error
if c.Backend == "" {
inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
} else {
inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
}
if err != nil {
return nil, err
}
var fn func() (string, error)
switch model := inferenceModel.(type) {
case *rwkv.RwkvState:
supportStreams = true
fn = func() (string, error) {
stopWord := "\n"
if len(c.StopWords) > 0 {
stopWord = c.StopWords[0]
}
if err := model.ProcessInput(s); err != nil {
return "", err
}
response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
return response, nil
}
case *transformers.GPTNeoX:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *transformers.Replit:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *transformers.Starcoder:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *transformers.MPT:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *bloomz.Bloomz:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []bloomz.PredictOption{
bloomz.SetTemperature(c.Temperature),
bloomz.SetTopP(c.TopP),
bloomz.SetTopK(c.TopK),
bloomz.SetTokens(c.Maxtokens),
bloomz.SetThreads(c.Threads),
}
if c.Seed != 0 {
predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *transformers.GPTJ:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *transformers.Dolly:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *transformers.GPT2:
fn = func() (string, error) {
// Generate the prediction using the language model
predictOptions := []transformers.PredictOption{
transformers.SetTemperature(c.Temperature),
transformers.SetTopP(c.TopP),
transformers.SetTopK(c.TopK),
transformers.SetTokens(c.Maxtokens),
transformers.SetThreads(c.Threads),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
}
if c.Seed != 0 {
predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
}
return model.Predict(
s,
predictOptions...,
)
}
case *gpt4all.Model:
supportStreams = true
fn = func() (string, error) {
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}
// Generate the prediction using the language model
predictOptions := []gpt4all.PredictOption{
gpt4all.SetTemperature(c.Temperature),
gpt4all.SetTopP(c.TopP),
gpt4all.SetTopK(c.TopK),
gpt4all.SetTokens(c.Maxtokens),
}
if c.Batch != 0 {
predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
}
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
case *llama.LLama:
supportStreams = true
fn = func() (string, error) {
if tokenCallback != nil {
model.SetTokenCallback(tokenCallback)
}
predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
str, er := model.Predict(
s,
predictOptions...,
)
// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
// after a stream event has occurred
model.SetTokenCallback(nil)
return str, er
}
	default:
		fn = func() (string, error) {
			return "", fmt.Errorf("inference not supported by the backend")
		}
	}
return func() (string, error) {
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
mutexMap.Lock()
l, ok := mutexes[modelFile]
if !ok {
m := &sync.Mutex{}
mutexes[modelFile] = m
l = m
}
mutexMap.Unlock()
l.Lock()
defer l.Unlock()
res, err := fn()
if tokenCallback != nil && !supportStreams {
tokenCallback(res)
}
return res, err
}, nil
}
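// ComputeChoices runs the prediction n times (as requested by the OpenAI-style
// `n` parameter, defaulting to 1), applies Finetune to each result, and lets
// the callback append the resulting Choice entries.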
func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
result := []Choice{}
n := input.N
if input.N == 0 {
n = 1
}
// get the model function to call for the result
predFunc, err := ModelInference(predInput, loader, *config, tokenCallback)
if err != nil {
return result, err
}
for i := 0; i < n; i++ {
prediction, err := predFunc()
if err != nil {
return result, err
}
prediction = Finetune(*config, predInput, prediction)
cb(prediction, &result)
//result = append(result, Choice{Text: prediction})
}
return result, err
}
var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
var mu sync.Mutex = sync.Mutex{}
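// Finetune post-processes a raw prediction according to the model config:
// it optionally echoes the input, strips substrings matching the configured
// cutstrings regexes, and trims the configured prefixes and surrounding
// whitespace.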
func Finetune(config Config, input, prediction string) string {
if config.Echo {
prediction = input + prediction
}
for _, c := range config.Cutstrings {
mu.Lock()
reg, ok := cutstrings[c]
if !ok {
cutstrings[c] = regexp.MustCompile(c)
reg = cutstrings[c]
}
mu.Unlock()
prediction = reg.ReplaceAllString(prediction, "")
}
for _, c := range config.TrimSpace {
prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
}
return prediction
}
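// Usage sketch (illustrative only; variable names are assumed): an API handler
// that wants a single completion could combine the helpers above as follows:
//
//	predict, err := ModelInference(prompt, loader, cfg, nil)
//	if err != nil {
//		return err
//	}
//	out, err := predict()
//	if err != nil {
//		return err
//	}
//	out = Finetune(cfg, prompt, out)
//
// ComputeChoices wraps this same flow and additionally honours the request's
// `n` parameter, producing one Choice per generated completion.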

View File

@@ -0,0 +1,6 @@
apiVersion: v2
appVersion: 0.1.0
description: A Helm chart for LocalAI
name: local-ai
type: application
version: 1.0.0

View File

@@ -0,0 +1,44 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "local-ai.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "local-ai.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "local-ai.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "local-ai.labels" -}}
helm.sh/chart: {{ include "local-ai.chart" . }}
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: "{{ .Release.Name }}"
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,39 @@
{{- if .Values.dataVolume.enabled }}
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
contentType: archive
source:
{{ .Values.dataVolume.source.type }}:
url: {{ .Values.dataVolume.source.url }}
secretRef: {{ template "local-ai.fullname" . }}
{{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders }}
{{- end }}
{{- if .Values.dataVolume.source.caCertConfigMap }}
caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
{{- end }}
pvc:
accessModes: {{ .Values.dataVolume.pvc.accessModes }}
resources:
requests:
storage: {{ .Values.dataVolume.pvc.size }}
---
{{- if .Values.dataVolume.secret.enabled }}
apiVersion: v1
kind: Secret
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
data:
accessKeyId: {{ .Values.dataVolume.secret.username }}
secretKey: {{ .Values.dataVolume.secret.password }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,39 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
spec:
selector:
matchLabels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
replicas: 1
template:
metadata:
name: {{ template "local-ai.fullname" . }}
labels:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
spec:
containers:
- name: {{ template "local-ai.fullname" . }}
image: {{ .Values.deployment.image }}
env:
- name: THREADS
value: {{ .Values.deployment.env.threads | quote }}
- name: CONTEXT_SIZE
value: {{ .Values.deployment.env.contextSize | quote }}
- name: MODELS_PATH
value: {{ .Values.deployment.env.modelsPath }}
{{- if .Values.deployment.volume.enabled }}
volumeMounts:
- mountPath: {{ .Values.deployment.env.modelsPath }}
name: models
volumes:
- name: models
persistentVolumeClaim:
claimName: {{ template "local-ai.fullname" . }}
{{- end }}

View File

@@ -0,0 +1,19 @@
apiVersion: v1
kind: Service
metadata:
name: {{ template "local-ai.fullname" . }}
namespace: {{ .Release.Namespace | quote }}
labels:
{{- include "local-ai.labels" . | nindent 4 }}
{{- if .Values.service.annotations }}
annotations:
{{ toYaml .Values.service.annotations | indent 4 }}
{{- end }}
spec:
selector:
app.kubernetes.io/name: {{ include "local-ai.name" . }}
type: "{{ .Values.service.type }}"
ports:
- protocol: TCP
port: 8080
targetPort: 8080

View File

@@ -0,0 +1,38 @@
deployment:
image: quay.io/go-skynet/local-ai:latest
env:
threads: 14
contextSize: 512
modelsPath: "/models"
volume:
enabled: false
service:
type: ClusterIP
annotations: {}
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
# (requires https://github.com/kubevirt/containerized-data-importer)
dataVolume:
enabled: false
source:
type: "http" # Source type. One of: [ http | s3 ]
url: "http://<model_server>/<model_archive>" # e.g. koala-7B-4bit-128g.GGML.tar
# caCertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
# and a base64-encoded PEM certificate
caCertConfigMap: ""
# SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
# that may include sensitive information. Only applicable for the http source type.
secretExtraHeaders: []
pvc:
accessModes:
- ReadWriteOnce
size: 5Gi
secret:
enabled: false
username: "" # base64 encoded
password: "" # base64 encoded

View File

@@ -12,4 +12,4 @@ services:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
command: ["/usr/bin/local-ai" ]

View File

@@ -1,11 +0,0 @@
#!/bin/bash
set -e
cd /build
if [ "$REBUILD" != "false" ]; then
rm -rf ./local-ai
make build
fi
./local-ai "$@"

View File

@@ -2,108 +2,10 @@
Here is a list of projects that can easily be integrated with the LocalAI backend.
### Projects
## Projects
### AutoGPT
_by [@mudler](https://github.com/mudler)_
This example shows how to use AutoGPT with LocalAI.
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/autoGPT/)
### Chatbot-UI
_by [@mkellerman](https://github.com/mkellerman)_
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/)
### Discord bot
_by [@mudler](https://github.com/mudler)_
Run a discord bot which lets you talk directly with a model
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/discord-bot/), or for a live demo you can talk with our bot in #random-bot in our discord server.
### Langchain
_by [@dave-gray101](https://github.com/dave-gray101)_
A ready to use example to show e2e how to integrate LocalAI with langchain
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain/)
### Langchain Python
_by [@mudler](https://github.com/mudler)_
A ready to use example to show e2e how to integrate LocalAI with langchain
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/)
### LocalAI WebUI
_by [@dhruvgera](https://github.com/dhruvgera)_
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
A light, community-maintained web interface for LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/localai-webui/)
### How to run rwkv models
_by [@mudler](https://github.com/mudler)_
A full example on how to run RWKV models with LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv/)
### PrivateGPT
_by [@mudler](https://github.com/mudler)_
A full example on how to run PrivateGPT with LocalAI
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/privateGPT/)
### Slack bot
_by [@mudler](https://github.com/mudler)_
Run a slack bot which lets you talk directly with a model
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)
### Question answering on documents with llama-index
_by [@mudler](https://github.com/mudler)_
Shows how to integrate with [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/)
### Question answering on documents with langchain and chroma
_by [@mudler](https://github.com/mudler)_
Shows how to integrate with `Langchain` and `Chroma` to enable question answering on a set of documents.
[Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma/)
### Template for Runpod.io
_by [@fHachenberg](https://github.com/fHachenberg)_
Allows running any LocalAI-compatible model as a backend on the servers of https://runpod.io
[Check it out here](https://runpod.io/gsc?template=uv9mtqnrd0&ref=984wlcra)
- [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/) (by [@mkellerman](https://github.com/mkellerman))
## Want to contribute?
Create an issue, and put `Example: <description>` in the title! We will post your examples here.
Create an issue, and put `Example: <description>` in the title! We will post your examples here.

View File

@@ -1,5 +0,0 @@
OPENAI_API_KEY=sk---anystringhere
OPENAI_API_BASE=http://api:8080/v1
# Models to preload at start
# Here we configure gpt4all as gpt-3.5-turbo and bert as embeddings
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}]

View File

@@ -1,32 +0,0 @@
# AutoGPT
Example of integration with [AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT).
## Run
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/autoGPT
docker-compose run --rm auto-gpt
```
Note: the example automatically downloads the `gpt4all` model because it is available under a permissive license. However, the GPT4All model does not seem capable enough to drive AutoGPT; WizardLM-7b-uncensored appears to perform better (with `f16: true`).
See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`.
## Without docker
Run AutoGPT with `OPENAI_API_BASE` pointing to the LocalAI endpoint. If you run it locally for instance:
```
OPENAI_API_BASE=http://localhost:8080 python ...
```
Note: you need models named `gpt-3.5-turbo` and `text-embedding-ada-002`. You can preload them when LocalAI starts by setting the following in the environment:
```
PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}, { "url": "github:go-skynet/model-gallery/bert-embeddings.yaml", "name": "text-embedding-ada-002"}]
```

View File

@@ -1,42 +0,0 @@
version: "3.9"
services:
api:
image: quay.io/go-skynet/local-ai:latest
ports:
- 8080:8080
env_file:
- .env
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
auto-gpt:
image: significantgravitas/auto-gpt
depends_on:
api:
condition: service_healthy
redis:
condition: service_started
env_file:
- .env
environment:
MEMORY_BACKEND: ${MEMORY_BACKEND:-redis}
REDIS_HOST: ${REDIS_HOST:-redis}
profiles: ["exclude-from-up"]
volumes:
- ./auto_gpt_workspace:/app/autogpt/auto_gpt_workspace
- ./data:/app/data
## allow auto-gpt to write logs to disk
- ./logs:/app/logs
## uncomment following lines if you want to make use of these files
## you must have them existing in the same folder as this docker-compose.yml
#- type: bind
# source: ./azure.yaml
# target: /app/azure.yaml
#- type: bind
# source: ./ai_settings.yaml
# target: /app/ai_settings.yaml
redis:
image: "redis/redis-stack-server:latest"

View File

@@ -1,48 +1,18 @@
# chatbot-ui
Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
## Setup
- Set the `services > api > volumes` parameter in the docker-compose.yaml file. This should point to your local folder with your models.
- Copy/Rename your model file to `gpt-3.5-turbo` (without any .bin file extension).
- Type `docker compose up` to run the api and the Web UI.
- Open http://localhost:3000 for the Web UI.
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/chatbot-ui
## Known issues
- The model can't be selected from the UI; it appears hardcoded to `gpt-3.5-turbo`.
- If your machine is slow, the UI may time out on the request to the API.
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --pull always
# or you can build the images with:
# docker-compose up -d --build
```
## Pointing chatbot-ui to a separately managed LocalAI service
If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
```
version: '3.6'
services:
chatgpt:
image: ghcr.io/mckaywrigley/chatbot-ui:main
ports:
- 3000:3000
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
```
Once you've edited the docker-compose file, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
## Accessing chatbot-ui
Open http://localhost:3000 for the Web UI.
### Links
- [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui)

View File

@@ -3,12 +3,11 @@ version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile
ports:
- 8080:8080
environment:
- THREADS=4
- CONTEXT_SIZE=512
- DEBUG=true
- MODELS_PATH=/models
volumes:
@@ -21,4 +20,4 @@ services:
- 3000:3000
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_HOST=http://api:8080'
- 'OPENAI_API_HOST=http://host.docker.internal:8080'

View File

@@ -1 +0,0 @@
{{.Input}}

View File

@@ -1,16 +0,0 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: gpt4all

View File

@@ -1,4 +0,0 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -1,6 +0,0 @@
OPENAI_API_KEY=x
DISCORD_BOT_TOKEN=x
DISCORD_CLIENT_ID=x
OPENAI_API_BASE=http://api:8080
ALLOWED_SERVER_IDS=x
SERVER_TO_MODERATION_CHANNEL=1:1

View File

@@ -1,76 +0,0 @@
# discord-bot
![Screenshot from 2023-05-01 07-58-19](https://user-images.githubusercontent.com/2420543/235413924-0cb2e75b-f2d6-4119-8610-44386e44afb8.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/discord-bot
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# Set the discord bot options (see: https://github.com/go-skynet/gpt-discord-bot#setup)
cp -rfv .env.example .env
vim .env
# start with docker-compose
docker-compose up -d --build
```
Note: see setup options here: https://github.com/go-skynet/gpt-discord-bot#setup
Open up the URL in the console and give permission to the bot in your server. Start a thread with `/chat ..`
## Kubernetes
- install the local-ai chart first
- change OPENAI_API_BASE to point to the API address and apply the discord-bot manifest:
```yaml
apiVersion: v1
kind: Namespace
metadata:
name: discord-bot
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: localai
namespace: discord-bot
labels:
app: localai
spec:
selector:
matchLabels:
app: localai
replicas: 1
template:
metadata:
labels:
app: localai
name: localai
spec:
containers:
- name: localai-discord
env:
- name: OPENAI_API_KEY
value: "x"
- name: DISCORD_BOT_TOKEN
value: ""
- name: DISCORD_CLIENT_ID
value: ""
- name: OPENAI_API_BASE
value: "http://local-ai.default.svc.cluster.local:8080"
- name: ALLOWED_SERVER_IDS
value: "xx"
- name: SERVER_TO_MODERATION_CHANNEL
value: "1:1"
image: quay.io/go-skynet/gpt-discord-bot:main
```

View File

@@ -1,21 +0,0 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
bot:
image: quay.io/go-skynet/gpt-discord-bot:main
env_file:
- .env

View File

@@ -1 +0,0 @@
../chatbot-ui/models/

View File

@@ -1,5 +0,0 @@
THREADS=4
CONTEXT_SIZE=512
MODELS_PATH=/models
DEBUG=true
# BUILD_TYPE=generic

View File

@@ -1,4 +0,0 @@
db/
state_of_the_union.txt
models/bert
models/ggml-gpt4all-j

View File

@@ -1,63 +0,0 @@
# Data query example
This example makes use of [langchain and chroma](https://blog.langchain.dev/langchain-chroma/) to enable question answering on a set of documents.
## Setup
Download the models and start the API:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain-chroma
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# configure your .env
# NOTE: ensure that THREADS does not exceed your machine's CPU cores
mv .env.example .env
# start with docker-compose
docker-compose up -d --build
# tail the logs & wait until the build completes
docker logs -f langchain-chroma-api-1
```
### Python requirements
```
pip install -r requirements.txt
```
### Create a storage
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
Note: **OPENAI_API_KEY** is not required. However, the library might fail if no API key is passed, so an arbitrary string can be used.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
wget https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt
python store.py
```
After it finishes, a directory "db" will be created with the vector index database.
## Query
We can now query the dataset.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python query.py
# President Trump recently stated during a press conference regarding tax reform legislation that "we're getting rid of all these loopholes." He also mentioned that he wants to simplify the system further through changes such as increasing the standard deduction amount and making other adjustments aimed at reducing taxpayers' overall burden.
```
Keep in mind that results can be hit or miss!

View File

@@ -1,15 +0,0 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- ../../.env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]

View File

@@ -1 +0,0 @@
{{.Input}}

View File

@@ -1,6 +0,0 @@
name: text-embedding-ada-002
parameters:
model: bert
threads: 4
backend: bert-embeddings
embeddings: true

View File

@@ -1,16 +0,0 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: gpt4all

View File

@@ -1,4 +0,0 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -1,23 +0,0 @@
import os
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores.base import VectorStoreRetriever
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# Load and process the text
embedding = OpenAIEmbeddings()
persist_directory = 'db'
# Now we can load the persisted database from disk, and use it as normal.
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path)
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)
retriever = VectorStoreRetriever(vectorstore=vectordb)
qa = RetrievalQA.from_llm(llm=llm, retriever=retriever)
query = "What the president said about taxes ?"
print(qa.run(query))

View File

@@ -1,4 +0,0 @@
langchain==0.0.160
openai==0.27.6
chromadb==0.3.21
llama-index==0.6.2

View File

@@ -1,25 +0,0 @@
import os
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# Load and process the text
loader = TextLoader('state_of_the_union.txt')
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=70)
texts = text_splitter.split_documents(documents)
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
vectordb = Chroma.from_documents(documents=texts, embedding=embedding, persist_directory=persist_directory)
vectordb.persist()
vectordb = None

View File

@@ -1,36 +0,0 @@
## Langchain-python
Langchain example from [quickstart](https://python.langchain.com/en/latest/getting_started/getting_started.html).
To interact with langchain, just set the `OPENAI_API_BASE` URL and provide an arbitrary string as the API token.
See the example below:
```
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain-python
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
pip install langchain
pip install openai
export OPENAI_API_BASE=http://localhost:8080
# Note: **OPENAI_API_KEY** is not required. However, the library might fail if no API key is passed, so an arbitrary string can be used.
export OPENAI_API_KEY=sk-
python test.py
# A good company name for a company that makes colorful socks would be "Colorsocks".
python agent.py
```
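For reference, here is a minimal Python sketch of the same flow (assuming the stack above is running and `OPENAI_API_BASE`/`OPENAI_API_KEY` are exported as shown; passing `openai_api_base` explicitly, as `agent.py` does, also works):
```python
import os

from langchain.llms import OpenAI

# Any non-empty key is accepted by LocalAI; the base URL points at the local API.
base_path = os.environ.get("OPENAI_API_BASE", "http://localhost:8080/v1")

llm = OpenAI(temperature=0.9, model_name="gpt-3.5-turbo", openai_api_base=base_path)
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))
```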

View File

@@ -1,44 +0,0 @@
## This is a fork/based from https://gist.github.com/wiseman/4a706428eaabf4af1002a07a114f61d6
from io import StringIO
import sys
import os
from typing import Dict, Optional
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents.tools import Tool
from langchain.llms import OpenAI
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
class PythonREPL:
"""Simulates a standalone Python REPL."""
def __init__(self):
pass
def run(self, command: str) -> str:
"""Run command and returns anything printed."""
old_stdout = sys.stdout
sys.stdout = mystdout = StringIO()
try:
exec(command, globals())
sys.stdout = old_stdout
output = mystdout.getvalue()
except Exception as e:
sys.stdout = old_stdout
output = str(e)
return output
llm = OpenAI(temperature=0.0, openai_api_base=base_path, model_name=model_name)
python_repl = Tool(
"Python REPL",
PythonREPL().run,
"""A Python shell. Use this to execute python commands. Input should be a valid python command.
If you expect output it should be printed out.""",
)
tools = [python_repl]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)
agent.run("What is the 10th fibonacci number?")

View File

@@ -1,16 +0,0 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]

View File

@@ -1 +0,0 @@
../chatbot-ui/models

View File

@@ -1,6 +0,0 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

View File

@@ -1,2 +0,0 @@
models/ggml-koala-13B-4bit-128g
models/ggml-gpt4all-j

View File

@@ -1,6 +0,0 @@
FROM node:latest
COPY ./langchainjs-localai-example /app
WORKDIR /app
RUN npm install
RUN npm run build
ENTRYPOINT [ "npm", "run", "start" ]

View File

@@ -1,5 +0,0 @@
FROM python:3.10-bullseye
COPY ./langchainpy-localai-example /app
WORKDIR /app
RUN pip install --no-cache-dir -r requirements.txt
ENTRYPOINT [ "python", "./full_demo.py" ]

View File

@@ -1,30 +0,0 @@
# langchain
Example of using langchain with the standard OpenAI llm module and LocalAI. Includes docker compose profiles for both the TypeScript and Python versions.
**Please Note** - This is a tech demo example at this time. ggml-gpt4all-j has pretty terrible results for most langchain applications with the settings used in this example.
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/langchain
# (optional) - Edit the example code in typescript.
# vi ./langchainjs-localai-example/index.ts
# Download gpt4all-j to models/
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose for typescript!
docker-compose --profile ts up --build
# or start with docker-compose for python!
docker-compose --profile py up --build
```
## Copyright
Some of the example code in index.mts and full_demo.py is adapted from the langchainjs project and is Copyright (c) Harrison Chase. Used under the terms of the MIT license, as is the remainder of this code.

View File

@@ -1,43 +0,0 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile
ports:
- 8080:8080
environment:
- DEBUG=true
- MODELS_PATH=/models
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai" ]
js:
build:
context: .
dockerfile: JS.Dockerfile
profiles:
- js
- ts
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'
py:
build:
context: .
dockerfile: PY.Dockerfile
profiles:
- py
depends_on:
- "api"
environment:
- 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
- 'OPENAI_API_BASE=http://api:8080/v1'
- 'MODEL_NAME=gpt-3.5-turbo' #gpt-3.5-turbo' # ggml-gpt4all-j' # ggml-koala-13B-4bit-128g'

View File

@@ -1,2 +0,0 @@
node_modules/
dist/

View File

@@ -1,20 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
// "skipFiles": [
// "<node_internals>/**"
// ],
"program": "${workspaceFolder}\\dist\\index.mjs",
"outFiles": [
"${workspaceFolder}/**/*.js"
]
}
]
}

View File

File diff suppressed because it is too large.

View File

@@ -1,21 +0,0 @@
{
"name": "langchainjs-localai-example",
"version": "0.1.0",
"description": "Trivial Example of using langchain + the OpenAI API + LocalAI together",
"main": "index.mjs",
"scripts": {
"build": "tsc --build",
"clean": "tsc --build --clean",
"start": "node --trace-warnings dist/index.mjs"
},
"author": "dave@gray101.com",
"license": "MIT",
"devDependencies": {
"@types/node": "^18.16.4",
"typescript": "^5.0.4"
},
"dependencies": {
"langchain": "^0.0.67",
"typeorm": "^0.3.15"
}
}

View File

@@ -1,79 +0,0 @@
import { OpenAIChat } from "langchain/llms/openai";
import { loadQAStuffChain } from "langchain/chains";
import { Document } from "langchain/document";
import { initializeAgentExecutorWithOptions } from "langchain/agents";
import {Calculator} from "langchain/tools/calculator";
const pathToLocalAi = process.env['OPENAI_API_BASE'] || 'http://api:8080/v1';
const fakeApiKey = process.env['OPENAI_API_KEY'] || '-';
const modelName = process.env['MODEL_NAME'] || 'gpt-3.5-turbo';
function getModel(): OpenAIChat {
return new OpenAIChat({
prefixMessages: [
{
role: "system",
content: "You are a helpful assistant that answers in pirate language",
},
],
modelName: modelName,
maxTokens: 50,
openAIApiKey: fakeApiKey,
maxRetries: 2
}, {
basePath: pathToLocalAi,
apiKey: fakeApiKey,
});
}
// Minimal example.
export const run = async () => {
const model = getModel();
console.log(`about to model.call at ${new Date().toUTCString()}`);
const res = await model.call(
"What would be a good company name a company that makes colorful socks?"
);
console.log(`${new Date().toUTCString()}`);
console.log({ res });
};
await run();
// This example uses the `StuffDocumentsChain`
export const run2 = async () => {
const model = getModel();
const chainA = loadQAStuffChain(model);
const docs = [
new Document({ pageContent: "Harrison went to Harvard." }),
new Document({ pageContent: "Ankush went to Princeton." }),
];
const resA = await chainA.call({
input_documents: docs,
question: "Where did Harrison go to college?",
});
console.log({ resA });
};
await run2();
// Quickly thrown together example of using tools + agents.
// This seems like it should work, but it doesn't yet.
export const temporarilyBrokenToolTest = async () => {
const model = getModel();
const executor = await initializeAgentExecutorWithOptions([new Calculator(true)], model, {
agentType: "zero-shot-react-description",
});
console.log("Loaded agent.");
const input = `What is the value of (500 *2) + 350 - 13?`;
console.log(`Executing with input "${input}"...`);
const result = await executor.call({ input });
console.log(`Got output ${result.output}`);
}
await temporarilyBrokenToolTest();

View File

@@ -1,15 +0,0 @@
{
"compilerOptions": {
"target": "es2022",
"lib": ["ES2022", "DOM"],
"module": "ES2022",
"moduleResolution": "node",
"strict": true,
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"isolatedModules": true,
"outDir": "./dist"
},
"include": ["src", "test"],
"exclude": ["node_modules", "dist"]
}

View File

@@ -1,24 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"redirectOutput": true,
"justMyCode": false
},
{
"name": "Python: Attach to Port 5678",
"type": "python",
"request": "attach",
"connect": {
"host": "localhost",
"port": 5678
},
"justMyCode": false
}
]
}

View File

@@ -1,3 +0,0 @@
{
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python"
}

View File

@@ -1,46 +0,0 @@
import os
import logging
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
AIMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
# This logging incantation makes it easy to see that you're actually reaching your LocalAI instance rather than OpenAI.
logging.basicConfig(level=logging.DEBUG)
print('Langchain + LocalAI PYTHON Tests')
base_path = os.environ.get('OPENAI_API_BASE', 'http://api:8080/v1')
key = os.environ.get('OPENAI_API_KEY', '-')
model_name = os.environ.get('MODEL_NAME', 'gpt-3.5-turbo')
chat = ChatOpenAI(temperature=0, openai_api_base=base_path, openai_api_key=key, model_name=model_name, max_tokens=100)
print("Created ChatOpenAI for ", chat.model_name)
template = "You are a helpful assistant that translates {input_language} to {output_language}. The next message will be a sentence in {input_language}. Respond ONLY with the translation in {output_language}. Do not respond in {input_language}!"
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])
print("ABOUT to execute")
# get a chat completion from the formatted messages
response = chat(chat_prompt.format_prompt(input_language="English", output_language="French", text="I love programming.").to_messages())
print(response)
print(".")

View File

@@ -1,32 +0,0 @@
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2022.12.7
charset-normalizer==3.1.0
colorama==0.4.6
dataclasses-json==0.5.7
debugpy==1.6.7
frozenlist==1.3.3
greenlet==2.0.2
idna==3.4
langchain==0.0.159
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
mypy-extensions==1.0.0
numexpr==2.8.4
numpy==1.24.3
openai==0.27.6
openapi-schema-pydantic==1.2.4
packaging==23.1
pydantic==1.10.7
PyYAML==6.0
requests==2.29.0
SQLAlchemy==2.0.12
tenacity==8.2.2
tqdm==4.65.0
typing-inspect==0.8.0
typing_extensions==4.5.0
urllib3==1.26.15
yarl==1.9.2

View File

@@ -1,6 +0,0 @@
from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9,model_name="gpt-3.5-turbo")
text = "What would be a good company name for a company that makes colorful socks?"
print(llm(text))

View File

@@ -1 +0,0 @@
{{.Input}}

View File

@@ -1,17 +0,0 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j # ggml-koala-13B-4bit-128g
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
backend: "gptj"
template:
completion: completion
chat: gpt4all

View File

@@ -1,4 +0,0 @@
The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
### Prompt:
{{.Input}}
### Response:

View File

@@ -1,26 +0,0 @@
# localai-webui
Example of integration with [dhruvgera/localai-frontend](https://github.com/Dhruvgera/LocalAI-frontend).
![image](https://user-images.githubusercontent.com/42107491/235344183-44b5967d-ba22-4331-804c-8da7004a5d35.png)
## Setup
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/localai-webui
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# Download any desired models to models/ in the parent LocalAI project dir
# For example: wget https://gpt4all.io/models/ggml-gpt4all-j.bin
# start with docker-compose
docker-compose up -d --build
```
Open http://localhost:3000 for the Web UI.

View File

@@ -1,20 +0,0 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: .
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]
frontend:
image: quay.io/go-skynet/localai-frontend:master
ports:
- 3000:3000

View File

@@ -1,25 +0,0 @@
# privateGPT
This example is a re-adaptation of https://github.com/imartinez/privateGPT to work with LocalAI and OpenAI endpoints. A fork with the required changes is available at https://github.com/go-skynet/privateGPT (PR: https://github.com/imartinez/privateGPT/pull/408).
Follow the instructions in https://github.com/go-skynet/privateGPT:
```bash
git clone git@github.com:go-skynet/privateGPT.git
cd privateGPT
pip install -r requirements.txt
```
Rename `example.env` to `.env` and edit the variables appropriately.
This is an example `.env` file for LocalAI:
```
PERSIST_DIRECTORY=db
# Set to OpenAI here
MODEL_TYPE=OpenAI
EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2
MODEL_N_CTX=1000
# LocalAI URL
OPENAI_API_BASE=http://localhost:8080/v1
```

View File

@@ -1 +0,0 @@
storage/

View File

@@ -1,69 +0,0 @@
# Data query example
This example makes use of [Llama-Index](https://gpt-index.readthedocs.io/en/stable/getting_started/installation.html) to enable question answering on a set of documents.
It loosely follows [the quickstart](https://gpt-index.readthedocs.io/en/stable/guides/primer/usage_pattern.html).
Summary of the steps:
- prepare the dataset (and store it into `data`)
- prepare a vector index database to run queries on
- run queries
## Requirements
You will need a training data set. Copy it into the `data` directory.
## Setup
Start the API:
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI/examples/query_data
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
# start with docker-compose
docker-compose up -d --build
```
### Create a storage
In this step we will create a local vector database from our document set, so later we can ask questions on it with the LLM.
Note: **OPENAI_API_KEY** is not required. However, the library might fail if no API key is passed, so an arbitrary string can be used.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python store.py
```
After it finishes, a directory "storage" will be created with the vector index database.
## Query
We can now query the dataset.
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python query.py
```
## Update
To update our vector database, run `update.py`
```bash
export OPENAI_API_BASE=http://localhost:8080/v1
export OPENAI_API_KEY=sk-
python update.py
```

View File

View File

@@ -1,15 +0,0 @@
version: '3.6'
services:
api:
image: quay.io/go-skynet/local-ai:latest
build:
context: ../../
dockerfile: Dockerfile
ports:
- 8080:8080
env_file:
- .env
volumes:
- ./models:/models:cached
command: ["/usr/bin/local-ai"]

View File

@@ -1 +0,0 @@
{{.Input}}

View File

@@ -1,6 +0,0 @@
name: text-embedding-ada-002
parameters:
model: bert
threads: 14
backend: bert-embeddings
embeddings: true

View File

@@ -1,16 +0,0 @@
name: gpt-3.5-turbo
parameters:
model: ggml-gpt4all-j
top_k: 80
temperature: 0.2
top_p: 0.7
context_size: 1024
stopwords:
- "HUMAN:"
- "GPT:"
roles:
user: " "
system: " "
template:
completion: completion
chat: gpt4all

View File

@@ -1,35 +0,0 @@
import os
# Uncomment to specify your OpenAI API key here (local testing only, not in production!), or add corresponding environment variable (recommended)
# os.environ['OPENAI_API_KEY']= ""
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from langchain.llms.openai import OpenAI
from llama_index import StorageContext, load_index_from_storage
base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
# This example uses gpt-3.5-turbo by default; feel free to change if desired
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
# Configure prompt parameters and initialise helper
max_input_size = 500
num_output = 256
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
# Load documents from the 'data' directory
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir='./storage')
# load index
index = load_index_from_storage(storage_context, service_context=service_context, )
query_engine = index.as_query_engine()
data = input("Question: ")
response = query_engine.query(data)
print(response)

Some files were not shown because too many files have changed in this diff.