mirror of
https://github.com/mudler/LocalAI.git
synced 2026-02-03 19:22:39 -05:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
574fa67bdc | ||
|
|
e19d7226f8 | ||
|
|
0843fe6c65 | ||
|
|
62a02cd1fe | ||
|
|
949da7792d | ||
|
|
ce724a7e55 | ||
|
|
0a06c80801 | ||
|
|
edc55ade61 | ||
|
|
09e5d9007b | ||
|
|
db926896bd | ||
|
|
ab7b4d5ee9 |
86
.github/workflows/image-pr.yml
vendored
Normal file
86
.github/workflows/image-pr.yml
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
---
|
||||
name: 'build container images tests'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
concurrency:
|
||||
group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
extras-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
# Pushing with all jobs in parallel
|
||||
# eats the bandwidth of all the nodes
|
||||
max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'extras'
|
||||
runs-on: 'arc-runner-set'
|
||||
core-image-build:
|
||||
uses: ./.github/workflows/image_build.yml
|
||||
with:
|
||||
tag-latest: ${{ matrix.tag-latest }}
|
||||
tag-suffix: ${{ matrix.tag-suffix }}
|
||||
ffmpeg: ${{ matrix.ffmpeg }}
|
||||
image-type: ${{ matrix.image-type }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
cuda-major-version: ${{ matrix.cuda-major-version }}
|
||||
cuda-minor-version: ${{ matrix.cuda-minor-version }}
|
||||
platforms: ${{ matrix.platforms }}
|
||||
runs-on: ${{ matrix.runs-on }}
|
||||
secrets:
|
||||
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
|
||||
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- build-type: ''
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
- build-type: 'cublas'
|
||||
cuda-major-version: "12"
|
||||
cuda-minor-version: "1"
|
||||
platforms: 'linux/amd64'
|
||||
tag-latest: 'false'
|
||||
tag-suffix: '-cublas-cuda12-ffmpeg-core'
|
||||
ffmpeg: 'true'
|
||||
image-type: 'core'
|
||||
runs-on: 'ubuntu-latest'
|
||||
1
.github/workflows/image.yml
vendored
1
.github/workflows/image.yml
vendored
@@ -2,7 +2,6 @@
|
||||
name: 'build container images'
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
18
.github/workflows/release.yaml
vendored
18
.github/workflows/release.yaml
vendored
@@ -34,10 +34,22 @@ jobs:
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential ffmpeg
|
||||
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: grpc
|
||||
key: ${{ runner.os }}-grpc
|
||||
- name: Build grpc
|
||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12 install
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12
|
||||
- name: Install gRPC
|
||||
run: |
|
||||
cd grpc && cd cmake/build && sudo make -j12 install
|
||||
|
||||
- name: Build
|
||||
id: build
|
||||
|
||||
19
.github/workflows/test.yml
vendored
19
.github/workflows/test.yml
vendored
@@ -86,11 +86,22 @@ jobs:
|
||||
sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
|
||||
# Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
|
||||
GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
|
||||
|
||||
- name: Cache grpc
|
||||
id: cache-grpc
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: grpc
|
||||
key: ${{ runner.os }}-grpc
|
||||
- name: Build grpc
|
||||
if: steps.cache-grpc.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12 install
|
||||
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
|
||||
-DgRPC_BUILD_TESTS=OFF \
|
||||
../.. && sudo make -j12
|
||||
- name: Install gRPC
|
||||
run: |
|
||||
cd grpc && cd cmake/build && sudo make -j12 install
|
||||
- name: Test
|
||||
run: |
|
||||
GO_TAGS="stablediffusion tts" make test
|
||||
|
||||
@@ -15,7 +15,6 @@ ENV BUILD_TYPE=${BUILD_TYPE}
|
||||
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh"
|
||||
|
||||
ENV GALLERIES='[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
|
||||
ARG GO_TAGS="stablediffusion tinydream tts"
|
||||
|
||||
RUN apt-get update && \
|
||||
@@ -64,12 +63,12 @@ RUN curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmo
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list && \
|
||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | tee -a /etc/apt/sources.list.d/conda.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y conda
|
||||
apt-get install -y conda && apt-get clean
|
||||
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
RUN apt-get install -y espeak-ng espeak
|
||||
RUN apt-get install -y espeak-ng espeak && apt-get clean
|
||||
|
||||
###################################
|
||||
###################################
|
||||
@@ -127,10 +126,11 @@ ARG CUDA_MAJOR_VERSION=11
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV PIP_CACHE_PURGE=true
|
||||
|
||||
# Add FFmpeg
|
||||
RUN if [ "${FFMPEG}" = "true" ]; then \
|
||||
apt-get install -y ffmpeg \
|
||||
apt-get install -y ffmpeg && apt-get clean \
|
||||
; fi
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
2
Makefile
2
Makefile
@@ -8,7 +8,7 @@ GOLLAMA_VERSION?=aeba71ee842819da681ea537e78846dc75949ac0
|
||||
|
||||
GOLLAMA_STABLE_VERSION?=50cee7712066d9e38306eccadcfbb44ea87df4b7
|
||||
|
||||
CPPLLAMA_VERSION?=cb1e2818e0e12ec99f7236ec5d4f3ffd8bcc2f4a
|
||||
CPPLLAMA_VERSION?=226460cc0d5b185bc6685fb76f418fd9418d7add
|
||||
|
||||
# gpt4all version
|
||||
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
|
||||
|
||||
@@ -20,6 +20,9 @@
|
||||
</a>
|
||||
</p>
|
||||
|
||||
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
|
||||
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
|
||||
|
||||
> :bulb: Get help - [❓FAQ](https://localai.io/faq/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [:speech_balloon: Discord](https://discord.gg/uJAeKSAGDy) [:book: Documentation website](https://localai.io/)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
23
api/api.go
23
api/api.go
@@ -5,7 +5,6 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
config "github.com/go-skynet/LocalAI/api/config"
|
||||
@@ -17,7 +16,7 @@ import (
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/go-skynet/LocalAI/pkg/startup"
|
||||
|
||||
"github.com/gofiber/fiber/v2"
|
||||
"github.com/gofiber/fiber/v2/middleware/cors"
|
||||
@@ -38,25 +37,7 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
|
||||
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
|
||||
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
|
||||
|
||||
modelPath := options.Loader.ModelPath
|
||||
if len(options.ModelsURL) > 0 {
|
||||
for _, url := range options.ModelsURL {
|
||||
if utils.LooksLikeURL(url) {
|
||||
// md5 of model name
|
||||
md5Name := utils.MD5(url)
|
||||
|
||||
// check if file exists
|
||||
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
|
||||
err := utils.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
|
||||
utils.DisplayDownloadFunction(fileName, current, total, percent)
|
||||
})
|
||||
if err != nil {
|
||||
log.Error().Msgf("error loading model: %s", err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
startup.PreloadModelsConfigurations(options.Loader.ModelPath, options.ModelsURL...)
|
||||
|
||||
cl := config.NewConfigLoader()
|
||||
if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
|
||||
|
||||
@@ -16,9 +16,9 @@ import (
|
||||
. "github.com/go-skynet/LocalAI/api"
|
||||
"github.com/go-skynet/LocalAI/api/options"
|
||||
"github.com/go-skynet/LocalAI/metrics"
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/gallery"
|
||||
"github.com/go-skynet/LocalAI/pkg/model"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/gofiber/fiber/v2"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
@@ -61,7 +61,7 @@ func getModelStatus(url string) (response map[string]interface{}) {
|
||||
}
|
||||
|
||||
func getModels(url string) (response []gallery.GalleryModel) {
|
||||
utils.GetURI(url, func(url string, i []byte) error {
|
||||
downloader.GetURI(url, func(url string, i []byte) error {
|
||||
// Unmarshal YAML data into a struct
|
||||
return json.Unmarshal(i, &response)
|
||||
})
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v3"
|
||||
@@ -54,6 +55,9 @@ type Config struct {
|
||||
CUDA bool `yaml:"cuda"`
|
||||
|
||||
DownloadFiles []File `yaml:"download_files"`
|
||||
|
||||
Description string `yaml:"description"`
|
||||
Usage string `yaml:"usage"`
|
||||
}
|
||||
|
||||
type File struct {
|
||||
@@ -300,21 +304,21 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
|
||||
// Create file path
|
||||
filePath := filepath.Join(modelPath, file.Filename)
|
||||
|
||||
if err := utils.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
|
||||
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
modelURL := config.PredictionOptions.Model
|
||||
modelURL = utils.ConvertURL(modelURL)
|
||||
modelURL = downloader.ConvertURL(modelURL)
|
||||
|
||||
if utils.LooksLikeURL(modelURL) {
|
||||
if downloader.LooksLikeURL(modelURL) {
|
||||
// md5 of model name
|
||||
md5Name := utils.MD5(modelURL)
|
||||
|
||||
// check if file exists
|
||||
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
|
||||
err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
|
||||
err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -325,6 +329,15 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
|
||||
c.PredictionOptions.Model = md5Name
|
||||
cm.configs[i] = *c
|
||||
}
|
||||
if cm.configs[i].Name != "" {
|
||||
log.Info().Msgf("Model name: %s", cm.configs[i].Name)
|
||||
}
|
||||
if cm.configs[i].Description != "" {
|
||||
log.Info().Msgf("Model description: %s", cm.configs[i].Description)
|
||||
}
|
||||
if cm.configs[i].Usage != "" {
|
||||
log.Info().Msgf("Model usage: \n%s", cm.configs[i].Usage)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
.PHONY: autogptq
|
||||
autogptq:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name autogptq --file autogptq.yml
|
||||
@echo "Virtual environment created."
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate autogptq
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
@@ -13,3 +13,12 @@ if conda_env_exists "transformers" ; then
|
||||
else
|
||||
echo "Virtual environment already exists."
|
||||
fi
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
|
||||
pip cache purge
|
||||
fi
|
||||
@@ -45,7 +45,7 @@ dependencies:
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub==0.16.4
|
||||
- huggingface-hub
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
@@ -70,7 +70,6 @@ dependencies:
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- git+https://github.com/bigscience-workshop/petals
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
@@ -85,17 +84,16 @@ dependencies:
|
||||
- scipy==1.11.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
- torch==2.1.0
|
||||
- torchaudio==2.1.0
|
||||
- tokenizers
|
||||
- torch==2.1.2
|
||||
- torchaudio==2.1.2
|
||||
- tqdm==4.66.1
|
||||
- transformers==4.34.0
|
||||
- TTS==0.22.0
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- auto-gptq==0.6.0
|
||||
- yarl==1.9.2
|
||||
- soundfile
|
||||
- langid
|
||||
@@ -114,4 +112,7 @@ dependencies:
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm==0.2.7
|
||||
- transformers>=4.36.0 # Required for Mixtral.
|
||||
- xformers==0.0.23.post1
|
||||
prefix: /opt/conda/envs/transformers
|
||||
|
||||
@@ -46,7 +46,7 @@ dependencies:
|
||||
- fsspec==2023.6.0
|
||||
- funcy==2.0
|
||||
- grpcio==1.59.0
|
||||
- huggingface-hub==0.16.4
|
||||
- huggingface-hub
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jmespath==1.0.1
|
||||
@@ -59,7 +59,6 @@ dependencies:
|
||||
- packaging==23.2
|
||||
- pandas
|
||||
- peft==0.5.0
|
||||
- git+https://github.com/bigscience-workshop/petals
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
@@ -74,14 +73,14 @@ dependencies:
|
||||
- scipy==1.11.3
|
||||
- six==1.16.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.0
|
||||
- torch==2.1.0
|
||||
- torchaudio==2.1.0
|
||||
- tokenizers
|
||||
- torch==2.1.2
|
||||
- torchaudio==2.1.2
|
||||
- tqdm==4.66.1
|
||||
- transformers==4.34.0
|
||||
- triton==2.1.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- auto-gptq==0.6.0
|
||||
- urllib3==1.26.17
|
||||
- xxhash==3.4.1
|
||||
- yarl==1.9.2
|
||||
@@ -102,4 +101,7 @@ dependencies:
|
||||
- sudachipy
|
||||
- sudachidict_core
|
||||
- vocos
|
||||
- vllm==0.2.7
|
||||
- transformers>=4.36.0 # Required for Mixtral.
|
||||
- xformers==0.0.23.post1
|
||||
prefix: /opt/conda/envs/transformers
|
||||
@@ -21,7 +21,7 @@ _ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||
|
||||
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
||||
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', 'en')
|
||||
COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
|
||||
|
||||
# Implement the BackendServicer class with the service methods
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
@@ -38,6 +38,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
if not torch.cuda.is_available() and request.CUDA:
|
||||
return backend_pb2.Result(success=False, message="CUDA is not available")
|
||||
|
||||
self.AudioPath = None
|
||||
# List available 🐸TTS models
|
||||
print(TTS().list_models())
|
||||
if os.path.isabs(request.AudioPath):
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
.PHONY: exllama
|
||||
exllama:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name exllama --file exllama.yml
|
||||
@echo "Virtual environment created."
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
|
||||
@@ -5,11 +5,15 @@
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama
|
||||
source activate transformers
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
|
||||
|
||||
git clone https://github.com/turboderp/exllama $CONDA_PREFIX/exllama && pushd $CONDA_PREFIX/exllama && pip install -r requirements.txt && popd
|
||||
|
||||
cp -rfv $CONDA_PREFIX/exllama/* ./
|
||||
cp -rfv $CONDA_PREFIX/exllama/* ./
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
@@ -6,7 +6,7 @@
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
.PHONY: exllama2
|
||||
exllama2:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name exllama2 --file exllama2.yml
|
||||
@echo "Virtual environment created."
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
bash install.sh
|
||||
|
||||
.PHONY: run
|
||||
|
||||
@@ -5,10 +5,14 @@
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama2
|
||||
source activate transformers
|
||||
|
||||
echo $CONDA_PREFIX
|
||||
|
||||
git clone https://github.com/turboderp/exllamav2 $CONDA_PREFIX/exllamav2 && pushd $CONDA_PREFIX/exllamav2 && pip install -r requirements.txt && popd
|
||||
|
||||
cp -rfv $CONDA_PREFIX/exllamav2/* ./
|
||||
cp -rfv $CONDA_PREFIX/exllamav2/* ./
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
@@ -6,7 +6,7 @@
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate exllama2
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
.PHONY: petals
|
||||
petals:
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name petals --file petals.yml
|
||||
@echo "Virtual environment created."
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
|
||||
@@ -5,14 +5,16 @@
|
||||
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
CONDA_ENV=petals
|
||||
|
||||
# Activate conda environment
|
||||
# if source is available use it, or use conda
|
||||
#
|
||||
if [ -f /opt/conda/bin/activate ]; then
|
||||
source activate transformers
|
||||
source activate $CONDA_ENV
|
||||
else
|
||||
eval "$(conda shell.bash hook)"
|
||||
conda activate transformers
|
||||
conda activate $CONDA_ENV
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
|
||||
@@ -3,7 +3,16 @@
|
||||
## A bash script wrapper that runs the transformers server with conda
|
||||
|
||||
# Activate conda environment
|
||||
source activate transformers
|
||||
CONDA_ENV=petals
|
||||
# Activate conda environment
|
||||
# if source is available use it, or use conda
|
||||
#
|
||||
if [ -f /opt/conda/bin/activate ]; then
|
||||
source activate $CONDA_ENV
|
||||
else
|
||||
eval "$(conda shell.bash hook)"
|
||||
conda activate $CONDA_ENV
|
||||
fi
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
@@ -12,4 +12,8 @@ echo $CONDA_PREFIX
|
||||
|
||||
git clone https://github.com/Plachtaa/VALL-E-X.git $CONDA_PREFIX/vall-e-x && pushd $CONDA_PREFIX/vall-e-x && git checkout -b build $SHA && pip install -r requirements.txt && popd
|
||||
|
||||
cp -rfv $CONDA_PREFIX/vall-e-x/* ./
|
||||
cp -rfv $CONDA_PREFIX/vall-e-x/* ./
|
||||
|
||||
if [ "$PIP_CACHE_PURGE" = true ] ; then
|
||||
pip cache purge
|
||||
fi
|
||||
@@ -1,8 +1,6 @@
|
||||
.PHONY: vllm
|
||||
vllm:
|
||||
@echo "Creating virtual environment..."
|
||||
@conda env create --name vllm --file vllm.yml
|
||||
@echo "Virtual environment created."
|
||||
$(MAKE) -C ../common-env/transformers
|
||||
|
||||
.PHONY: run
|
||||
run:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
export PATH=$PATH:/opt/conda/bin
|
||||
|
||||
# Activate conda environment
|
||||
source activate vllm
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
## A bash script wrapper that runs the transformers server with conda
|
||||
|
||||
# Activate conda environment
|
||||
source activate vllm
|
||||
source activate transformers
|
||||
|
||||
# get the directory where the bash script is located
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
name: vllm
|
||||
channels:
|
||||
- defaults
|
||||
dependencies:
|
||||
- _libgcc_mutex=0.1=main
|
||||
- _openmp_mutex=5.1=1_gnu
|
||||
- bzip2=1.0.8=h7b6447c_0
|
||||
- ca-certificates=2023.08.22=h06a4308_0
|
||||
- ld_impl_linux-64=2.38=h1181459_1
|
||||
- libffi=3.4.4=h6a678d5_0
|
||||
- libgcc-ng=11.2.0=h1234567_1
|
||||
- libgomp=11.2.0=h1234567_1
|
||||
- libstdcxx-ng=11.2.0=h1234567_1
|
||||
- libuuid=1.41.5=h5eee18b_0
|
||||
- ncurses=6.4=h6a678d5_0
|
||||
- openssl=3.0.11=h7f8727e_2
|
||||
- pip=23.2.1=py311h06a4308_0
|
||||
- python=3.11.5=h955ad1f_0
|
||||
- readline=8.2=h5eee18b_0
|
||||
- setuptools=68.0.0=py311h06a4308_0
|
||||
- sqlite=3.41.2=h5eee18b_0
|
||||
- tk=8.6.12=h1ccaba5_0
|
||||
- wheel=0.41.2=py311h06a4308_0
|
||||
- xz=5.4.2=h5eee18b_0
|
||||
- zlib=1.2.13=h5eee18b_0
|
||||
- pip:
|
||||
- aiosignal==1.3.1
|
||||
- anyio==3.7.1
|
||||
- attrs==23.1.0
|
||||
- certifi==2023.7.22
|
||||
- charset-normalizer==3.3.0
|
||||
- click==8.1.7
|
||||
- cmake==3.27.6
|
||||
- fastapi==0.103.2
|
||||
- filelock==3.12.4
|
||||
- frozenlist==1.4.0
|
||||
- fsspec==2023.9.2
|
||||
- grpcio==1.59.0
|
||||
- h11==0.14.0
|
||||
- httptools==0.6.0
|
||||
- huggingface-hub==0.17.3
|
||||
- idna==3.4
|
||||
- jinja2==3.1.2
|
||||
- jsonschema==4.19.1
|
||||
- jsonschema-specifications==2023.7.1
|
||||
- lit==17.0.2
|
||||
- markupsafe==2.1.3
|
||||
- mpmath==1.3.0
|
||||
- msgpack==1.0.7
|
||||
- networkx==3.1
|
||||
- ninja==1.11.1
|
||||
- numpy==1.26.0
|
||||
- nvidia-cublas-cu11==11.10.3.66
|
||||
- nvidia-cuda-cupti-cu11==11.7.101
|
||||
- nvidia-cuda-nvrtc-cu11==11.7.99
|
||||
- nvidia-cuda-runtime-cu11==11.7.99
|
||||
- nvidia-cudnn-cu11==8.5.0.96
|
||||
- nvidia-cufft-cu11==10.9.0.58
|
||||
- nvidia-curand-cu11==10.2.10.91
|
||||
- nvidia-cusolver-cu11==11.4.0.1
|
||||
- nvidia-cusparse-cu11==11.7.4.91
|
||||
- nvidia-nccl-cu11==2.14.3
|
||||
- nvidia-nvtx-cu11==11.7.91
|
||||
- packaging==23.2
|
||||
- pandas==2.1.1
|
||||
- protobuf==4.24.4
|
||||
- psutil==5.9.5
|
||||
- pyarrow==13.0.0
|
||||
- pydantic==1.10.13
|
||||
- python-dateutil==2.8.2
|
||||
- python-dotenv==1.0.0
|
||||
- pytz==2023.3.post1
|
||||
- pyyaml==6.0.1
|
||||
- ray==2.7.0
|
||||
- referencing==0.30.2
|
||||
- regex==2023.10.3
|
||||
- requests==2.31.0
|
||||
- rpds-py==0.10.4
|
||||
- safetensors==0.4.0
|
||||
- sentencepiece==0.1.99
|
||||
- six==1.16.0
|
||||
- sniffio==1.3.0
|
||||
- starlette==0.27.0
|
||||
- sympy==1.12
|
||||
- tokenizers==0.14.1
|
||||
- torch==2.0.1
|
||||
- tqdm==4.66.1
|
||||
- transformers==4.34.0
|
||||
- triton==2.0.0
|
||||
- typing-extensions==4.8.0
|
||||
- tzdata==2023.3
|
||||
- urllib3==2.0.6
|
||||
- uvicorn==0.23.2
|
||||
- uvloop==0.17.0
|
||||
- vllm==0.2.0
|
||||
- watchfiles==0.20.0
|
||||
- websockets==11.0.3
|
||||
- xformers==0.0.22
|
||||
prefix: /opt/conda/envs/vllm
|
||||
@@ -18,6 +18,9 @@ title = "LocalAI"
|
||||
</a>
|
||||
</p>
|
||||
|
||||
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
|
||||
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
|
||||
|
||||
> 💡 Get help - [❓FAQ](https://localai.io/faq/) [❓How tos](https://localai.io/howtos/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
|
||||
>
|
||||
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
@@ -9,7 +9,7 @@ weight = 6
|
||||
|
||||
In order to define default prompts, model parameters (such as custom default `top_p` or `top_k`), LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
|
||||
|
||||
You can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
|
||||
In order to configure a model, you can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
|
||||
Consider the following `models` folder in the `example/chatbot-ui`:
|
||||
|
||||
```
|
||||
@@ -96,6 +96,12 @@ Specifying a `config-file` via CLI allows to declare models in a single file as
|
||||
|
||||
See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
|
||||
|
||||
It is possible to specify a full URL or a short-hand URL to a YAML model configuration file and use it on start with local-ai, for example to use phi-2:
|
||||
|
||||
```
|
||||
local-ai github://mudler/LocalAI/examples/configurations/phi-2.yaml@master
|
||||
```
|
||||
|
||||
### Full config model file reference
|
||||
|
||||
```yaml
|
||||
@@ -359,6 +365,36 @@ docker run --env REBUILD=true localai
|
||||
docker run --env-file .env localai
|
||||
```
|
||||
|
||||
### CLI parameters
|
||||
|
||||
You can control LocalAI with command line arguments, to specify a binding address, or the number of threads.
|
||||
|
||||
|
||||
| Parameter | Environmental Variable | Default Variable | Description |
|
||||
| ------------------------------ | ------------------------------- | -------------------------------------------------- | ------------------------------------------------------------------- |
|
||||
| --f16 | $F16 | false | Enable f16 mode |
|
||||
| --debug | $DEBUG | false | Enable debug mode |
|
||||
| --cors | $CORS | false | Enable CORS support |
|
||||
| --cors-allow-origins value | $CORS_ALLOW_ORIGINS | | Specify origins allowed for CORS |
|
||||
| --threads value | $THREADS | 4 | Number of threads to use for parallel computation |
|
||||
| --models-path value | $MODELS_PATH | ./models | Path to the directory containing models used for inferencing |
|
||||
| --preload-models value | $PRELOAD_MODELS | | List of models to preload in JSON format at startup |
|
||||
| --preload-models-config value | $PRELOAD_MODELS_CONFIG | | A config with a list of models to apply at startup. Specify the path to a YAML config file |
|
||||
| --config-file value | $CONFIG_FILE | | Path to the config file |
|
||||
| --address value | $ADDRESS | :8080 | Specify the bind address for the API server |
|
||||
| --image-path value | $IMAGE_PATH | | Path to the directory used to store generated images |
|
||||
| --context-size value | $CONTEXT_SIZE | 512 | Default context size of the model |
|
||||
| --upload-limit value | $UPLOAD_LIMIT | 15 | Default upload limit in megabytes (audio file upload) |
|
||||
| --galleries | $GALLERIES | | Allows to set galleries from command line |
|
||||
|--parallel-requests | $PARALLEL_REQUESTS | false | Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm |
|
||||
| --single-active-backend | $SINGLE_ACTIVE_BACKEND | false | Allow only one backend to be running |
|
||||
| --api-keys value | $API_KEY | empty | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys.
|
||||
| --enable-watchdog-idle | $WATCHDOG_IDLE | false | Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long. (default: false) [$WATCHDOG_IDLE]
|
||||
| --enable-watchdog-busy | $WATCHDOG_BUSY | false | Enable watchdog for stopping busy backends that exceed a defined threshold.|
|
||||
| --watchdog-busy-timeout value | $WATCHDOG_BUSY_TIMEOUT | 5m | Watchdog timeout. This will restart the backend if it crashes. |
|
||||
| --watchdog-idle-timeout value | $WATCHDOG_IDLE_TIMEOUT | 15m | Watchdog idle timeout. This will restart the backend if it crashes. |
|
||||
| --preload-backend-only | $PRELOAD_BACKEND_ONLY | false | If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups. |
|
||||
| --external-grpc-backends | EXTERNAL_GRPC_BACKENDS | none | Comma separated list of external gRPC backends to use. Format: `name:host:port` or `name:/path/to/file` |
|
||||
|
||||
|
||||
### Extra backends
|
||||
|
||||
@@ -235,6 +235,14 @@ make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
|
||||
|
||||
By default, all the backends are built.
|
||||
|
||||
### Specific llama.cpp version
|
||||
|
||||
To build with a specific version of llama.cpp, set `CPPLLAMA_VERSION` to the tag or wanted sha:
|
||||
|
||||
```
|
||||
CPPLLAMA_VERSION=<sha> make build
|
||||
```
|
||||
|
||||
### Windows compatibility
|
||||
|
||||
Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||
|
||||
@@ -15,11 +15,19 @@ This section contains instruction on how to use LocalAI with GPU acceleration.
|
||||
For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "build/#acceleration" %}})
|
||||
{{% /notice %}}
|
||||
|
||||
### CUDA
|
||||
### CUDA(NVIDIA) acceleration
|
||||
|
||||
Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
|
||||
|
||||
To use CUDA, use the images with the `cublas` tag.
|
||||
To check what CUDA version do you need, you can either run `nvidia-smi` or `nvcc --version`.
|
||||
|
||||
Alternatively, you can also check nvidia-smi with docker:
|
||||
|
||||
```
|
||||
docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
|
||||
```
|
||||
|
||||
To use CUDA, use the images with the `cublas` tag, for example.
|
||||
|
||||
The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
|
||||
|
||||
+++
|
||||
disableToc = false
|
||||
title = "Getting started"
|
||||
@@ -6,7 +6,11 @@ weight = 1
|
||||
url = '/basics/getting_started/'
|
||||
+++
|
||||
|
||||
`LocalAI` is available as a container image and binary. It can be used with docker, podman, kubernetes and any container engine. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
|
||||
`LocalAI` is available as a container image and binary. It can be used with docker, podman, kubernetes and any container engine.
|
||||
Container images are published to [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Dockerhub](https://hub.docker.com/r/localai/localai).
|
||||
|
||||
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
|
||||
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
|
||||
|
||||
See also our [How to]({{%relref "howtos" %}}) section for end-to-end guided examples curated by the community.
|
||||
|
||||
@@ -14,6 +18,8 @@ See also our [How to]({{%relref "howtos" %}}) section for end-to-end guided exam
|
||||
|
||||
The easiest way to run LocalAI is by using [`docker compose`](https://docs.docker.com/compose/install/) or with [Docker](https://docs.docker.com/engine/install/) (to build locally, see the [build section]({{%relref "build" %}})).
|
||||
|
||||
LocalAI needs at least a model file to work, or a configuration YAML file, or both. You can customize further model defaults and specific settings with a configuration file (see [advanced]({{%relref "advanced" %}})).
|
||||
|
||||
{{% notice note %}}
|
||||
To run with GPU Accelleration, see [GPU acceleration]({{%relref "features/gpu-acceleration" %}}).
|
||||
{{% /notice %}}
|
||||
@@ -111,10 +117,114 @@ helm show values go-skynet/local-ai > values.yaml
|
||||
helm install local-ai go-skynet/local-ai -f values.yaml
|
||||
```
|
||||
|
||||
{{% /tab %}}
|
||||
{{% tab name="From binary" %}}
|
||||
|
||||
LocalAI binary releases are available in [Github](https://github.com/go-skynet/LocalAI/releases).
|
||||
|
||||
{{% /tab %}}
|
||||
|
||||
{{% tab name="From source" %}}
|
||||
|
||||
See the [build section]({{%relref "build" %}}).
|
||||
|
||||
{{% /tab %}}
|
||||
|
||||
{{< /tabs >}}
|
||||
|
||||
### Running Popular models (one-click!)
|
||||
|
||||
{{% notice note %}}
|
||||
|
||||
Note: this feature currently is available only on master builds.
|
||||
|
||||
{{% /notice %}}
|
||||
|
||||
You can run `local-ai` directly with a model name, and it will download the model and start the API with the model loaded.
|
||||
|
||||
> Don't need GPU acceleration? use the CPU images which are lighter and do not have Nvidia dependencies
|
||||
> To know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version`
|
||||
|
||||
|
||||
{{< tabs >}}
|
||||
{{% tab name="CPU-only" %}}
|
||||
|
||||
| Model | Category | Docker command |
|
||||
| --- | --- | --- |
|
||||
| [phi-2](https://huggingface.co/microsoft/phi-2) | LLM | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
|
||||
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | Multimodal LLM | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava``` |
|
||||
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | LLM | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
|
||||
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | Embeddings | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
|
||||
| all-minilm-l6-v2 | Embeddings | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
|
||||
| whisper-base | Audio to Text | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
|
||||
| rhasspy-voice-en-us-amy | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
|
||||
| coqui | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
|
||||
| bark | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
|
||||
| vall-e-x | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
|
||||
|
||||
{{% /tab %}}
|
||||
{{% tab name="GPU (CUDA 11)" %}}
|
||||
|
||||
|
||||
|
||||
| Model | Category | Docker command |
|
||||
| --- | --- | --- |
|
||||
| [phi-2](https://huggingface.co/microsoft/phi-2) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
|
||||
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | Multimodal LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core llava``` |
|
||||
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
|
||||
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
|
||||
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
|
||||
| whisper-base | Audio to Text | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
|
||||
| rhasspy-voice-en-us-amy | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
|
||||
| coqui | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
|
||||
| bark | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
|
||||
| vall-e-x | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
|
||||
|
||||
{{% /tab %}}
|
||||
|
||||
|
||||
{{% tab name="GPU (CUDA 12)" %}}
|
||||
|
||||
| Model | Category | Docker command |
|
||||
| --- | --- | --- |
|
||||
| [phi-2](https://huggingface.co/microsoft/phi-2) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
|
||||
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | Multimodal LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core llava``` |
|
||||
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
|
||||
| bert-cpp | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
|
||||
| all-minilm-l6-v2 | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
|
||||
| whisper-base | Audio to Text | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
|
||||
| rhasspy-voice-en-us-amy | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
|
||||
| coqui | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
|
||||
| bark | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
|
||||
| vall-e-x | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
|
||||
|
||||
{{% /tab %}}
|
||||
|
||||
{{< /tabs >}}
|
||||
|
||||
{{% notice note %}}
|
||||
|
||||
LocalAI can be started (either the container image or the binary) with a list of model config files URLs or our short-handed format (e.g. `huggingface://`. `github://`). It works by passing the urls as arguments or environment variable, for example:
|
||||
|
||||
```
|
||||
local-ai github://owner/repo/file.yaml@branch
|
||||
|
||||
# Env
|
||||
MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branch" local-ai
|
||||
|
||||
# Args
|
||||
local-ai --models github://owner/repo/file.yaml@branch --models github://owner/repo/file.yaml@branch
|
||||
```
|
||||
|
||||
For example, to start localai with phi-2, it's possible for instance to also use a full config file from gists:
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
|
||||
```
|
||||
|
||||
The file should be a valid LocalAI YAML configuration file, for the full syntax see [advanced]({{%relref "advanced" %}}).
|
||||
{{% /notice %}}
|
||||
|
||||
### Container images
|
||||
|
||||
LocalAI has a set of images to support CUDA, ffmpeg and 'vanilla' (CPU-only). The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
|
||||
@@ -131,6 +241,11 @@ Core Images - Smaller images without predownload python dependencies
|
||||
{{% /tab %}}
|
||||
|
||||
{{% tab name="GPU Images CUDA 11" %}}
|
||||
|
||||
Images with Nvidia accelleration support
|
||||
|
||||
> If you do not know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version`
|
||||
|
||||
- `master-cublas-cuda11`
|
||||
- `master-cublas-cuda11-core`
|
||||
- `{{< version >}}-cublas-cuda11`
|
||||
@@ -142,6 +257,11 @@ Core Images - Smaller images without predownload python dependencies
|
||||
{{% /tab %}}
|
||||
|
||||
{{% tab name="GPU Images CUDA 12" %}}
|
||||
|
||||
Images with Nvidia accelleration support
|
||||
|
||||
> If you do not know which version of CUDA do you have available, you can check with `nvidia-smi` or `nvcc --version`
|
||||
|
||||
- `master-cublas-cuda12`
|
||||
- `master-cublas-cuda12-core`
|
||||
- `{{< version >}}-cublas-cuda12`
|
||||
@@ -201,212 +321,9 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
|
||||
|
||||
To see other model configurations, see also the example section [here](https://github.com/mudler/LocalAI/tree/master/examples/configurations).
|
||||
|
||||
|
||||
### From binaries
|
||||
|
||||
LocalAI binary releases are available in [Github](https://github.com/go-skynet/LocalAI/releases).
|
||||
|
||||
You can control LocalAI with command line arguments, to specify a binding address, or the number of threads.
|
||||
|
||||
### CLI parameters
|
||||
|
||||
| Parameter | Environmental Variable | Default Variable | Description |
|
||||
| ------------------------------ | ------------------------------- | -------------------------------------------------- | ------------------------------------------------------------------- |
|
||||
| --f16 | $F16 | false | Enable f16 mode |
|
||||
| --debug | $DEBUG | false | Enable debug mode |
|
||||
| --cors | $CORS | false | Enable CORS support |
|
||||
| --cors-allow-origins value | $CORS_ALLOW_ORIGINS | | Specify origins allowed for CORS |
|
||||
| --threads value | $THREADS | 4 | Number of threads to use for parallel computation |
|
||||
| --models-path value | $MODELS_PATH | ./models | Path to the directory containing models used for inferencing |
|
||||
| --preload-models value | $PRELOAD_MODELS | | List of models to preload in JSON format at startup |
|
||||
| --preload-models-config value | $PRELOAD_MODELS_CONFIG | | A config with a list of models to apply at startup. Specify the path to a YAML config file |
|
||||
| --config-file value | $CONFIG_FILE | | Path to the config file |
|
||||
| --address value | $ADDRESS | :8080 | Specify the bind address for the API server |
|
||||
| --image-path value | $IMAGE_PATH | | Path to the directory used to store generated images |
|
||||
| --context-size value | $CONTEXT_SIZE | 512 | Default context size of the model |
|
||||
| --upload-limit value | $UPLOAD_LIMIT | 15 | Default upload limit in megabytes (audio file upload) |
|
||||
| --galleries | $GALLERIES | | Allows to set galleries from command line |
|
||||
|--parallel-requests | $PARALLEL_REQUESTS | false | Enable backends to handle multiple requests in parallel. This is for backends that supports multiple requests in parallel, like llama.cpp or vllm |
|
||||
| --single-active-backend | $SINGLE_ACTIVE_BACKEND | false | Allow only one backend to be running |
|
||||
| --api-keys value | $API_KEY | empty | List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys.
|
||||
| --enable-watchdog-idle | $WATCHDOG_IDLE | false | Enable watchdog for stopping idle backends. This will stop the backends if are in idle state for too long. (default: false) [$WATCHDOG_IDLE]
|
||||
| --enable-watchdog-busy | $WATCHDOG_BUSY | false | Enable watchdog for stopping busy backends that exceed a defined threshold.|
|
||||
| --watchdog-busy-timeout value | $WATCHDOG_BUSY_TIMEOUT | 5m | Watchdog timeout. This will restart the backend if it crashes. |
|
||||
| --watchdog-idle-timeout value | $WATCHDOG_IDLE_TIMEOUT | 15m | Watchdog idle timeout. This will restart the backend if it crashes. |
|
||||
| --preload-backend-only | $PRELOAD_BACKEND_ONLY | false | If set, the api is NOT launched, and only the preloaded models / backends are started. This is intended for multi-node setups. |
|
||||
| --external-grpc-backends | EXTERNAL_GRPC_BACKENDS | none | Comma separated list of external gRPC backends to use. Format: `name:host:port` or `name:/path/to/file` |
|
||||
|
||||
### Run LocalAI in Kubernetes
|
||||
|
||||
LocalAI can be installed inside Kubernetes with helm.
|
||||
|
||||
Requirements:
|
||||
- SSD storage class, or disable `mmap` to load the whole model in memory
|
||||
|
||||
<details>
|
||||
By default, the helm chart will install LocalAI instance using the ggml-gpt4all-j model without persistent storage.
|
||||
|
||||
1. Add the helm repo
|
||||
```bash
|
||||
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
|
||||
```
|
||||
2. Install the helm chart:
|
||||
```bash
|
||||
helm repo update
|
||||
helm install local-ai go-skynet/local-ai -f values.yaml
|
||||
```
|
||||
> **Note:** For further configuration options, see the [helm chart repository on GitHub](https://github.com/go-skynet/helm-charts).
|
||||
### Example values
|
||||
Deploy a single LocalAI pod with 6GB of persistent storage serving up a `ggml-gpt4all-j` model with custom prompt.
|
||||
```yaml
|
||||
### values.yaml
|
||||
|
||||
replicaCount: 1
|
||||
|
||||
deployment:
|
||||
image: quay.io/go-skynet/local-ai:latest ##(This is for CPU only, to use GPU change it to a image that supports GPU IE "v2.0.0-cublas-cuda12-core")
|
||||
env:
|
||||
threads: 4
|
||||
context_size: 512
|
||||
modelsPath: "/models"
|
||||
|
||||
resources:
|
||||
{}
|
||||
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||
# choice for the user. This also increases chances charts run on environments with little
|
||||
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
# requests:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
# Prompt templates to include
|
||||
# Note: the keys of this map will be the names of the prompt template files
|
||||
promptTemplates:
|
||||
{}
|
||||
# ggml-gpt4all-j.tmpl: |
|
||||
# The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
|
||||
# ### Prompt:
|
||||
# {{.Input}}
|
||||
# ### Response:
|
||||
|
||||
# Models to download at runtime
|
||||
models:
|
||||
# Whether to force download models even if they already exist
|
||||
forceDownload: false
|
||||
|
||||
# The list of URLs to download models from
|
||||
# Note: the name of the file will be the name of the loaded model
|
||||
list:
|
||||
- url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
|
||||
# basicAuth: base64EncodedCredentials
|
||||
|
||||
# Persistent storage for models and prompt templates.
|
||||
# PVC and HostPath are mutually exclusive. If both are enabled,
|
||||
# PVC configuration takes precedence. If neither are enabled, ephemeral
|
||||
# storage is used.
|
||||
persistence:
|
||||
pvc:
|
||||
enabled: false
|
||||
size: 6Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
|
||||
annotations: {}
|
||||
|
||||
# Optional
|
||||
storageClass: ~
|
||||
|
||||
hostPath:
|
||||
enabled: false
|
||||
path: "/models"
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 80
|
||||
annotations: {}
|
||||
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
|
||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
||||
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations:
|
||||
{}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
```
|
||||
</details>
|
||||
|
||||
|
||||
### Build from source
|
||||
|
||||
See the [build section]({{%relref "build" %}}).
|
||||
|
||||
### Other examples
|
||||
### Examples
|
||||
|
||||

|
||||
|
||||
To see other examples on how to integrate with other projects for instance for question answering or for using it with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).
|
||||
|
||||
|
||||
### Clients
|
||||
|
||||
OpenAI clients are already compatible with LocalAI by overriding the basePath, or the target URL.
|
||||
|
||||
## Javascript
|
||||
|
||||
<details>
|
||||
|
||||
https://github.com/openai/openai-node/
|
||||
|
||||
```javascript
|
||||
import { Configuration, OpenAIApi } from 'openai';
|
||||
|
||||
const configuration = new Configuration({
|
||||
basePath: `http://localhost:8080/v1`
|
||||
});
|
||||
const openai = new OpenAIApi(configuration);
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
## Python
|
||||
|
||||
<details>
|
||||
|
||||
https://github.com/openai/openai-python
|
||||
|
||||
Set the `OPENAI_API_BASE` environment variable, or by code:
|
||||
|
||||
```python
|
||||
import openai
|
||||
|
||||
openai.api_base = "http://localhost:8080/v1"
|
||||
|
||||
# create a chat completion
|
||||
chat_completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}])
|
||||
|
||||
# print the completion
|
||||
print(completion.choices[0].message.content)
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
@@ -43,15 +43,18 @@ Besides llama based models, LocalAI is compatible also with other architectures.
|
||||
| [langchain-huggingface](https://github.com/tmc/langchaingo) | Any text generators available on HuggingFace through API | yes | GPT | no | no | N/A |
|
||||
| [piper](https://github.com/rhasspy/piper) ([binding](https://github.com/mudler/go-piper)) | Any piper onnx model | no | Text to voice | no | no | N/A |
|
||||
| [falcon](https://github.com/cmp-nct/ggllm.cpp/tree/c12b2d65f732a0d8846db2244e070f0f3e73505c) ([binding](https://github.com/mudler/go-ggllm.cpp)) | Falcon *** | yes | GPT | no | yes | CUDA |
|
||||
| `huggingface-embeddings` [sentence-transformers](https://github.com/UKPLab/sentence-transformers) | BERT | no | Embeddings only | yes | no | N/A |
|
||||
| [sentencetransformers](https://github.com/UKPLab/sentence-transformers) | BERT | no | Embeddings only | yes | no | N/A |
|
||||
| `bark` | bark | no | Audio generation | no | no | yes |
|
||||
| `AutoGPTQ` | GPTQ | yes | GPT | yes | no | N/A |
|
||||
| `autogptq` | GPTQ | yes | GPT | yes | no | N/A |
|
||||
| `exllama` | GPTQ | yes | GPT only | no | no | N/A |
|
||||
| `diffusers` | SD,... | no | Image generation | no | no | N/A |
|
||||
| `vall-e-x` | Vall-E | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
| `vllm` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA |
|
||||
| `exllama2` | GPTQ | yes | GPT only | no | no | N/A |
|
||||
| `transformers-musicgen` | | no | Audio generation | no | no | N/A |
|
||||
| [tinydream](https://github.com/symisc/tiny-dream#tiny-dreaman-embedded-header-only-stable-diffusion-inference-c-librarypixlabiotiny-dream) | stablediffusion | no | Image | no | no | N/A |
|
||||
| `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
|
||||
| `petals` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA |
|
||||
|
||||
Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "advanced" %}})).
|
||||
|
||||
|
||||
@@ -167,11 +167,6 @@ curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/
|
||||
|
||||
## img2vid
|
||||
|
||||
{{% notice note %}}
|
||||
|
||||
Experimental and available only on master builds. See: https://github.com/mudler/LocalAI/pull/1442
|
||||
|
||||
{{% /notice %}}
|
||||
|
||||
```yaml
|
||||
name: img2vid
|
||||
@@ -193,12 +188,6 @@ curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/
|
||||
|
||||
## txt2vid
|
||||
|
||||
{{% notice note %}}
|
||||
|
||||
Experimental and available only on master builds. See: https://github.com/mudler/LocalAI/pull/1442
|
||||
|
||||
{{% /notice %}}
|
||||
|
||||
```yaml
|
||||
name: txt2vid
|
||||
parameters:
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v2.3.1"
|
||||
"version": "v2.4.1"
|
||||
}
|
||||
|
||||
53
embedded/embedded.go
Normal file
53
embedded/embedded.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package embedded
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/assets"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
var modelShorteners map[string]string
|
||||
|
||||
//go:embed model_library.yaml
|
||||
var modelLibrary []byte
|
||||
|
||||
//go:embed models/*
|
||||
var embeddedModels embed.FS
|
||||
|
||||
func ModelShortURL(s string) string {
|
||||
if _, ok := modelShorteners[s]; ok {
|
||||
s = modelShorteners[s]
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func init() {
|
||||
yaml.Unmarshal(modelLibrary, &modelShorteners)
|
||||
}
|
||||
|
||||
// ExistsInModelsLibrary checks if a model exists in the embedded models library
|
||||
func ExistsInModelsLibrary(s string) bool {
|
||||
f := fmt.Sprintf("%s.yaml", s)
|
||||
|
||||
a := []string{}
|
||||
|
||||
for _, j := range assets.ListFiles(embeddedModels) {
|
||||
a = append(a, strings.TrimPrefix(j, "models/"))
|
||||
}
|
||||
|
||||
return slices.Contains(a, f)
|
||||
}
|
||||
|
||||
// ResolveContent returns the content in the embedded model library
|
||||
func ResolveContent(s string) ([]byte, error) {
|
||||
if ExistsInModelsLibrary(s) {
|
||||
return embeddedModels.ReadFile(fmt.Sprintf("models/%s.yaml", s))
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("cannot find model %s", s)
|
||||
}
|
||||
9
embedded/model_library.yaml
Normal file
9
embedded/model_library.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
###
|
||||
###
|
||||
### This file contains the list of models that are available in the library
|
||||
### The URLs are automatically expanded when local-ai is being called with the key as argument
|
||||
###
|
||||
### For models with an entire YAML file to be embededd, put the file inside the `models`
|
||||
### directory, it will be automatically available with the file name as key (without the .yaml extension)
|
||||
|
||||
phi-2: "github://mudler/LocalAI/examples/configurations/phi-2.yaml@master"
|
||||
13
embedded/models/all-minilm-l6-v2.yaml
Normal file
13
embedded/models/all-minilm-l6-v2.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
name: all-minilm-l6-v2
|
||||
backend: sentencetransformers
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: all-MiniLM-L6-v2
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||
"input": "Your text string goes here",
|
||||
"model": "all-minilm-l6-v2"
|
||||
}'
|
||||
8
embedded/models/bark.yaml
Normal file
8
embedded/models/bark.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
usage: |
|
||||
bark works without any configuration, to test it, you can run the following curl command:
|
||||
|
||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
||||
"backend": "bark",
|
||||
"input":"Hello, this is a test!"
|
||||
}' | aplay
|
||||
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
|
||||
23
embedded/models/bert-cpp.yaml
Normal file
23
embedded/models/bert-cpp.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
backend: bert-embeddings
|
||||
embeddings: true
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: bert-cpp-minilm-v6
|
||||
|
||||
parameters:
|
||||
model: bert-MiniLM-L6-v2q4_0.bin
|
||||
|
||||
download_files:
|
||||
- filename: "bert-MiniLM-L6-v2q4_0.bin"
|
||||
sha256: "a5a174d8772c8a569faf9f3136c441f2c3855b5bf35ed32274294219533feaad"
|
||||
uri: "https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin"
|
||||
|
||||
usage: |
|
||||
You can test this model with curl like this:
|
||||
|
||||
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
|
||||
"input": "Your text string goes here",
|
||||
"model": "bert-cpp-minilm-v6"
|
||||
}'
|
||||
9
embedded/models/coqui.yaml
Normal file
9
embedded/models/coqui.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
usage: |
|
||||
coqui works without any configuration, to test it, you can run the following curl command:
|
||||
|
||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
||||
"backend": "coqui",
|
||||
"model": "tts_models/en/ljspeech/glow-tts",
|
||||
"input":"Hello, this is a test!"
|
||||
}'
|
||||
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
|
||||
36
embedded/models/llava.yaml
Normal file
36
embedded/models/llava.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
backend: llama-cpp
|
||||
context_size: 4096
|
||||
f16: true
|
||||
|
||||
gpu_layers: 90
|
||||
mmap: true
|
||||
name: llava
|
||||
|
||||
roles:
|
||||
user: "USER:"
|
||||
assistant: "ASSISTANT:"
|
||||
system: "SYSTEM:"
|
||||
|
||||
mmproj: bakllava-mmproj.gguf
|
||||
parameters:
|
||||
model: bakllava.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
|
||||
template:
|
||||
chat: |
|
||||
A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
|
||||
{{.Input}}
|
||||
ASSISTANT:
|
||||
|
||||
download_files:
|
||||
- filename: bakllava.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf
|
||||
- filename: bakllava-mmproj.gguf
|
||||
uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "llava",
|
||||
"messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}], "temperature": 0.9}]}'
|
||||
29
embedded/models/mistral-openorca.yaml
Normal file
29
embedded/models/mistral-openorca.yaml
Normal file
@@ -0,0 +1,29 @@
|
||||
name: mistral-openorca
|
||||
mmap: true
|
||||
parameters:
|
||||
model: huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q6_K.gguf
|
||||
temperature: 0.2
|
||||
top_k: 40
|
||||
top_p: 0.95
|
||||
template:
|
||||
chat_message: |
|
||||
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
|
||||
{{if .Content}}{{.Content}}{{end}}
|
||||
<|im_end|>
|
||||
|
||||
chat: |
|
||||
{{.Input}}
|
||||
<|im_start|>assistant
|
||||
|
||||
completion: |
|
||||
{{.Input}}
|
||||
context_size: 4096
|
||||
f16: true
|
||||
stopwords:
|
||||
- <|im_end|>
|
||||
|
||||
usage: |
|
||||
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
|
||||
"model": "mistral-openorca",
|
||||
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
|
||||
}'
|
||||
13
embedded/models/rhasspy-voice-en-us-amy.yaml
Normal file
13
embedded/models/rhasspy-voice-en-us-amy.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
name: voice-en-us-amy-low
|
||||
download_files:
|
||||
- filename: voice-en-us-amy-low.tar.gz
|
||||
uri: https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-en-us-amy-low.tar.gz
|
||||
|
||||
|
||||
usage: |
|
||||
To test if this model works as expected, you can use the following curl command:
|
||||
|
||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
||||
"model":"en-us-amy-low.onnx",
|
||||
"input": "Hi, this is a test."
|
||||
}'
|
||||
8
embedded/models/vall-e-x.yaml
Normal file
8
embedded/models/vall-e-x.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
usage: |
|
||||
Vall-e-x works without any configuration, to test it, you can run the following curl command:
|
||||
|
||||
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
|
||||
"backend": "vall-e-x",
|
||||
"input":"Hello, this is a test!"
|
||||
}' | aplay
|
||||
# TODO: This is a placeholder until we manage to pre-load HF/Transformers models
|
||||
18
embedded/models/whisper-base.yaml
Normal file
18
embedded/models/whisper-base.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
name: whisper
|
||||
backend: whisper
|
||||
parameters:
|
||||
model: ggml-whisper-base.bin
|
||||
|
||||
usage: |
|
||||
## example audio file
|
||||
wget --quiet --show-progress -O gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
|
||||
|
||||
## Send the example audio file to the transcriptions endpoint
|
||||
curl http://localhost:8080/v1/audio/transcriptions \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F file="@$PWD/gb1.ogg" -F model="whisper"
|
||||
|
||||
download_files:
|
||||
- filename: "ggml-whisper-base.bin"
|
||||
sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"
|
||||
uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin"
|
||||
22
pkg/assets/list.go
Normal file
22
pkg/assets/list.go
Normal file
@@ -0,0 +1,22 @@
|
||||
package assets
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"io/fs"
|
||||
)
|
||||
|
||||
func ListFiles(content embed.FS) (files []string) {
|
||||
fs.WalkDir(content, ".", func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
files = append(files, path)
|
||||
return nil
|
||||
})
|
||||
return
|
||||
}
|
||||
26
pkg/downloader/progress.go
Normal file
26
pkg/downloader/progress.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package downloader
|
||||
|
||||
import "hash"
|
||||
|
||||
type progressWriter struct {
|
||||
fileName string
|
||||
total int64
|
||||
written int64
|
||||
downloadStatus func(string, string, string, float64)
|
||||
hash hash.Hash
|
||||
}
|
||||
|
||||
func (pw *progressWriter) Write(p []byte) (n int, err error) {
|
||||
n, err = pw.hash.Write(p)
|
||||
pw.written += int64(n)
|
||||
|
||||
if pw.total > 0 {
|
||||
percentage := float64(pw.written) / float64(pw.total) * 100
|
||||
//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
} else {
|
||||
pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
@@ -1,10 +1,9 @@
|
||||
package utils
|
||||
package downloader
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -12,9 +11,18 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
const (
|
||||
HuggingFacePrefix = "huggingface://"
|
||||
HTTPPrefix = "http://"
|
||||
HTTPSPrefix = "https://"
|
||||
GithubURI = "github:"
|
||||
GithubURI2 = "github://"
|
||||
)
|
||||
|
||||
func GetURI(url string, f func(url string, i []byte) error) error {
|
||||
url = ConvertURL(url)
|
||||
|
||||
@@ -52,14 +60,6 @@ func GetURI(url string, f func(url string, i []byte) error) error {
|
||||
return f(url, body)
|
||||
}
|
||||
|
||||
const (
|
||||
HuggingFacePrefix = "huggingface://"
|
||||
HTTPPrefix = "http://"
|
||||
HTTPSPrefix = "https://"
|
||||
GithubURI = "github:"
|
||||
GithubURI2 = "github://"
|
||||
)
|
||||
|
||||
func LooksLikeURL(s string) bool {
|
||||
return strings.HasPrefix(s, HTTPPrefix) ||
|
||||
strings.HasPrefix(s, HTTPSPrefix) ||
|
||||
@@ -229,10 +229,10 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string,
|
||||
}
|
||||
|
||||
log.Info().Msgf("File %q downloaded and verified", filePath)
|
||||
if IsArchive(filePath) {
|
||||
if utils.IsArchive(filePath) {
|
||||
basePath := filepath.Dir(filePath)
|
||||
log.Info().Msgf("File %q is an archive, uncompressing to %s", filePath, basePath)
|
||||
if err := ExtractArchive(filePath, basePath); err != nil {
|
||||
if err := utils.ExtractArchive(filePath, basePath); err != nil {
|
||||
log.Debug().Msgf("Failed decompressing %q: %s", filePath, err.Error())
|
||||
return err
|
||||
}
|
||||
@@ -241,32 +241,35 @@ func DownloadFile(url string, filePath, sha string, downloadStatus func(string,
|
||||
return nil
|
||||
}
|
||||
|
||||
type progressWriter struct {
|
||||
fileName string
|
||||
total int64
|
||||
written int64
|
||||
downloadStatus func(string, string, string, float64)
|
||||
hash hash.Hash
|
||||
}
|
||||
// this function check if the string is an URL, if it's an URL downloads the image in memory
|
||||
// encodes it in base64 and returns the base64 string
|
||||
func GetBase64Image(s string) (string, error) {
|
||||
if strings.HasPrefix(s, "http") {
|
||||
// download the image
|
||||
resp, err := http.Get(s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
func (pw *progressWriter) Write(p []byte) (n int, err error) {
|
||||
n, err = pw.hash.Write(p)
|
||||
pw.written += int64(n)
|
||||
// read the image data into memory
|
||||
data, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if pw.total > 0 {
|
||||
percentage := float64(pw.written) / float64(pw.total) * 100
|
||||
//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
} else {
|
||||
pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0)
|
||||
// encode the image data in base64
|
||||
encoded := base64.StdEncoding.EncodeToString(data)
|
||||
|
||||
// return the base64 string
|
||||
return encoded, nil
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// MD5 of a string
|
||||
func MD5(s string) string {
|
||||
return fmt.Sprintf("%x", md5.Sum([]byte(s)))
|
||||
// if the string instead is prefixed with "data:image/jpeg;base64,", drop it
|
||||
if strings.HasPrefix(s, "data:image/jpeg;base64,") {
|
||||
return strings.ReplaceAll(s, "data:image/jpeg;base64,", ""), nil
|
||||
}
|
||||
return "", fmt.Errorf("not valid string")
|
||||
}
|
||||
|
||||
func formatBytes(bytes int64) string {
|
||||
@@ -1,7 +1,7 @@
|
||||
package utils_test
|
||||
package downloader_test
|
||||
|
||||
import (
|
||||
. "github.com/go-skynet/LocalAI/pkg/utils"
|
||||
. "github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/imdario/mergo"
|
||||
"github.com/rs/zerolog/log"
|
||||
"gopkg.in/yaml.v2"
|
||||
@@ -140,7 +140,7 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod
|
||||
|
||||
func findGalleryURLFromReferenceURL(url string) (string, error) {
|
||||
var refFile string
|
||||
err := utils.GetURI(url, func(url string, d []byte) error {
|
||||
err := downloader.GetURI(url, func(url string, d []byte) error {
|
||||
refFile = string(d)
|
||||
if len(refFile) == 0 {
|
||||
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
|
||||
@@ -163,7 +163,7 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error)
|
||||
}
|
||||
}
|
||||
|
||||
err := utils.GetURI(gallery.URL, func(url string, d []byte) error {
|
||||
err := downloader.GetURI(gallery.URL, func(url string, d []byte) error {
|
||||
return yaml.Unmarshal(d, &models)
|
||||
})
|
||||
if err != nil {
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
package gallery
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/imdario/mergo"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -66,7 +63,7 @@ type PromptTemplate struct {
|
||||
|
||||
func GetGalleryConfigFromURL(url string) (Config, error) {
|
||||
var config Config
|
||||
err := utils.GetURI(url, func(url string, d []byte) error {
|
||||
err := downloader.GetURI(url, func(url string, d []byte) error {
|
||||
return yaml.Unmarshal(d, &config)
|
||||
})
|
||||
if err != nil {
|
||||
@@ -114,7 +111,7 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
|
||||
// Create file path
|
||||
filePath := filepath.Join(basePath, file.Filename)
|
||||
|
||||
if err := utils.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil {
|
||||
if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, downloadStatus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -183,54 +180,3 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type progressWriter struct {
|
||||
fileName string
|
||||
total int64
|
||||
written int64
|
||||
downloadStatus func(string, string, string, float64)
|
||||
hash hash.Hash
|
||||
}
|
||||
|
||||
func (pw *progressWriter) Write(p []byte) (n int, err error) {
|
||||
n, err = pw.hash.Write(p)
|
||||
pw.written += int64(n)
|
||||
|
||||
if pw.total > 0 {
|
||||
percentage := float64(pw.written) / float64(pw.total) * 100
|
||||
//log.Debug().Msgf("Downloading %s: %s/%s (%.2f%%)", pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
pw.downloadStatus(pw.fileName, formatBytes(pw.written), formatBytes(pw.total), percentage)
|
||||
} else {
|
||||
pw.downloadStatus(pw.fileName, formatBytes(pw.written), "", 0)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func formatBytes(bytes int64) string {
|
||||
const unit = 1024
|
||||
if bytes < unit {
|
||||
return strconv.FormatInt(bytes, 10) + " B"
|
||||
}
|
||||
div, exp := int64(unit), 0
|
||||
for n := bytes / unit; n >= unit; n /= unit {
|
||||
div *= unit
|
||||
exp++
|
||||
}
|
||||
return fmt.Sprintf("%.1f %ciB", float64(bytes)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
func calculateSHA(filePath string) (string, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
hash := sha256.New()
|
||||
if _, err := io.Copy(hash, file); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%x", hash.Sum(nil)), nil
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ func (c *Client) setBusy(v bool) {
|
||||
c.Unlock()
|
||||
}
|
||||
|
||||
func (c *Client) HealthCheck(ctx context.Context) bool {
|
||||
func (c *Client) HealthCheck(ctx context.Context) (bool, error) {
|
||||
if !c.parallel {
|
||||
c.opMutex.Lock()
|
||||
defer c.opMutex.Unlock()
|
||||
@@ -59,8 +59,7 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
|
||||
defer c.setBusy(false)
|
||||
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return false
|
||||
return false, err
|
||||
}
|
||||
defer conn.Close()
|
||||
client := pb.NewBackendClient(conn)
|
||||
@@ -71,15 +70,14 @@ func (c *Client) HealthCheck(ctx context.Context) bool {
|
||||
|
||||
res, err := client.Health(ctx, &pb.HealthMessage{})
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
|
||||
return false
|
||||
return false, err
|
||||
}
|
||||
|
||||
if string(res.Message) == "OK" {
|
||||
return true
|
||||
return true, nil
|
||||
}
|
||||
return false
|
||||
|
||||
return false, fmt.Errorf("health check failed: %s", res.Message)
|
||||
}
|
||||
|
||||
func (c *Client) Embeddings(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.EmbeddingResult, error) {
|
||||
|
||||
@@ -131,11 +131,15 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
||||
// Wait for the service to start up
|
||||
ready := false
|
||||
for i := 0; i < o.grpcAttempts; i++ {
|
||||
if client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background()) {
|
||||
alive, err := client.GRPC(o.parallelRequests, ml.wd).HealthCheck(context.Background())
|
||||
if alive {
|
||||
log.Debug().Msgf("GRPC Service Ready")
|
||||
ready = true
|
||||
break
|
||||
}
|
||||
if err != nil && i == o.grpcAttempts-1 {
|
||||
log.Error().Msgf("Failed starting/connecting to the gRPC service: %s", err.Error())
|
||||
}
|
||||
time.Sleep(time.Duration(o.grpcAttemptsDelay) * time.Second)
|
||||
}
|
||||
|
||||
@@ -176,7 +180,11 @@ func (ml *ModelLoader) resolveAddress(addr ModelAddress, parallel bool) (*grpc.C
|
||||
func (ml *ModelLoader) BackendLoader(opts ...Option) (client *grpc.Client, err error) {
|
||||
o := NewOptions(opts...)
|
||||
|
||||
log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
|
||||
if o.model != "" {
|
||||
log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
|
||||
} else {
|
||||
log.Info().Msgf("Loading model with backend %s", o.backendString)
|
||||
}
|
||||
|
||||
backend := strings.ToLower(o.backendString)
|
||||
if realBackend, exists := Aliases[backend]; exists {
|
||||
@@ -239,7 +247,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (*grpc.Client, error) {
|
||||
for _, b := range o.externalBackends {
|
||||
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
|
||||
}
|
||||
log.Info().Msgf("Loading model '%s' greedly from all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
|
||||
|
||||
if o.model != "" {
|
||||
log.Info().Msgf("Trying to load the model '%s' with all the available backends: %s", o.model, strings.Join(allBackendsToAutoLoad, ", "))
|
||||
}
|
||||
|
||||
for _, b := range allBackendsToAutoLoad {
|
||||
log.Info().Msgf("[%s] Attempting to load", b)
|
||||
|
||||
@@ -171,9 +171,10 @@ func (ml *ModelLoader) CheckIsLoaded(s string) ModelAddress {
|
||||
} else {
|
||||
client = m.GRPC(false, ml.wd)
|
||||
}
|
||||
|
||||
if !client.HealthCheck(context.Background()) {
|
||||
log.Debug().Msgf("GRPC Model not responding: %s", s)
|
||||
alive, err := client.HealthCheck(context.Background())
|
||||
if !alive {
|
||||
log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
|
||||
log.Warn().Msgf("Deleting the process in order to recreate it")
|
||||
if !ml.grpcProcesses[s].IsAlive() {
|
||||
log.Debug().Msgf("GRPC Process is not responding: %s", s)
|
||||
// stop and delete the process, this forces to re-load the model and re-create again the service
|
||||
|
||||
54
pkg/startup/model_preload.go
Normal file
54
pkg/startup/model_preload.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package startup
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/go-skynet/LocalAI/embedded"
|
||||
"github.com/go-skynet/LocalAI/pkg/downloader"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// PreloadModelsConfigurations will preload models from the given list of URLs
|
||||
// It will download the model if it is not already present in the model path
|
||||
// It will also try to resolve if the model is an embedded model YAML configuration
|
||||
func PreloadModelsConfigurations(modelPath string, models ...string) {
|
||||
for _, url := range models {
|
||||
url = embedded.ModelShortURL(url)
|
||||
|
||||
switch {
|
||||
case embedded.ExistsInModelsLibrary(url):
|
||||
modelYAML, err := embedded.ResolveContent(url)
|
||||
// If we resolve something, just save it to disk and continue
|
||||
if err != nil {
|
||||
log.Error().Msgf("error loading model: %s", err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debug().Msgf("[startup] resolved embedded model: %s", url)
|
||||
md5Name := utils.MD5(url)
|
||||
if err := os.WriteFile(filepath.Join(modelPath, md5Name)+".yaml", modelYAML, os.ModePerm); err != nil {
|
||||
log.Error().Msgf("error loading model: %s", err.Error())
|
||||
}
|
||||
case downloader.LooksLikeURL(url):
|
||||
log.Debug().Msgf("[startup] resolved model to download: %s", url)
|
||||
|
||||
// md5 of model name
|
||||
md5Name := utils.MD5(url)
|
||||
|
||||
// check if file exists
|
||||
if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
|
||||
err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
|
||||
utils.DisplayDownloadFunction(fileName, current, total, percent)
|
||||
})
|
||||
if err != nil {
|
||||
log.Error().Msgf("error loading model: %s", err.Error())
|
||||
}
|
||||
}
|
||||
default:
|
||||
log.Warn().Msgf("[startup] failed resolving model '%s'", url)
|
||||
}
|
||||
}
|
||||
}
|
||||
66
pkg/startup/model_preload_test.go
Normal file
66
pkg/startup/model_preload_test.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package startup_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/go-skynet/LocalAI/pkg/startup"
|
||||
"github.com/go-skynet/LocalAI/pkg/utils"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("Preload test", func() {
|
||||
|
||||
Context("Preloading from strings", func() {
|
||||
It("loads from embedded full-urls", func() {
|
||||
tmpdir, err := os.MkdirTemp("", "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
url := "https://raw.githubusercontent.com/mudler/LocalAI/master/examples/configurations/phi-2.yaml"
|
||||
fileName := fmt.Sprintf("%s.yaml", utils.MD5(url))
|
||||
|
||||
PreloadModelsConfigurations(tmpdir, url)
|
||||
|
||||
resultFile := filepath.Join(tmpdir, fileName)
|
||||
|
||||
content, err := os.ReadFile(resultFile)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(string(content)).To(ContainSubstring("name: phi-2"))
|
||||
})
|
||||
It("loads from embedded short-urls", func() {
|
||||
tmpdir, err := os.MkdirTemp("", "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
url := "phi-2"
|
||||
|
||||
PreloadModelsConfigurations(tmpdir, url)
|
||||
|
||||
entry, err := os.ReadDir(tmpdir)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(entry).To(HaveLen(1))
|
||||
resultFile := entry[0].Name()
|
||||
|
||||
content, err := os.ReadFile(filepath.Join(tmpdir, resultFile))
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(string(content)).To(ContainSubstring("name: phi-2"))
|
||||
})
|
||||
It("loads from embedded models", func() {
|
||||
tmpdir, err := os.MkdirTemp("", "")
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
url := "mistral-openorca"
|
||||
fileName := fmt.Sprintf("%s.yaml", utils.MD5(url))
|
||||
|
||||
PreloadModelsConfigurations(tmpdir, url)
|
||||
|
||||
resultFile := filepath.Join(tmpdir, fileName)
|
||||
|
||||
content, err := os.ReadFile(resultFile)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(string(content)).To(ContainSubstring("name: mistral-openorca"))
|
||||
})
|
||||
})
|
||||
})
|
||||
13
pkg/startup/startup_suite_test.go
Normal file
13
pkg/startup/startup_suite_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package startup_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
func TestStartup(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "LocalAI startup test")
|
||||
}
|
||||
10
pkg/utils/hash.go
Normal file
10
pkg/utils/hash.go
Normal file
@@ -0,0 +1,10 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
func MD5(s string) string {
|
||||
return fmt.Sprintf("%x", md5.Sum([]byte(s)))
|
||||
}
|
||||
Reference in New Issue
Block a user