Mirror of https://github.com/mudler/LocalAI.git (synced 2026-02-03 11:13:31 -05:00)

Compare commits: functions_ ... v2.17.1 (214 commits)
214 commits (SHA1 only; the author and date columns were not captured by the mirror):

8142bdc48f  89a11e15e7  06de542032  ecbb61cbf4  7f13e3a783  c926469b9c  c30b57a629  2f297979a7
2437a2769d  b58b7cad94  68148f2a1a  4897eb0ba2  1b43966c48  c5f2f11503  895443d1b5  6a0802e8e6
94cfaad7f4  ac4a94dd44  58bf8614d9  3764e50b35  3f464d2d9e  5116d561e1  96a7a3b59f  112d0ffa45
25f45827ab  f322f7c62d  06351cbbb4  8f952d90b0  7b205510f9  f183fec232  91f48b2143  f404580256
882556d4db  f8382adbf7  80298f94fa  0f8b489346  154694462e  347317d5d2  d40722d2fa  7b12300f15
3c50abffdd  2eb2ed84ab  5da10fb769  bec883e3ff  14b41be057  aff2acacf9  b4d4c0a18f  3a5f2283ea
d9109ffafb  d7e137295a  6c087ae743  88af1033d6  e96d2d7667  aae7ad9d73  23b3d22525  603d81dda1
a21a52d384  219078a5e0  3b7a78adda  0d62594099  d38e9090df  b049805c9b  0f9b58f2cf  0f134d557e
2676e127ae  270d4f8413  2d79cee8cb  4c9623f50d  596cf76135  a293aa1b79  c4eb02c80f  9c9198ff08
83c79d5453  88fd000065  956d652314  9ce2b4d71f  4e974cb4fc  d072835796  17cf6c4a4d  fab3e711ff
4e1463fec2  2fc6fe806b  bdd6769b2d  1ffee9989f  34ab442ce9  67aa31faad  6ef78ef7f6  daa7544d9c
34527737bb  148adebe16  bae2a649fd  90945ebab3  4a239a4bff  5ddaa19914  77d752a481  29ff51c12a
c0744899c9  c9092ad39c  b588cae70e  fb0f188c93  b99182c8d4  95c65d67f5  c603b95ac7  13cfa6de0a
0560c6fd57  f24dddae42  06b461b061  e50a7ba879  3b2bce1fc9  3fe7e9f678  654b661688  7f387fb238
5d31e5269d  ff8a6962cd  10c64dbb55  3f7212c660  5dc6bace49  3cd5918ae6  5b75bf16c7  0c40f545d4
b2fc92daa7  0787797961  2ba9e27bcf  4d98dd9ce7  087bceccac  7064697ce5  0b99be73b3  669cd06dd9
2bbc52fcc8  577888f3c0  1c80f628ff  10430a00bd  9f5c274321  d075dc44dd  be8ffbdfcf  eaf653f3d3
e9c28a1ed7  ba984c7097  ff1f9125ed  2c82058548  16433d2e8e  345047ed7c  6343758f9c  135208806c
3280de7adf  db3113c5c8  593fb62bf0  480834f75b  3200a6655e  b90cdced59  fc3502b56f  785adc1ed5
e25fc656c9  bb3ec56de3  785c54e7b0  003b43f6fc  663488b6bd  e1d6b706f4  29615576fb  f8cea16c03
e0187c2a1a  b76d2fe68a  ee4f722bf8  dce63237f2  0b637465d9  114f549f5e  ea330d452d  eb11a46a73
b57e14d65c  7efa8e75d4  7551369abe  79915bcd11  c8d7d14a37  c56bc0de98  3a9408363b  21a12c2cdd
371d0cc1f7  23fa92bec0  f91e4e5c03  6cbe6a4f99  491e1d752b  1542c58466  1a3dedece0  a58ff00ab1
fdb45153fe  16474bfb40  5a6d120a56  7a480bb16f  053531e434  b7ab4f25d9  73566a2bb2  8ccd5ab040
5a3db730b9  8ad669339e  a10a952085  b37447cac5  f2d182a2eb  6b6c8cdd5f  5f35e85e86  02f1b477df
9ab8f8f5e0  9a255d6453  e0ef9e2bb9  86627b27f7  4e92569d45  f7508e3888  badfc16df1  b584dcf18a
4c845fb47d  07c0559d06  beb598e4f9  c89271b2e4  29909666c3  566b5cf2ee
.env (5 changes)

```diff
@@ -71,6 +71,11 @@
 ### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
 # LLAMACPP_PARALLEL=1
 
+### Define a list of GRPC Servers for llama-cpp workers to distribute the load
+# https://github.com/ggerganov/llama.cpp/pull/6829
+# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
+# LLAMACPP_GRPC_SERVERS=""
+
 ### Enable to run parallel requests
 # LOCALAI_PARALLEL_REQUESTS=true
```
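A hedged usage note: `LLAMACPP_GRPC_SERVERS` takes a comma-separated list of `host:port` pairs pointing at llama.cpp RPC workers, so a hypothetical two-node setup might be `LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052"`. The addresses and port here are illustrative, not taken from this diff; see the linked llama.cpp RPC README for how to run the worker side.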
.github/checksum_checker.sh (vendored, 33 changes)

```diff
@@ -16,7 +16,7 @@ function check_and_update_checksum() {
     # Download the file and calculate new checksum using Python
     new_checksum=$(python3 -c "
 import hashlib
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download, get_paths_info
 import requests
 import sys
 import os
@@ -46,13 +46,24 @@ def calculate_sha256(file_path):
 
 download_type, repo_id_or_url = parse_uri(uri)
 
+new_checksum = None
+
 # Decide download method based on URI type
 if download_type == 'huggingface':
-    try:
-        file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
-    except Exception as e:
-        print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
-        sys.exit(2)
+    # Use HF API to pull sha
+    for file in get_paths_info(repo_id_or_url, [file_name], repo_type='model'):
+        try:
+            new_checksum = file.lfs.sha256
+            break
+        except Exception as e:
+            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
+            sys.exit(2)
+    if new_checksum is None:
+        try:
+            file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name)
+        except Exception as e:
+            print(f'Error from Hugging Face Hub: {str(e)}', file=sys.stderr)
+            sys.exit(2)
 else:
     response = requests.get(repo_id_or_url)
     if response.status_code == 200:
@@ -66,9 +77,13 @@ else:
         print(f'Error downloading file: {response.status_code}', file=sys.stderr)
         sys.exit(1)
 
-print(calculate_sha256(file_path))
 # Clean up the downloaded file
-os.remove(file_path)
+if new_checksum is None:
+    new_checksum = calculate_sha256(file_path)
+    print(new_checksum)
+    os.remove(file_path)
+else:
+    print(new_checksum)
 
 ")
 
 if [[ "$new_checksum" == "" ]]; then
```
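For context, the change above means the checker now prefers the Hub's stored LFS metadata over downloading multi-gigabyte model files just to hash them. A minimal standalone sketch of the same flow follows; the repo and file names are placeholders, and the `lfs.sha256` access mirrors the script above (it assumes the file is LFS-tracked whenever that metadata is present):

```python
# Hypothetical sketch: ask the Hub's paths-info API for the LFS sha256 first,
# and fall back to downloading and hashing only when no LFS metadata exists.
import hashlib
import os

from huggingface_hub import get_paths_info, hf_hub_download


def remote_sha256(repo_id: str, file_name: str) -> str:
    # Fast path: LFS-tracked files already expose their sha256 via the API.
    for info in get_paths_info(repo_id, [file_name], repo_type="model"):
        lfs = getattr(info, "lfs", None)  # None for non-LFS files and folders
        if lfs is not None:
            return lfs.sha256
    # Slow path: download the file into the local cache and hash it ourselves.
    path = hf_hub_download(repo_id=repo_id, filename=file_name)
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    os.remove(path)  # mirror the script's cleanup of the downloaded file
    return digest.hexdigest()


if __name__ == "__main__":
    # Placeholder repo and file names, not taken from this diff.
    print(remote_sha256("some-org/some-model-GGUF", "model.Q4_K_M.gguf"))
```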
.github/ci/modelslist.go (vendored, new file, 297 lines)

@@ -0,0 +1,297 @@
```go
package main

import (
	"fmt"
	"html/template"
	"io/ioutil"
	"os"

	"gopkg.in/yaml.v3"
)

var modelPageTemplate string = `
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>LocalAI models</title>
  <link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.css" rel="stylesheet" />
  <script src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@19.1.3/dist/lazyload.min.js"></script>

  <link
    rel="stylesheet"
    href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/styles/default.min.css"
  />
  <script
    defer
    src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.8.0/build/highlight.min.js"
  ></script>
  <script
    defer
    src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
  ></script>
  <script
    defer
    src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"
  ></script>
  <script
    defer
    src="https://cdn.jsdelivr.net/npm/dompurify@3.0.6/dist/purify.min.js"
  ></script>

  <link href="/static/general.css" rel="stylesheet" />
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Roboto:wght@400;500&display=swap" rel="stylesheet">
  <link
    href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700,900&display=swap"
    rel="stylesheet" />
  <link
    rel="stylesheet"
    href="https://cdn.jsdelivr.net/npm/tw-elements/css/tw-elements.min.css" />
  <script src="https://cdn.tailwindcss.com/3.3.0"></script>
  <script>
    tailwind.config = {
      darkMode: "class",
      theme: {
        fontFamily: {
          sans: ["Roboto", "sans-serif"],
          body: ["Roboto", "sans-serif"],
          mono: ["ui-monospace", "monospace"],
        },
      },
      corePlugins: {
        preflight: false,
      },
    };
  </script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.1.1/css/all.min.css">
  <script src="https://unpkg.com/htmx.org@1.9.12" integrity="sha384-ujb1lZYygJmzgSwoxRggbCHcjc0rB2XoQrxeTUQyRjrOnlCoYta87iKBWq3EsdM2" crossorigin="anonymous"></script>
</head>

<body class="bg-gray-900 text-gray-200">
<div class="flex flex-col min-h-screen">

  <nav class="bg-gray-800 shadow-lg">
    <div class="container mx-auto px-4 py-4">
      <div class="flex items-center justify-between">
        <div class="flex items-center">
          <a href="/" class="text-white text-xl font-bold"><img src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd" alt="LocalAI Logo" class="h-10 mr-3 border-2 border-gray-300 shadow rounded"></a>
          <a href="/" class="text-white text-xl font-bold">LocalAI</a>
        </div>
        <!-- Menu button for small screens -->
        <div class="lg:hidden">
          <button id="menu-toggle" class="text-gray-400 hover:text-white focus:outline-none">
            <i class="fas fa-bars fa-lg"></i>
          </button>
        </div>
        <!-- Navigation links -->
        <div class="hidden lg:flex lg:items-center lg:justify-end lg:flex-1 lg:w-0">
          <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
        </div>
      </div>
      <!-- Collapsible menu for small screens -->
      <div class="hidden lg:hidden" id="mobile-menu">
        <div class="pt-4 pb-3 border-t border-gray-700">
          <a href="https://localai.io" class="block text-gray-400 hover:text-white px-3 py-2 rounded mt-1" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
        </div>
      </div>
    </div>
  </nav>

  <style>
    .is-hidden {
      display: none;
    }
  </style>

  <div class="container mx-auto px-4 flex-grow">

    <div class="models mt-12">
      <h2 class="text-center text-3xl font-semibold text-gray-100">
        LocalAI model gallery list </h2><br>

      <h2 class="text-center text-3xl font-semibold text-gray-100">
        🖼️ Available {{.AvailableModels}} models</i> <a href="https://localai.io/models/" target="_blank" >
          <i class="fas fa-circle-info pr-2"></i>
        </a></h2>

      <h3>
        Refer to the Model gallery <a href="https://localai.io/models/" target="_blank" ><i class="fas fa-circle-info pr-2"></i></a> for more information on how to use the models with LocalAI.<br>
        You can install models with the CLI command <code>local-ai models install <model-name></code>. or by using the WebUI.
      </h3>

      <input class="form-control appearance-none block w-full mt-5 px-3 py-2 text-base font-normal text-gray-300 pb-2 mb-5 bg-gray-800 bg-clip-padding border border-solid border-gray-600 rounded transition ease-in-out m-0 focus:text-gray-300 focus:bg-gray-900 focus:border-blue-500 focus:outline-none" type="search"
        id="searchbox" placeholder="Live search keyword..">
      <div class="dark grid grid-cols-1 grid-rows-1 md:grid-cols-3 block rounded-lg shadow-secondary-1 dark:bg-surface-dark">
        {{ range $_, $model := .Models }}
        <div class="box me-4 mb-2 block rounded-lg bg-white shadow-secondary-1 dark:bg-gray-800 dark:bg-surface-dark dark:text-white text-surface pb-2">
          <div>
            {{ $icon := "https://upload.wikimedia.org/wikipedia/commons/6/65/No-Image-Placeholder.svg" }}
            {{ if $model.Icon }}
            {{ $icon = $model.Icon }}
            {{ end }}
            <div class="flex justify-center items-center">
              <img data-src="{{ $icon }}" alt="{{$model.Name}}" class="rounded-t-lg max-h-48 max-w-96 object-cover mt-3 lazy">
            </div>
            <div class="p-6 text-surface dark:text-white">
              <h5 class="mb-2 text-xl font-medium leading-tight">{{$model.Name}}</h5>
              <p class="mb-4 text-base truncate">{{ $model.Description }}</p>
            </div>
            <div class="px-6 pt-4 pb-2">

              <!-- Modal toggle -->
              <button data-modal-target="{{ $model.Name}}-modal" data-modal-toggle="{{ $model.Name }}-modal" class="block text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800" type="button">
                More info
              </button>

              <!-- Main modal -->
              <div id="{{ $model.Name}}-modal" tabindex="-1" aria-hidden="true" class="hidden overflow-y-auto overflow-x-hidden fixed top-0 right-0 left-0 z-50 justify-center items-center w-full md:inset-0 h-[calc(100%-1rem)] max-h-full">
                <div class="relative p-4 w-full max-w-2xl max-h-full">
                  <!-- Modal content -->
                  <div class="relative bg-white rounded-lg shadow dark:bg-gray-700">
                    <!-- Modal header -->
                    <div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
                      <h3 class="text-xl font-semibold text-gray-900 dark:text-white">
                        {{ $model.Name}}
                      </h3>
                      <button type="button" class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white" data-modal-hide="{{$model.Name}}-modal">
                        <svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 14 14">
                          <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6"/>
                        </svg>
                        <span class="sr-only">Close modal</span>
                      </button>
                    </div>
                    <!-- Modal body -->
                    <div class="p-4 md:p-5 space-y-4">
                      <div class="flex justify-center items-center">
                        <img data-src="{{ $icon }}" alt="{{$model.Name}}" class="lazy rounded-t-lg max-h-48 max-w-96 object-cover mt-3">
                      </div>

                      <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
                        {{ $model.Description }}
                      </p>

                      <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
                        To install the model with the CLI, run: <br>
                        <code> local-ai models install {{$model.Name}} </code> <br>
                        <hr>
                        See also <a href="https://localai.io/models/" target="_blank" >
                          Installation <i class="fas fa-circle-info pr-2"></i>
                        </a> to see how to install models with the REST API.
                      </p>

                      <p class="text-base leading-relaxed text-gray-500 dark:text-gray-400">
                        <ul>
                          {{ range $_, $u := $model.URLs }}
                          <li><a href="{{ $u }}" target=_blank><i class="fa-solid fa-link"></i> {{ $u }}</a></li>
                          {{ end }}
                        </ul>
                      </p>
                    </div>
                    <!-- Modal footer -->
                    <div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
                      <button data-modal-hide="{{ $model.Name}}-modal" type="button" class="py-2.5 px-5 ms-3 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-100 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700">Close</button>
                    </div>
                  </div>
                </div>
              </div>

            </div>
          </div>
        </div>
        {{ end }}

      </div>
    </div>
  </div>

  <script>
    var lazyLoadInstance = new LazyLoad({
      // Your custom settings go here
    });

    let cards = document.querySelectorAll('.box')

    function liveSearch() {
      let search_query = document.getElementById("searchbox").value;

      //Use innerText if all contents are visible
      //Use textContent for including hidden elements
      for (var i = 0; i < cards.length; i++) {
        if(cards[i].textContent.toLowerCase()
          .includes(search_query.toLowerCase())) {
          cards[i].classList.remove("is-hidden");
        } else {
          cards[i].classList.add("is-hidden");
        }
      }
    }

    //A little delay
    let typingTimer;
    let typeInterval = 500;
    let searchInput = document.getElementById('searchbox');

    searchInput.addEventListener('keyup', () => {
      clearTimeout(typingTimer);
      typingTimer = setTimeout(liveSearch, typeInterval);
    });
  </script>

</div>

<script src="https://cdnjs.cloudflare.com/ajax/libs/flowbite/2.3.0/flowbite.min.js"></script>
</body>
</html>
`

type GalleryModel struct {
	Name        string   `json:"name" yaml:"name"`
	URLs        []string `json:"urls" yaml:"urls"`
	Icon        string   `json:"icon" yaml:"icon"`
	Description string   `json:"description" yaml:"description"`
}

func main() {
	// read the YAML file which contains the models
	f, err := ioutil.ReadFile(os.Args[1])
	if err != nil {
		fmt.Println("Error reading file:", err)
		return
	}

	models := []*GalleryModel{}
	err = yaml.Unmarshal(f, &models)
	if err != nil {
		// write to stderr
		os.Stderr.WriteString("Error unmarshaling YAML: " + err.Error() + "\n")
		return
	}

	// render the template
	data := struct {
		Models          []*GalleryModel
		AvailableModels int
	}{
		Models:          models,
		AvailableModels: len(models),
	}
	tmpl := template.Must(template.New("modelPage").Parse(modelPageTemplate))

	err = tmpl.Execute(os.Stdout, data)
	if err != nil {
		fmt.Println("Error executing template:", err)
		return
	}
}
```
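The generator takes the path of a gallery YAML file as its only argument and writes the rendered HTML page to stdout; presumably CI invokes it along the lines of `go run .github/ci/modelslist.go gallery/index.yaml > docs/static/gallery.html`. The input path here is an assumption, while the output path matches the `docs/static/gallery.html` entry added to `.gitignore` further down.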
.github/workflows/generate_grpc_cache.yaml (vendored, 4 changes)

```diff
@@ -17,7 +17,7 @@ jobs:
         include:
           - grpc-base-image: ubuntu:22.04
             runs-on: 'ubuntu-latest'
-            platforms: 'linux/amd64'
+            platforms: 'linux/amd64,linux/arm64'
     runs-on: ${{matrix.runs-on}}
     steps:
       - name: Release space from worker
@@ -84,7 +84,7 @@ jobs:
           build-args: |
             GRPC_BASE_IMAGE=${{ matrix.grpc-base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.63.0
+            GRPC_VERSION=v1.64.0
           context: .
           file: ./Dockerfile
           cache-to: type=gha,ignore-error=true
```
.github/workflows/generate_intel_image.yaml (vendored, new file, 59 lines)

@@ -0,0 +1,59 @@
```yaml
name: 'generate and publish intel docker caches'

on:
  workflow_dispatch:
  push:
    branches:
      - master

concurrency:
  group: intel-cache-${{ github.head_ref || github.ref }}-${{ github.repository }}
  cancel-in-progress: true

jobs:
  generate_caches:
    strategy:
      matrix:
        include:
          - base-image: intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04
            runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64'
    runs-on: ${{matrix.runs-on}}
    steps:
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
        with:
          platforms: all
      - name: Login to DockerHub
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Login to quay
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@master

      - name: Checkout
        uses: actions/checkout@v4

      - name: Cache Intel images
        uses: docker/build-push-action@v5
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BASE_IMAGE=${{ matrix.base-image }}
          context: .
          file: ./Dockerfile
          tags: quay.io/go-skynet/intel-oneapi-base:latest
          push: true
          target: intel
          platforms: ${{ matrix.platforms }}
```
.github/workflows/image-pr.yml (vendored, 4 changes)

```diff
@@ -68,7 +68,7 @@ jobs:
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: 'sycl-f16-ffmpeg'
             ffmpeg: 'true'
@@ -110,7 +110,7 @@ jobs:
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: 'sycl-f16-ffmpeg-core'
             ffmpeg: 'true'
```
.github/workflows/image.yml (vendored, 14 changes)

```diff
@@ -148,7 +148,7 @@ jobs:
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f16-ffmpeg'
             ffmpeg: 'true'
@@ -161,7 +161,7 @@ jobs:
           - build-type: 'sycl_f32'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f32-ffmpeg'
             ffmpeg: 'true'
@@ -175,7 +175,7 @@ jobs:
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f16-core'
             ffmpeg: 'false'
@@ -185,7 +185,7 @@ jobs:
           - build-type: 'sycl_f32'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f32-core'
             ffmpeg: 'false'
@@ -195,7 +195,7 @@ jobs:
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f16-ffmpeg-core'
             ffmpeg: 'true'
@@ -205,7 +205,7 @@ jobs:
           - build-type: 'sycl_f32'
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
+            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
             tag-suffix: '-sycl-f32-ffmpeg-core'
             ffmpeg: 'true'
@@ -260,7 +260,7 @@ jobs:
       matrix:
         include:
           - build-type: ''
-            platforms: 'linux/amd64'
+            platforms: 'linux/amd64,linux/arm64'
             tag-latest: 'auto'
             tag-suffix: '-ffmpeg-core'
             ffmpeg: 'true'
```
.github/workflows/image_build.yml (vendored, 54 changes)

```diff
@@ -136,6 +136,7 @@ jobs:
 
       - name: Docker meta
         id: meta
+        if: github.event_name != 'pull_request'
         uses: docker/metadata-action@v5
         with:
           images: |
@@ -148,7 +149,20 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.tag-suffix }}
+
+      - name: Docker meta for PR
+        id: meta_pull_request
+        if: github.event_name == 'pull_request'
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ttl.sh/localai-ci-pr-${{ github.event.number }}
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{raw}}
+            type=sha
+          flavor: |
+            latest=${{ inputs.tag-latest }}
+            suffix=${{ inputs.tag-suffix }}
       - name: Docker meta AIO (quay.io)
         if: inputs.aio != ''
         id: meta_aio
@@ -174,7 +188,6 @@ jobs:
             type=ref,event=branch
             type=semver,pattern={{raw}}
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.aio }}
-
       - name: Set up QEMU
@@ -203,6 +216,7 @@ jobs:
 
       - name: Build and push
         uses: docker/build-push-action@v5
+        if: github.event_name != 'pull_request'
         with:
           builder: ${{ steps.buildx.outputs.name }}
           # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
@@ -218,7 +232,7 @@ jobs:
             BASE_IMAGE=${{ inputs.base-image }}
             GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
             GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
-            GRPC_VERSION=v1.63.0
+            GRPC_VERSION=v1.64.0
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile
@@ -227,7 +241,39 @@ jobs:
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
+
+### Start testing image
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        if: github.event_name == 'pull_request'
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache.
+          # This means that even the MAKEFLAGS have to be an EXACT match.
+          # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch.
+          # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded
+          build-args: |
+            BUILD_TYPE=${{ inputs.build-type }}
+            CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }}
+            CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }}
+            FFMPEG=${{ inputs.ffmpeg }}
+            IMAGE_TYPE=${{ inputs.image-type }}
+            BASE_IMAGE=${{ inputs.base-image }}
+            GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }}
+            GRPC_MAKEFLAGS=--jobs=4 --output-sync=target
+            GRPC_VERSION=v1.64.0
+            MAKEFLAGS=${{ inputs.makeflags }}
+          context: .
+          file: ./Dockerfile
+          cache-from: type=gha
+          platforms: ${{ inputs.platforms }}
+          push: true
+          tags: ${{ steps.meta_pull_request.outputs.tags }}
+          labels: ${{ steps.meta_pull_request.outputs.labels }}
+      - name: Testing image
+        if: github.event_name == 'pull_request'
+        run: |
+          echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY
+## End testing image
       - name: Build and push AIO image
         if: inputs.aio != ''
         uses: docker/build-push-action@v5
```
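A note on the testing flow added above: PR builds are pushed to ttl.sh, an anonymous, ephemeral registry where images expire automatically, so reviewers can pull the exact image built from a PR without any registry credentials, e.g. `docker pull ttl.sh/localai-ci-pr-<pr-number>:<version>`. The placeholders here stand in for the `github.event.number` and metadata-action version values echoed into the step summary.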
.github/workflows/release.yaml (vendored, 204 changes)

```diff
@@ -5,7 +5,7 @@ on:
     - pull_request
 
 env:
-  GRPC_VERSION: v1.63.0
+  GRPC_VERSION: v1.64.0
 
 permissions:
   contents: write
@@ -15,7 +15,8 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  build-linux:
+
+  build-linux-arm:
     runs-on: ubuntu-latest
     steps:
       - name: Clone
@@ -26,10 +27,133 @@ jobs:
         with:
           go-version: '1.21.x'
           cache: false
+
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache
+          sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+      - name: Install CUDA Dependencies
+        run: |
+          curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/cuda-keyring_1.1-1_all.deb
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb
+          sudo apt-get update
+          sudo apt-get install -y cuda-cross-aarch64 cuda-nvcc-cross-aarch64-${CUDA_VERSION} libcublas-cross-aarch64-${CUDA_VERSION}
+        env:
+          CUDA_VERSION: 12-4
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v4
+        with:
+          path: grpc
+          key: ${{ runner.os }}-arm-grpc-${{ env.GRPC_VERSION }}
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
+          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+            -DgRPC_BUILD_TESTS=OFF \
+            ../.. && sudo make --jobs 5 --output-sync=target
+      - name: Install gRPC
+        run: |
+          GNU_HOST=aarch64-linux-gnu
+          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
+          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
+
+          CROSS_TOOLCHAIN=/usr/$GNU_HOST
+          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
+          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
+
+          # https://cmake.org/cmake/help/v3.13/manual/cmake-toolchains.7.html#cross-compiling-for-linux
+          echo "set(CMAKE_SYSTEM_NAME Linux)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_SYSTEM_PROCESSOR arm)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_STAGING_PREFIX $CROSS_STAGING_PREFIX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_SYSROOT ${CROSS_TOOLCHAIN}/sysroot)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_C_COMPILER /usr/bin/$C_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_CXX_COMPILER /usr/bin/$CXX_COMPILER_ARM_LINUX)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN && \
+          echo "set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)" >> $CMAKE_CROSS_TOOLCHAIN
+          GRPC_DIR=$PWD/grpc
+          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install && \
+          GRPC_CROSS_BUILD_DIR=$GRPC_DIR/cmake/cross_build && \
+          mkdir -p $GRPC_CROSS_BUILD_DIR && \
+          cd $GRPC_CROSS_BUILD_DIR && \
+          cmake -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_INSTALL_PREFIX=$CROSS_TOOLCHAIN/grpc_install \
+            ../.. && \
+          sudo make -j`nproc` install
+      - name: Build
+        id: build
+        run: |
+          GNU_HOST=aarch64-linux-gnu
+          C_COMPILER_ARM_LINUX=$GNU_HOST-gcc
+          CXX_COMPILER_ARM_LINUX=$GNU_HOST-g++
+
+          CROSS_TOOLCHAIN=/usr/$GNU_HOST
+          CROSS_STAGING_PREFIX=$CROSS_TOOLCHAIN/stage
+          CMAKE_CROSS_TOOLCHAIN=/tmp/arm.toolchain.cmake
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+          export PATH=$PATH:$GOPATH/bin
+          export PATH=/usr/local/cuda/bin:$PATH
+          sudo rm -rf /usr/aarch64-linux-gnu/lib/libstdc++.so.6
+          sudo cp -rf /usr/aarch64-linux-gnu/lib/libstdc++.so* /usr/aarch64-linux-gnu/lib/libstdc++.so.6
+          sudo cp /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 ld.so
+          GO_TAGS=p2p \
+          BACKEND_LIBS="./grpc/cmake/cross_build/third_party/re2/libre2.a ./grpc/cmake/cross_build/libgrpc.a ./grpc/cmake/cross_build/libgrpc++.a ./grpc/cmake/cross_build/third_party/protobuf/libprotobuf.a /usr/aarch64-linux-gnu/lib/libc.so.6 /usr/aarch64-linux-gnu/lib/libstdc++.so.6 /usr/aarch64-linux-gnu/lib/libgomp.so.1 /usr/aarch64-linux-gnu/lib/libm.so.6 /usr/aarch64-linux-gnu/lib/libgcc_s.so.1 /usr/aarch64-linux-gnu/lib/libdl.so.2 /usr/aarch64-linux-gnu/lib/libpthread.so.0 ./ld.so" \
+          GOOS=linux \
+          GOARCH=arm64 \
+          CMAKE_ARGS="-DProtobuf_INCLUDE_DIRS=$CROSS_STAGING_PREFIX/include -DProtobuf_DIR=$CROSS_STAGING_PREFIX/lib/cmake/protobuf -DgRPC_DIR=$CROSS_STAGING_PREFIX/lib/cmake/grpc -DCMAKE_TOOLCHAIN_FILE=$CMAKE_CROSS_TOOLCHAIN -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++" make dist-cross-linux-arm64
+      - uses: actions/upload-artifact@v4
+        with:
+          name: LocalAI-linux-arm64
+          path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
+
+  build-linux:
+    runs-on: arc-runner-set
+    steps:
+      - name: Force Install GIT latest
+        run: |
+          sudo apt-get update \
+            && sudo apt-get install -y software-properties-common \
+            && sudo apt-get update \
+            && sudo add-apt-repository -y ppa:git-core/ppa \
+            && sudo apt-get update \
+            && sudo apt-get install -y git
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - uses: actions/setup-go@v5
+        with:
+          go-version: '1.21.x'
+          cache: false
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache cmake
+      - name: Intel Dependencies
+        run: |
+          wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
+          echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
+          sudo apt update
+          sudo apt install -y intel-basekit
       - name: Install CUDA Dependencies
         run: |
           curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
@@ -38,6 +162,31 @@ jobs:
           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
         env:
           CUDA_VERSION: 12-3
+      - name: "Install Hipblas"
+        env:
+          ROCM_VERSION: "6.1"
+          AMDGPU_VERSION: "6.1"
+        run: |
+          set -ex
+
+          sudo apt-get update
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
+
+          curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
+
+          printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | sudo tee /etc/apt/sources.list.d/rocm.list
+
+          printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | sudo tee /etc/apt/sources.list.d/amdgpu.list
+          printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
+          sudo apt-get update
+
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
+            hipblas-dev rocm-dev \
+            rocblas-dev
+
+          sudo apt-get clean
+          sudo rm -rf /var/lib/apt/lists/*
+          sudo ldconfig
       - name: Cache grpc
         id: cache-grpc
         uses: actions/cache@v4
@@ -54,14 +203,20 @@ jobs:
       - name: Install gRPC
         run: |
           cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
+      # BACKEND_LIBS needed for gpu-workload: /opt/intel/oneapi/*/lib/libiomp5.so /opt/intel/oneapi/*/lib/libmkl_core.so /opt/intel/oneapi/*/lib/libmkl_core.so.2 /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so /opt/intel/oneapi/*/lib/libmkl_intel_ilp64.so.2 /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so /opt/intel/oneapi/*/lib/libmkl_sycl_blas.so.4 /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so /opt/intel/oneapi/*/lib/libmkl_tbb_thread.so.2 /opt/intel/oneapi/*/lib/libsycl.so /opt/intel/oneapi/*/lib/libsycl.so.7 /opt/intel/oneapi/*/lib/libsycl.so.7.1.0 /opt/rocm-*/lib/libamdhip64.so /opt/rocm-*/lib/libamdhip64.so.5 /opt/rocm-*/lib/libamdhip64.so.6 /opt/rocm-*/lib/libamdhip64.so.6.1.60100 /opt/rocm-*/lib/libhipblas.so /opt/rocm-*/lib/libhipblas.so.2 /opt/rocm-*/lib/libhipblas.so.2.1.60100 /opt/rocm-*/lib/librocblas.so /opt/rocm-*/lib/librocblas.so.4 /opt/rocm-*/lib/librocblas.so.4.1.60100 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/librt.so.1 /usr/local/cuda-*/targets/x86_64-linux/lib/libcublas.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcublasLt.so /usr/local/cuda-*/targets/x86_64-linux/lib/libcudart.so /usr/local/cuda-*/targets/x86_64-linux/lib/stubs/libcuda.so
       - name: Build
         id: build
         run: |
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
           export PATH=$PATH:$GOPATH/bin
           export PATH=/usr/local/cuda/bin:$PATH
-          make dist
+          export PATH=/opt/rocm/bin:$PATH
+          source /opt/intel/oneapi/setvars.sh
+          sudo cp /lib64/ld-linux-x86-64.so.2 ld.so
+          GO_TAGS=p2p \
+          BACKEND_LIBS="./ld.so /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libm.so.6 /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 /usr/lib/x86_64-linux-gnu/libc.so.6 /usr/lib/x86_64-linux-gnu/libgomp.so.1" \
+          make -j4 dist
       - uses: actions/upload-artifact@v4
         with:
           name: LocalAI-linux
@@ -72,7 +227,13 @@ jobs:
         with:
           files: |
             release/*
-
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
   build-stablediffusion:
     runs-on: ubuntu-latest
     steps:
@@ -86,18 +247,27 @@ jobs:
           cache: false
       - name: Dependencies
         run: |
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
       - name: Build stablediffusion
         run: |
           export PATH=$PATH:$GOPATH/bin
           make backend-assets/grpc/stablediffusion
           mkdir -p release && cp backend-assets/grpc/stablediffusion release
+        env:
+          GO_TAGS: stablediffusion
       - uses: actions/upload-artifact@v4
         with:
           name: stablediffusion
           path: release/
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          files: |
+            release/*
 
   build-macOS-arm64:
     runs-on: macos-14
@@ -113,15 +283,16 @@ jobs:
       - name: Dependencies
         run: |
           brew install protobuf grpc
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
       - name: Build
         id: build
         run: |
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
           export PATH=$PATH:$GOPATH/bin
-          make dist
+
+          BACKEND_LIBS="$(ls /opt/homebrew/opt/grpc/lib/*.dylib /opt/homebrew/opt/re2/lib/*.dylib /opt/homebrew/opt/openssl@3/lib/*.dylib /opt/homebrew/opt/protobuf/lib/*.dylib /opt/homebrew/opt/abseil/lib/*.dylib | xargs)" GO_TAGS=p2p make dist
       - uses: actions/upload-artifact@v4
         with:
           name: LocalAI-MacOS-arm64
@@ -132,3 +303,10 @@ jobs:
         with:
           files: |
             release/*
+      - name: Setup tmate session if tests fail
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3.18
+        with:
+          detached: true
+          connect-timeout-seconds: 180
+          limit-access-to-actor: true
```
.github/workflows/test-extra.yml (vendored, 44 changes)

```diff
@@ -29,7 +29,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
 
       - name: Test transformers
         run: |
@@ -51,7 +51,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
 
       - name: Test sentencetransformers
         run: |
@@ -74,7 +74,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
 
       - name: Test rerankers
         run: |
@@ -96,7 +96,7 @@ jobs:
           sudo apt-get install -y libopencv-dev
           # Install UV
           curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
       - name: Test diffusers
         run: |
           make --jobs=5 --output-sync=target -C backend/python/diffusers
@@ -117,12 +117,34 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
 
       - name: Test parler-tts
         run: |
           make --jobs=5 --output-sync=target -C backend/python/parler-tts
           make --jobs=5 --output-sync=target -C backend/python/parler-tts test
 
+  tests-openvoice:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install build-essential ffmpeg
+          # Install UV
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+          sudo apt-get install -y libopencv-dev
+          pip install --user grpcio-tools==1.64.0
+
+      - name: Test openvoice
+        run: |
+          make --jobs=5 --output-sync=target -C backend/python/openvoice
+          make --jobs=5 --output-sync=target -C backend/python/openvoice test
+
   tests-transformers-musicgen:
     runs-on: ubuntu-latest
@@ -139,7 +161,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
 
       - name: Test transformers-musicgen
         run: |
@@ -163,7 +185,7 @@ jobs:
     #      curl -LsSf https://astral.sh/uv/install.sh | sh
     #      sudo apt-get install -y ca-certificates cmake curl patch python3-pip
     #      sudo apt-get install -y libopencv-dev
-    #      pip install --user grpcio-tools==1.63.0
+    #      pip install --user grpcio-tools==1.64.0
 
     # - name: Test petals
     #   run: |
@@ -227,7 +249,7 @@ jobs:
     #      curl -LsSf https://astral.sh/uv/install.sh | sh
     #      sudo apt-get install -y ca-certificates cmake curl patch python3-pip
     #      sudo apt-get install -y libopencv-dev
-    #      pip install --user grpcio-tools==1.63.0
+    #      pip install --user grpcio-tools==1.64.0
 
     # - name: Test bark
     #   run: |
@@ -252,7 +274,7 @@ jobs:
     #      curl -LsSf https://astral.sh/uv/install.sh | sh
     #      sudo apt-get install -y ca-certificates cmake curl patch python3-pip
     #      sudo apt-get install -y libopencv-dev
-    #      pip install --user grpcio-tools==1.63.0
+    #      pip install --user grpcio-tools==1.64.0
     # - name: Test vllm
     #   run: |
     #     make --jobs=5 --output-sync=target -C backend/python/vllm
@@ -272,7 +294,7 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           sudo apt-get install -y ca-certificates cmake curl patch python3-pip
           sudo apt-get install -y libopencv-dev
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
       - name: Test vall-e-x
         run: |
           make --jobs=5 --output-sync=target -C backend/python/vall-e-x
@@ -292,7 +314,7 @@ jobs:
           sudo apt-get install -y ca-certificates cmake curl patch espeak espeak-ng python3-pip
           # Install UV
           curl -LsSf https://astral.sh/uv/install.sh | sh
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
       - name: Test coqui
         run: |
           make --jobs=5 --output-sync=target -C backend/python/coqui
```
.github/workflows/test.yml (vendored, 10 changes)

```diff
@@ -10,7 +10,7 @@ on:
       - '*'
 
 env:
-  GRPC_VERSION: v1.63.0
+  GRPC_VERSION: v1.64.0
 
 concurrency:
   group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
@@ -93,8 +93,8 @@ jobs:
           sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION}
           export CUDACXX=/usr/local/cuda/bin/nvcc
 
-          go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+          go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b
 
           # The python3-grpc-tools package in 22.04 is too old
           pip install --user grpcio-tools
@@ -178,7 +178,7 @@ jobs:
           submodules: true
       - name: Build images
         run: |
-          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
+          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
           BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
       - name: Test
         run: |
@@ -213,7 +213,7 @@ jobs:
       - name: Dependencies
         run: |
           brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc
-          pip install --user grpcio-tools==1.63.0
+          pip install --user grpcio-tools==1.64.0
       - name: Test
         run: |
           export C_INCLUDE_PATH=/usr/local/include
```
.gitignore (vendored, 6 changes)

```diff
@@ -6,6 +6,9 @@ get-sources
 prepare-sources
 /backend/cpp/llama/grpc-server
 /backend/cpp/llama/llama.cpp
+/backend/cpp/llama-*
+
+*.log
 
 go-ggml-transformers
 go-gpt2
@@ -39,6 +42,7 @@ backend-assets/*
 !backend-assets/.keep
 prepare
 /ggml-metal.metal
+docs/static/gallery.html
 
 # Protobuf generated files
 *.pb.go
@@ -49,4 +53,4 @@ prepare
 .scannerwork
 
 # backend virtual environments
-**/venv
+**/venv
```
93
Dockerfile
93
Dockerfile
@@ -1,45 +1,40 @@
|
||||
ARG IMAGE_TYPE=extras
|
||||
ARG BASE_IMAGE=ubuntu:22.04
|
||||
ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
|
||||
ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
|
||||
|
||||
# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
|
||||
FROM ${BASE_IMAGE} AS requirements-core
|
||||
|
||||
USER root
|
||||
|
||||
ARG GO_VERSION=1.21.7
|
||||
ARG GO_VERSION=1.22.4
|
||||
ARG TARGETARCH
|
||||
ARG TARGETVARIANT
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
|
||||
|
||||
ARG GO_TAGS="stablediffusion tinydream tts"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
ccache \
|
||||
ca-certificates \
|
||||
cmake \
|
||||
curl \
|
||||
git \
|
||||
python3-pip \
|
||||
python-is-python3 \
|
||||
unzip && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
pip install --upgrade pip
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Go
|
||||
RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz
|
||||
ENV PATH $PATH:/root/go/bin:/usr/local/go/bin
|
||||
|
||||
# Install grpc compilers
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest

# Install grpcio-tools (the version in 22.04 is too old)
RUN pip install --user grpcio-tools
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.1 && \
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af

COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/
RUN update-ca-certificates
@@ -84,10 +79,16 @@ RUN apt-get update && \
apt-get install -y --no-install-recommends \
espeak-ng \
espeak \
python3-pip \
python-is-python3 \
python3-dev \
python3-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
rm -rf /var/lib/apt/lists/* && \
pip install --upgrade pip

# Install grpcio-tools (the version in 22.04 is too old)
RUN pip install --user grpcio-tools

###################################
###################################
@@ -98,21 +99,48 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers

ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
ARG CUDA_MINOR_VERSION=7
ARG CUDA_MINOR_VERSION=8

ENV BUILD_TYPE=${BUILD_TYPE}

# CuBLAS requirements
RUN <<EOT bash
if [ "${BUILD_TYPE}" = "cublas" ]; then
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common pciutils
if [ "amd64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/cuda-keyring_1.1-1_all.deb
fi
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusolver-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
fi
EOT

RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common && \
software-properties-common pciutils && \
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
dpkg -i cuda-keyring_1.1-1_all.deb && \
rm -f cuda-keyring_1.1-1_all.deb && \
apt-get update && \
apt-get install -y --no-install-recommends \
cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcufft-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcurand-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
libcusparse-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
@@ -145,13 +173,24 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; then \
###################################
###################################

# Temporary workaround for Intel's repository to work correctly
# https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/APT-Repository-not-working-signatures-invalid/m-p/1599436/highlight/true#M36143
# This is a temporary workaround until Intel fixes their repository
FROM ${INTEL_BASE_IMAGE} AS intel
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list

###################################
###################################

# The grpc target does one thing, it builds and installs GRPC. This is in its own layer so that it can be effectively cached by CI.
# You probably don't need to change anything here, and if you do, make sure that CI is adjusted so that the cache continues to work.
FROM ${GRPC_BASE_IMAGE} AS grpc

# This is a bit of a hack, but it's required in order to be able to effectively cache this layer in CI
ARG GRPC_MAKEFLAGS="-j4 -Otarget"
ARG GRPC_VERSION=v1.58.0
ARG GRPC_VERSION=v1.64.2

ENV MAKEFLAGS=${GRPC_MAKEFLAGS}

@@ -184,7 +223,7 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
# Adjustments to the build process should likely be made here.
FROM requirements-drivers AS builder

ARG GO_TAGS="stablediffusion tts"
ARG GO_TAGS="stablediffusion tts p2p"
ARG GRPC_BACKENDS
ARG MAKEFLAGS

@@ -206,9 +245,18 @@ RUN make prepare
# We need protoc installed, and the version in 22.04 is too old. We will create one as part of installing the GRPC build below,
# but that will also bring in a newer version of absl, which stablediffusion cannot compile with. This version of protoc is only
# here so that we can generate the grpc code for the stablediffusion build
RUN curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
RUN <<EOT bash
if [ "amd64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-x86_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
if [ "arm64" = "$TARGETARCH" ]; then
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v27.1/protoc-27.1-linux-aarch_64.zip -o protoc.zip && \
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
rm protoc.zip
fi
EOT

# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
@@ -305,6 +353,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vall-e-x" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/vall-e-x \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/openvoice \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "petals" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/petals \
; fi && \
@@ -340,7 +391,7 @@ RUN mkdir -p /build/models
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1


VOLUME /build/models
EXPOSE 8080
ENTRYPOINT [ "/build/entrypoint.sh" ]

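The health check polls whatever URL `HEALTHCHECK_ENDPOINT` points at; the default is defined earlier in the Dockerfile. A minimal sketch of overriding it at run time — the endpoint value here is illustrative, not the documented default:

```bash
# probe a custom endpoint instead of the built-in default (illustrative value)
docker run -ti --name local-ai -p 8080:8080 \
  -e HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz \
  localai/localai:latest
```
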
Makefile (122 changed lines)
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai

# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=dc685be46622a8fabfd57cfa804237c8f15679b8
CPPLLAMA_VERSION?=37bef8943312d91183ff06d8f1214082a17344a5

# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,10 +16,10 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_CPP_VERSION?=4ef8d9f44eb402c528ab6d990ab50a9f4f666347
WHISPER_CPP_VERSION?=b29b3b29240aac8b71ce8e5a4360c1f1562ad66f

# bert.cpp version
BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4

# go-piper version
PIPER_VERSION?=9d0100873a7dbb0824dfea40e8cec70a1b110759
@@ -100,7 +100,7 @@ ifeq ($(BUILD_TYPE),cublas)
CGO_LDFLAGS+=-lcublas -lcudart -L$(CUDA_LIBPATH)
export LLAMA_CUBLAS=1
export WHISPER_CUDA=1
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda
CGO_LDFLAGS_WHISPER+=-L$(CUDA_LIBPATH)/stubs/ -lcuda -lcufft
endif

ifeq ($(BUILD_TYPE),hipblas)
@@ -112,7 +112,7 @@ ifeq ($(BUILD_TYPE),hipblas)
# llama-ggml has no hipblas support, so override it here.
export STABLE_BUILD_TYPE=
export WHISPER_HIPBLAS=1
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib
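Both GPU_TARGETS and AMDGPU_TARGETS use `?=`, so the widened default list (now covering CDNA and RDNA3 parts) can be narrowed at build time to cut compile time. A hedged sketch — the single target name is an assumption for an RX 7900-class GPU:

```bash
# hipblas build restricted to one architecture (gfx1100 is illustrative)
make BUILD_TYPE=hipblas GPU_TARGETS=gfx1100 build
```
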
@@ -158,6 +158,8 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
@@ -311,28 +313,53 @@ build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
ifneq ($(BACKEND_LIBS),)
$(MAKE) backend-assets/lib
cp $(BACKEND_LIBS) backend-assets/lib/
endif
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build

build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build

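build-minimal now pins the llama-cpp-avx2 variant explicitly instead of the generic llama-cpp backend. For reference, the reduced entry points are invoked as plain make targets:

```bash
# llama-cpp-avx2 backend only, no extra Go tags
make build-minimal

# API server only, no bundled gRPC backends
make build-api
```
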
backend-assets/lib:
mkdir -p backend-assets/lib

dist:
STATIC=true $(MAKE) backend-assets/grpc/llama-cpp-avx2
$(MAKE) backend-assets/grpc/llama-cpp-avx2
ifeq ($(OS),Darwin)
$(info ${GREEN}I Skip CUDA build on MacOS${RESET})
$(info ${GREEN}I Skip CUDA/hipblas build on MacOS${RESET})
else
$(MAKE) backend-assets/grpc/llama-cpp-cuda
$(MAKE) backend-assets/grpc/llama-cpp-hipblas
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f16
$(MAKE) backend-assets/grpc/llama-cpp-sycl_f32
endif
$(MAKE) build
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-$(ARCH)
shasum -a 256 release/$(BINARY_NAME)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(OS)-$(ARCH).sha256
else
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH) > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH).sha256
endif

dist-cross-linux-arm64:
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
STATIC=true $(MAKE) build
mkdir -p release
# if BUILD_ID is empty, then we don't append it to the binary name
ifeq ($(BUILD_ID),)
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(OS)-arm64
shasum -a 256 release/$(BINARY_NAME)-$(OS)-arm64 > release/$(BINARY_NAME)-$(OS)-arm64.sha256
else
cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64
shasum -a 256 release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64 > release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-arm64.sha256
endif

osx-signed: build
@@ -443,7 +470,7 @@ protogen-clean: protogen-go-clean protogen-python-clean
.PHONY: protogen-go
protogen-go:
mkdir -p pkg/grpc/proto
protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \
backend/backend.proto

.PHONY: protogen-go-clean
@@ -452,10 +479,10 @@ protogen-go-clean:
$(RM) bin/*

.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen openvoice-protogen

.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean openvoice-protogen-clean

.PHONY: autogptq-protogen
autogptq-protogen:
@@ -569,6 +596,14 @@ vall-e-x-protogen:
vall-e-x-protogen-clean:
$(MAKE) -C backend/python/vall-e-x protogen-clean

.PHONY: openvoice-protogen
openvoice-protogen:
$(MAKE) -C backend/python/openvoice protogen

.PHONY: openvoice-protogen-clean
openvoice-protogen-clean:
$(MAKE) -C backend/python/openvoice protogen-clean

.PHONY: vllm-protogen
vllm-protogen:
$(MAKE) -C backend/python/vllm protogen
@@ -592,6 +627,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/transformers-musicgen
$(MAKE) -C backend/python/parler-tts
$(MAKE) -C backend/python/vall-e-x
$(MAKE) -C backend/python/openvoice
$(MAKE) -C backend/python/exllama
$(MAKE) -C backend/python/petals
$(MAKE) -C backend/python/exllama2
@@ -659,6 +695,14 @@ else
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif

# This target is for manually building a variant with auto-detected flags
backend-assets/grpc/llama-cpp: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-cpp
$(MAKE) -C backend/cpp/llama-cpp purge
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp

backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-avx2
$(MAKE) -C backend/cpp/llama-avx2 purge
@@ -691,6 +735,38 @@ backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda

backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
$(MAKE) -C backend/cpp/llama-hipblas purge
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas

backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16

backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32

backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc

backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
mkdir -p backend-assets/util/
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server

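The llama-cpp-grpc variant is built with -DLLAMA_RPC=ON and the rpc-server binary it produces is shipped as a utility, so a single build yields both halves of distributed inference: workers run the rpc-server, and the grpc backend consumes them (see the LLAMACPP_GRPC_SERVERS hook further down). A hedged sketch of starting a worker — the address, port, and flag names follow upstream llama.cpp's rpc-server and are assumptions here:

```bash
# on each worker node (illustrative address/port)
./backend-assets/util/llama-cpp-rpc-server --host 0.0.0.0 --port 50052
```
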
backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
@@ -764,3 +840,25 @@ docker-image-intel-xpu:
.PHONY: swagger
swagger:
swag init -g core/http/app.go --output swagger

.PHONY: gen-assets
gen-assets:
$(GOCMD) run core/dependencies_manager/manager.go embedded/webui_static.yaml core/http/static/assets

## Documentation
docs/layouts/_default:
mkdir -p docs/layouts/_default

docs/static/gallery.html: docs/layouts/_default
$(GOCMD) run ./.github/ci/modelslist.go ./gallery/index.yaml > docs/static/gallery.html

docs/public: docs/layouts/_default docs/static/gallery.html
cd docs && hugo --minify

docs-clean:
rm -rf docs/public
rm -rf docs/static/gallery.html

.PHONY: docs
docs: docs/static/gallery.html
cd docs && hugo serve
README.md (71 changed lines)
@@ -46,18 +46,32 @@

**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with the OpenAI (and Elevenlabs, Anthropic, ...) API specifications for local AI inferencing. It allows you to run LLMs, generate images and audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families. It does not require a GPU. It is created and maintained by [Ettore Di Giacinto](https://github.com/mudler).


|
||||
|
||||
```bash
|
||||
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
||||
# Alternative images:
|
||||
# - if you have an Nvidia GPU:
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
|
||||
# - without preconfigured models
|
||||
# docker run -ti --name local-ai -p 8080:8080 localai/localai:latest
|
||||
# - without preconfigured models for Nvidia GPUs
|
||||
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
|
||||
```
|
||||
|
||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||
|
||||
## 🔥🔥 Hot topics / Roadmap
|
||||
|
||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||
|
||||
- 🆕 You can now browse the model gallery without LocalAI! Check out https://models.localai.io
- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 👉 Docs https://localai.io/features/distribute/
- 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334
- 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328
- 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324
- Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
- Reranker API: https://github.com/mudler/LocalAI/pull/2121
- Gallery WebUI: https://github.com/mudler/LocalAI/pull/2104
- llama3: https://github.com/mudler/LocalAI/discussions/2076
- Parler-TTS: https://github.com/mudler/LocalAI/pull/2027
- Openvino support: https://github.com/mudler/LocalAI/pull/1892
- Vector store: https://github.com/mudler/LocalAI/pull/1795
- All-in-one container image: https://github.com/mudler/LocalAI/issues/1855

Hot topics (looking for contributors):

@@ -70,30 +84,19 @@ Hot topics (looking for contributors):

If you want to help and contribute, issues up for grabs: https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22up+for+grabs%22

## 💻 [Getting started](https://localai.io/basics/getting_started/index.html)

For a detailed step-by-step introduction, refer to the [Getting Started](https://localai.io/basics/getting_started/index.html) guide.

For those in a hurry, here's a straightforward one-liner to launch a LocalAI AIO (All-in-one) image using `docker`:

```bash
docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
# or, if you have an Nvidia GPU:
# docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
```

## 🚀 [Features](https://localai.io/features/)

- 📖 [Text generation with GPTs](https://localai.io/features/text-generation/) (`llama.cpp`, `gpt4all.cpp`, ... [:book: and more](https://localai.io/model-compatibility/index.html#model-compatibility-table))
- 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
- 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
- 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
- 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
- ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)
- 🖼️ [Download Models directly from Huggingface](https://localai.io/models/)
- 🥽 [Vision API](https://localai.io/features/gpt-vision/)
- 🆕 [Reranker API](https://localai.io/features/reranker/)
- 📈 [Reranker API](https://localai.io/features/reranker/)
- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)

## 💻 Usage

@@ -107,6 +110,7 @@ Build and deploy custom containers:
WebUIs:
- https://github.com/Jirubizu/localai-admin
- https://github.com/go-skynet/LocalAI-frontend
- QA-Pilot (an interactive chat project that leverages LocalAI LLMs for rapid understanding and navigation of GitHub code repositories): https://github.com/reid41/QA-Pilot

Model galleries
- https://github.com/go-skynet/model-gallery
@@ -116,16 +120,17 @@ Other:
- VSCode extension https://github.com/badgooooor/localai-vscode-plugin
- Terminal utility https://github.com/djcopley/ShellOracle
- Local Smart assistant https://github.com/mudler/LocalAGI
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation
- Home Assistant https://github.com/sammcj/homeassistant-localai / https://github.com/drndos/hass-openai-custom-conversation / https://github.com/valentinfrlch/ha-gpt4vision
- Discord bot https://github.com/mudler/LocalAGI/tree/main/examples/discord
- Slack bot https://github.com/mudler/LocalAGI/tree/main/examples/slack
- Shell-Pilot (interact with LLMs using LocalAI models via pure shell scripts on your Linux or macOS system): https://github.com/reid41/shell-pilot
- Telegram bot https://github.com/mudler/LocalAI/tree/master/examples/telegram-bot
- Examples: https://github.com/mudler/LocalAI/tree/master/examples/


### 🔗 Resources

- 🆕 New! [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/)
- [How to build locally](https://localai.io/basics/build/index.html)
- [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes)
- [Projects integrating LocalAI](https://localai.io/docs/integrations/)
@@ -133,6 +138,7 @@ Other:

## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social)

- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/)
- [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/)
- [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance)
- [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/)
@@ -160,17 +166,16 @@ If you utilize this repository, data in a downstream project, please consider ci

Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). Your logo will show up here with a link to your website.

A huge thank you to our generous sponsors who support this project:
A huge thank you to our generous sponsors who support this project by covering CI expenses, and to everyone on our [Sponsor list](https://github.com/sponsors/mudler):

|  |
|:-----------------------------------------------:|
| [Spectro Cloud](https://www.spectrocloud.com/) |
| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on Lambda Labs! |

And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project.

- [Sponsor list](https://github.com/sponsors/mudler)
- JDAM00 (donating HW for the CI)
<p align="center">
<a href="https://www.spectrocloud.com/" target="blank">
<img height="200" src="https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512">
</a>
<a href="https://www.premai.io/" target="blank">
<img height="200" src="https://github.com/mudler/LocalAI/assets/2420543/42e4ca83-661e-4f79-8e46-ae43689683d6"> <br>
</a>
</p>

## 🌟 Star history

@@ -180,7 +185,7 @@ And a huge shout-out to individuals sponsoring the project by donating hardware

LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).

MIT - Author Ettore Di Giacinto
MIT - Author Ettore Di Giacinto <mudler@localai.io>

## 🙇 Acknowledgements

@@ -2,8 +2,63 @@ name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192

stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"

function:
# disable injecting the "answer" tool
disable_no_action: true

grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Prefix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true

return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""

template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
@@ -22,38 +77,25 @@ template:
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
completion: |
{{.Input}}
function: |-
<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
You are a function calling AI model.
Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
chat: |
{{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "\n</tool_call>"
- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

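The replace_function_results rules above are ordered regex rewrites applied to the raw tool-call payload before JSON parsing; the _DQUOTE_/__TEMP_QUOTE__ round-trip turns the model's single-quoted pseudo-JSON into valid JSON. A minimal sketch of a subset of those rules using sed (GNU sed assumed; the real logic lives in LocalAI itself, this is only an illustration):

```bash
raw="<scratchpad>plan</scratchpad>{'arguments': {'city': 'Rome'}, 'name': 'get_weather'}"
# 1. drop everything before the first { or [ (this also removes the scratchpad)
# 2. convert single quotes to double quotes via the _DQUOTE_ round-trip
echo "$raw" | sed -E "s/^[^{[]*//; s/'([^']*)'/_DQUOTE_\1_DQUOTE_/g; s/_DQUOTE_/\"/g"
# -> {"arguments": {"city": "Rome"}, "name": "get_weather"}
```
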
@@ -1,10 +1,66 @@
name: gpt-4
mmap: false
context_size: 8192

f16: false
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf

stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"

function:
# disable injecting the "answer" tool
disable_no_action: true

grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Prefix to add to the grammar
#prefix: '<tool_call>\n'
# Force parallel calls in the grammar
# parallel_calls: true

return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)<tool_call>(.*?)</tool_call>"
- "(?s)<tool_call>(.*?)"
replace_llm_results:
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""
replace_function_results:
# Replace everything that is not JSON array or object
#
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
# Drop the scratchpad content from responses
- key: "(?s)<scratchpad>.*</scratchpad>"
value: ""

template:
chat: |
{{.Input -}}
<|im_start|>assistant
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
{{- if .FunctionCall }}
@@ -23,37 +79,25 @@ template:
{{- else if eq .RoleName "tool" }}
</tool_response>
{{- end }}<|im_end|>
# https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
function: |
completion: |
{{.Input}}
function: |-
<|im_start|>system
You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
You are a function calling AI model.
Here are the available tools:
<tools>
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
</tools>
Use the following pydantic model json schema for each tool call you will make:
{'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
You should call the tools provided to you sequentially
Please use <scratchpad> XML tags to record your reasoning and planning before you call the functions as follows:
<scratchpad>
{step-by-step reasoning and plan in bullet points}
</scratchpad>
For each function call return a json object with function name and arguments within <tool_call> XML tags as follows:
<tool_call>
{'arguments': <args-dict>, 'name': <function-name>}
{"arguments": <args-dict>, "name": <function-name>}
</tool_call><|im_end|>
{{.Input -}}
<|im_start|>assistant
<tool_call>
chat: |
{{.Input -}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
stopwords:
- <|im_end|>
- "\n</tool_call>"
- <dummy32000>
- "\n\n\n"
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "gpt-4",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

@@ -266,6 +266,7 @@ message TTSRequest {
string model = 2;
string dst = 3;
string voice = 4;
optional string language = 5;
}

message TokenizationResponse {

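The new optional language field reaches the TTS backends through this message (the coqui change below uses it as its language fallback). A hedged sketch of exercising it over LocalAI's /tts endpoint — the model name is a placeholder, and the JSON field names are assumed to mirror the proto:

```bash
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
  "model": "my-tts-model",
  "input": "Buongiorno!",
  "voice": "female-1",
  "language": "it"
}'
```
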
@@ -791,7 +791,7 @@ struct llama_server_context
sampler_names.emplace_back(sampler_name);
}
}
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
}
else
{
@@ -1146,7 +1146,7 @@ struct llama_server_context
std::vector<std::string> samplers_sequence;
for (const auto &sampler_type : slot.sparams.samplers_sequence)
{
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
}

return json {
@@ -2217,6 +2217,12 @@ static void params_parse(const backend::ModelOptions* request,
} else {
params.n_parallel = 1;
}

const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
if (llama_grpc_servers != NULL) {
params.rpc_servers = std::string(llama_grpc_servers);
}

// TODO: Add yarn

if (!request->tensorsplit().empty()) {

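With this hunk the llama.cpp backend picks up worker addresses from the environment: if LLAMACPP_GRPC_SERVERS is set, its value lands in params.rpc_servers as a comma-separated list. A usage sketch with placeholder addresses, pairing with the rpc-server workers shown earlier:

```bash
# point the llama-cpp-grpc backend at two remote workers (addresses illustrative)
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052" ./local-ai
```
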
backend/python/autogptq/requirements-hipblas.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -1,4 +1,5 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
optimum[openvino]
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
accelerate
auto-gptq==0.7.1
grpcio==1.63.0
grpcio==1.64.0
protobuf
torch
certifi

backend/python/bark/requirements-hipblas.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
@@ -2,4 +2,5 @@
intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
accelerate
bark==0.1.5
grpcio==1.63.0
grpcio==1.64.0
protobuf
certifi
transformers
backend/python/common/template/requirements-hipblas.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -1,2 +1,2 @@
grpcio==1.63.0
grpcio==1.64.0
protobuf
@@ -66,7 +66,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

    def TTS(self, request, context):
        try:
            self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=COQUI_LANGUAGE, file_path=request.dst)
            # if the model is multilingual, use the language from the request, with the env var as fallback
            lang = request.language or COQUI_LANGUAGE
            if lang == "":
                lang = None
            if self.tts.is_multi_lingual and lang is None:
                return backend_pb2.Result(success=False, message="Model is multi-lingual, but no language was provided")

            # if the model is multi-speaker, use speaker_wav or the speaker_id from request.voice
            if self.tts.is_multi_speaker and self.AudioPath is None and request.voice is None:
                return backend_pb2.Result(success=False, message="Model is multi-speaker, but no speaker was provided")

            if self.tts.is_multi_speaker and request.voice is not None:
                self.tts.tts_to_file(text=request.text, speaker=request.voice, language=lang, file_path=request.dst)
            else:
                self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=lang, file_path=request.dst)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)

backend/python/coqui/requirements-hipblas.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
@@ -2,4 +2,5 @@
intel-extension-for-pytorch
torch
torchaudio
optimum[openvino]
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
accelerate
TTS==0.22.0
grpcio==1.63.0
grpcio==1.64.0
protobuf
certifi
transformers
@@ -17,7 +17,7 @@ import backend_pb2_grpc

import grpc

from diffusers import StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, EulerAncestralDiscreteScheduler
from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from diffusers.utils import load_image, export_to_video
@@ -225,6 +225,17 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                    torch_dtype=torchType,
                    use_safetensors=True,
                    variant=variant)
            elif request.PipelineType == "StableDiffusion3Pipeline":
                if fromSingleFile:
                    self.pipe = StableDiffusion3Pipeline.from_single_file(modelFile,
                                                                          torch_dtype=torchType,
                                                                          use_safetensors=True)
                else:
                    self.pipe = StableDiffusion3Pipeline.from_pretrained(
                        request.Model,
                        torch_dtype=torchType,
                        use_safetensors=True,
                        variant=variant)

            if CLIPSKIP and request.CLIPSkip != 0:
                self.clip_skip = request.CLIPSkip

backend/python/diffusers/requirements-hipblas.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchvision
@@ -2,4 +2,5 @@
intel-extension-for-pytorch
torch
torchvision
optimum[openvino]
optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,10 +1,11 @@
accelerate
compel
diffusers
grpcio==1.63.0
grpcio==1.64.0
opencv-python
pillow
protobuf
sentencepiece
torch
transformers
certifi
certifi

@@ -1,4 +1,4 @@
grpcio==1.63.0
grpcio==1.64.0
protobuf
torch
transformers

@@ -1,5 +1,5 @@
accelerate
grpcio==1.63.0
grpcio==1.64.0
protobuf
certifi
torch

@@ -1,6 +1,6 @@
causal-conv1d==1.2.0.post2
mamba-ssm==1.2.0.post1
grpcio==1.63.0
grpcio==1.64.0
protobuf
certifi
transformers
backend/python/openvoice/Makefile (new file, 25 lines)
@@ -0,0 +1,25 @@
.DEFAULT_GOAL := install

.PHONY: install
install: protogen
	bash install.sh

.PHONY: protogen
protogen: backend_pb2_grpc.py backend_pb2.py

.PHONY: protogen-clean
protogen-clean:
	$(RM) backend_pb2_grpc.py backend_pb2.py

backend_pb2_grpc.py backend_pb2.py:
	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto

.PHONY: clean
clean: protogen-clean
	rm -rf venv __pycache__

.PHONY: test
test: protogen
	@echo "Testing openvoice..."
	bash test.sh
	@echo "openvoice tested."
backend/python/openvoice/backend.py (new executable file, 158 lines)
@@ -0,0 +1,158 @@
#!/usr/bin/env python3
"""
Extra gRPC server for OpenVoice models.
"""
from concurrent import futures

import argparse
import signal
import sys
import os
import torch
from openvoice import se_extractor
from openvoice.api import ToneColorConverter
from melo.api import TTS

import time
import backend_pb2
import backend_pb2_grpc

import grpc


_ONE_DAY_IN_SECONDS = 60 * 60 * 24

# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))

# Implement the BackendServicer class with the service methods
class BackendServicer(backend_pb2_grpc.BackendServicer):
    """
    A gRPC servicer for the backend service.

    This class implements the gRPC methods for the backend service, including Health, LoadModel, and TTS.
    """
    def Health(self, request, context):
        """
        A gRPC method that returns the health status of the backend service.

        Args:
            request: A HealthRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Reply object that contains the health status of the backend service.
        """
        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))

    def LoadModel(self, request, context):
        """
        A gRPC method that loads a model into memory.

        Args:
            request: A LoadModelRequest object that contains the request parameters.
            context: A grpc.ServicerContext object that provides information about the RPC.

        Returns:
            A Result object that contains the result of the LoadModel operation.
        """
        model_name = request.Model
        try:
            self.clonedVoice = False
            # Assume directory from request.ModelFile,
            # but only if request.AudioPath is not an absolute path
            if request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
                # get base path of modelFile
                modelFileBase = os.path.dirname(request.ModelFile)
                request.AudioPath = os.path.join(modelFileBase, request.AudioPath)
            if request.AudioPath != "":
                self.clonedVoice = True

            self.modelpath = request.ModelFile
            self.speaker = request.Type
            self.ClonedVoicePath = request.AudioPath

            ckpt_converter = request.Model+'/converter'
            device = "cuda:0" if torch.cuda.is_available() else "cpu"
            self.device = device
            self.tone_color_converter = None
            if self.clonedVoice:
                self.tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
                self.tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')

        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

        return backend_pb2.Result(message="Model loaded successfully", success=True)

    def TTS(self, request, context):
        model_name = request.model
        if model_name == "":
            return backend_pb2.Result(success=False, message="request.model is required")
        try:
            # Speed is adjustable
            speed = 1.0
            voice = "EN"
            if request.voice:
                voice = request.voice
            model = TTS(language=voice, device=self.device)
            speaker_ids = model.hps.data.spk2id
            speaker_key = self.speaker
            modelpath = self.modelpath
            for s in speaker_ids.keys():
                print(f"Speaker: {s} - ID: {speaker_ids[s]}")
            speaker_id = speaker_ids[speaker_key]
            speaker_key = speaker_key.lower().replace('_', '-')
            source_se = torch.load(f'{modelpath}/base_speakers/ses/{speaker_key}.pth', map_location=self.device)
            model.tts_to_file(request.text, speaker_id, request.dst, speed=speed)
            if self.clonedVoice:
                reference_speaker = self.ClonedVoicePath
                target_se, audio_name = se_extractor.get_se(reference_speaker, self.tone_color_converter, vad=False)
                # Run the tone color converter
                encode_message = "@MyShell"
                self.tone_color_converter.convert(
                    audio_src_path=request.dst,
                    src_se=source_se,
                    tgt_se=target_se,
                    output_path=request.dst,
                    message=encode_message)

            print("[OpenVoice] TTS generated!", file=sys.stderr)
            print("[OpenVoice] TTS saved to", request.dst, file=sys.stderr)
            print(request, file=sys.stderr)
        except Exception as err:
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(success=True)

def serve(address):
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("[OpenVoice] Server started. Listening on: " + address, file=sys.stderr)

    # Define the signal handler function
    def signal_handler(sig, frame):
        print("[OpenVoice] Received termination signal. Shutting down...")
        server.stop(0)
        sys.exit(0)

    # Set the signal handlers for SIGINT and SIGTERM
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the gRPC server.")
    parser.add_argument(
        "--addr", default="localhost:50051", help="The address to bind the server to."
    )
    args = parser.parse_args()
    print(f"[OpenVoice] startup: {args}", file=sys.stderr)
    serve(args.addr)
backend/python/openvoice/install.sh (new executable file, 16 lines)
@@ -0,0 +1,16 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# This is here because the Intel pip index is broken and returns 200 status codes for every package name; it just doesn't return any package links.
# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
# We need uv to continue falling through to the default PyPI index to find optimum[openvino] there.
# The --upgrade flag actually allows us to *downgrade* torch to the version provided in the Intel pip index.
if [ "x${BUILD_PROFILE}" == "xintel" ]; then
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
fi

installRequirements

python -m unidic download
backend/python/openvoice/requirements-hipblas.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
backend/python/openvoice/requirements-intel.txt (new file, 23 lines)
@@ -0,0 +1,23 @@
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
intel-extension-for-pytorch
torch
optimum[openvino]
grpcio==1.64.0
protobuf
librosa==0.9.1
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy==1.22.0
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
gradio==3.48.0
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
backend/python/openvoice/requirements.txt (new file, 20 lines)
@@ -0,0 +1,20 @@
grpcio==1.64.0
protobuf
librosa==0.9.1
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy==1.22.0
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin==0.50.0
cn2an==0.5.22
jieba==0.42.1
gradio==3.48.0
langid==1.1.6
git+https://github.com/myshell-ai/MeloTTS.git
git+https://github.com/myshell-ai/OpenVoice.git
backend/python/openvoice/run.sh (new executable file, 4 lines)
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
source $(dirname $0)/../common/libbackend.sh
|
||||
|
||||
startBackend $@
|
||||
82	backend/python/openvoice/test.py	(new Normal file)
@@ -0,0 +1,82 @@
"""
A test script to test the gRPC service
"""
import unittest
import subprocess
import time
import backend_pb2
import backend_pb2_grpc

import grpc


class TestBackendServicer(unittest.TestCase):
    """
    TestBackendServicer is the class that tests the gRPC service
    """
    def setUp(self):
        """
        This method sets up the gRPC service by starting the server
        """
        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
        time.sleep(10)

    def tearDown(self) -> None:
        """
        This method tears down the gRPC service by terminating the server
        """
        self.service.terminate()
        self.service.wait()

    def test_server_startup(self):
        """
        This method tests if the server starts up successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.Health(backend_pb2.HealthMessage())
                self.assertEqual(response.message, b'OK')
        except Exception as err:
            print(err)
            self.fail("Server failed to start")
        finally:
            self.tearDown()

    def test_load_model(self):
        """
        This method tests if the model is loaded successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="checkpoints_v2",
                                                                   Type="en-us"))
                self.assertTrue(response.success)
                self.assertEqual(response.message, "Model loaded successfully")
        except Exception as err:
            print(err)
            self.fail("LoadModel service failed")
        finally:
            self.tearDown()

    def test_tts(self):
        """
        This method tests if TTS audio is generated successfully
        """
        try:
            self.setUp()
            with grpc.insecure_channel("localhost:50051") as channel:
                stub = backend_pb2_grpc.BackendStub(channel)
                response = stub.LoadModel(backend_pb2.ModelOptions(Model="dingzhen"))
                self.assertTrue(response.success)
                tts_request = backend_pb2.TTSRequest(text="80s TV news production music hit for tonight's biggest story", voice="EN")
                tts_response = stub.TTS(tts_request)
                self.assertIsNotNone(tts_response)
        except Exception as err:
            print(err)
            self.fail("TTS service failed")
        finally:
            self.tearDown()
12	backend/python/openvoice/test.sh	(new Executable file)
@@ -0,0 +1,12 @@
#!/bin/bash
set -e

source $(dirname $0)/../common/libbackend.sh

# Download checkpoints if not present
if [ ! -d "checkpoints_v2" ]; then
    wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip
    unzip checkpoints_v2.zip
fi

runUnittests
3	backend/python/parler-tts/requirements-hipblas.txt	(new Normal file)
@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
@@ -2,4 +2,5 @@
 intel-extension-for-pytorch
 torch
 torchaudio
-optimum[openvino]
+optimum[openvino]
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.63.0
+grpcio==1.64.0
 protobuf
 torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
2	backend/python/petals/requirements-hipblas.txt	(new Normal file)
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -1,4 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
 torch
-optimum[openvino]
+optimum[openvino]
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
2	backend/python/rerankers/requirements-hipblas.txt	(new Normal file)
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -1,4 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
 torch
-optimum[openvino]
+optimum[openvino]
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.63.0
+grpcio==1.64.0
 protobuf
 certifi
 transformers
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -1,4 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
 torch
-optimum[openvino]
+optimum[openvino]
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 sentence-transformers==2.5.1
 transformers
-grpcio==1.63.0
+grpcio==1.64.0
 protobuf
 certifi
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -1,4 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
 torch
-optimum[openvino]
+optimum[openvino]
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 transformers
-grpcio==1.63.0
+grpcio==1.64.0
 protobuf
 torch
 scipy==1.13.0
68	backend/python/transformers/backend.py	(Executable file → Normal file)
@@ -21,10 +21,7 @@ import torch.cuda


 XPU=os.environ.get("XPU", "0") == "1"
-if XPU:
-    from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer
-else:
-    from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer
+from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria


 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -77,11 +74,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         """
         model_name = request.Model

-        compute = "auto"
+        compute = torch.float16
         if request.F16Memory == True:
             compute=torch.bfloat16

-        self.CUDA = request.CUDA
+        self.CUDA = torch.cuda.is_available()
         self.OV=False

         device_map="cpu"
@@ -89,6 +86,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         quantization = None

         if self.CUDA:
+            from transformers import BitsAndBytesConfig, AutoModelForCausalLM
             if request.MainGPU:
                 device_map=request.MainGPU
             else:
@@ -107,7 +105,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                     bnb_4bit_compute_dtype = None,
                     load_in_8bit=True,
                 )

         try:
             if request.Type == "AutoModelForCausalLM":
                 if XPU:
@@ -189,6 +187,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                                                        device=device_map)
             self.OV = True
         else:
+            print("Automodel", file=sys.stderr)
             self.model = AutoModel.from_pretrained(model_name,
                                                    trust_remote_code=request.TrustRemoteCode,
                                                    use_safetensors=True,
@@ -246,28 +245,28 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):

         # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
         sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
         # print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
         # print("Embeddings:", sentence_embeddings, file=sys.stderr)
         return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])

     async def _predict(self, request, context, streaming=False):
         set_seed(request.Seed)
-        if request.TopP == 0:
-            request.TopP = 0.9
+        if request.TopP < 0 or request.TopP > 1:
+            request.TopP = 1

-        if request.TopK == 0:
-            request.TopK = 40
+        if request.TopK <= 0:
+            request.TopK = 50

+        if request.Temperature > 0 :
+            sample=True
+        else:
+            sample=False
+            request.TopP == None
+            request.TopK == None
+            request.Temperature == None

         prompt = request.Prompt
         if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
             prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)

-        eos_token_id = self.tokenizer.eos_token_id
-        if request.StopPrompts:
-            eos_token_id = []
-            for word in request.StopPrompts:
-                eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word))
-
         inputs = self.tokenizer(prompt, return_tensors="pt")

         if request.Tokens > 0:
@@ -281,6 +280,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
             inputs = inputs.to("xpu")
             streaming = False

+        criteria=[]
+        if request.StopPrompts:
+            criteria = StoppingCriteriaList(
+                [
+                    StopStringCriteria(tokenizer=self.tokenizer, stop_strings=request.StopPrompts),
+                ]
+            )
+
         if streaming:
             streamer=TextIteratorStreamer(self.tokenizer,
                                           skip_prompt=True,
@@ -290,11 +297,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                            temperature=request.Temperature,
                            top_p=request.TopP,
                            top_k=request.TopK,
-                           do_sample=True,
+                           do_sample=sample,
                            attention_mask=inputs["attention_mask"],
-                           eos_token_id=eos_token_id,
+                           eos_token_id=self.tokenizer.eos_token_id,
                            pad_token_id=self.tokenizer.eos_token_id,
-                           streamer=streamer)
+                           streamer=streamer,
+                           stopping_criteria=criteria,
+                           use_cache=True,
+                           )
             thread=Thread(target=self.model.generate, kwargs=config)
             thread.start()
             generated_text = ""
@@ -311,18 +321,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
                                               temperature=request.Temperature,
                                               top_p=request.TopP,
                                               top_k=request.TopK,
-                                              do_sample=True,
+                                              do_sample=sample,
                                               pad_token=self.tokenizer.eos_token_id)
             else:
-                outputs = self.model.generate(inputs["input_ids"],
+                outputs = self.model.generate(**inputs,
                                               max_new_tokens=max_tokens,
                                               temperature=request.Temperature,
                                               top_p=request.TopP,
                                               top_k=request.TopK,
-                                              do_sample=True,
-                                              attention_mask=inputs["attention_mask"],
-                                              eos_token_id=eos_token_id,
-                                              pad_token_id=self.tokenizer.eos_token_id)
+                                              do_sample=sample,
+                                              eos_token_id=self.tokenizer.eos_token_id,
+                                              pad_token_id=self.tokenizer.eos_token_id,
+                                              stopping_criteria=criteria,
+                                              use_cache=True,
+                                              )
             generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]

         if streaming:
2	backend/python/transformers/requirements-hipblas.txt	(new Normal file)
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,9 @@
 accelerate
 transformers
-grpcio==1.63.0
+grpcio==1.64.0
 protobuf
 torch
-certifi
+certifi
+intel-extension-for-transformers
+bitsandbytes
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,4 +1,10 @@
 #!/bin/bash
 source $(dirname $0)/../common/libbackend.sh

+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
 startBackend $@
3	backend/python/vall-e-x/requirements-hipblas.txt	(new Normal file)
@@ -0,0 +1,3 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
torchaudio
@@ -2,4 +2,5 @@
 intel-extension-for-pytorch
 torch
 torchaudio
-optimum[openvino]
+optimum[openvino]
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.63.0
+grpcio==1.64.0
 protobuf
 certifi
2	backend/python/vllm/requirements-hipblas.txt	(new Normal file)
@@ -0,0 +1,2 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0
torch
@@ -1,4 +1,5 @@
 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
 intel-extension-for-pytorch
 torch
-optimum[openvino]
+optimum[openvino]
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.63.0
+grpcio==1.64.0
 protobuf
 certifi
 transformers
@@ -57,7 +57,7 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
 	if _, err := os.Stat(modelFile); os.IsNotExist(err) {
 		utils.ResetDownloadTimers()
 		// if we failed to load the model, we try to download it
-		err := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
+		err := gallery.InstallModelFromGallery(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction)
 		if err != nil {
 			return nil, err
 		}
@@ -29,7 +29,16 @@ func generateUniqueFileName(dir, baseName, ext string) string {
 	}
 }

-func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) {
+func ModelTTS(
+	backend,
+	text,
+	modelFile,
+	voice,
+	language string,
+	loader *model.ModelLoader,
+	appConfig *config.ApplicationConfig,
+	backendConfig config.BackendConfig,
+) (string, *proto.Result, error) {
 	bb := backend
 	if bb == "" {
 		bb = model.PiperBackend

@@ -83,7 +92,13 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader,
 		Model:    modelPath,
 		Voice:    voice,
 		Dst:      filePath,
+		Language: &language,
 	})

+	// return RPC error if any
+	if !res.Success {
+		return "", nil, fmt.Errorf(res.Message)
+	}
+
 	return filePath, res, err
 }
@@ -1,20 +1,17 @@
 package cli

-import "embed"
-
-type Context struct {
-	Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
-	LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`
-
-	// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
-	BackendAssets embed.FS `kong:"-"`
-}
+import (
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+	"github.com/go-skynet/LocalAI/core/cli/worker"
+)

 var CLI struct {
-	Context `embed:""`
+	cliContext.Context `embed:""`

 	Run        RunCMD        `cmd:"" help:"Run LocalAI, this the default command if no other command is specified. Run 'local-ai run --help' for more information" default:"withargs"`
 	Models     ModelsCMD     `cmd:"" help:"Manage LocalAI models and definitions"`
 	TTS        TTSCMD        `cmd:"" help:"Convert text to speech"`
 	Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"`
+	Worker     worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"`
+	Util       UtilCMD       `cmd:"" help:"Utility commands"`
 }
11	core/cli/context/context.go	(new Normal file)
@@ -0,0 +1,11 @@
package cliContext

import "embed"

type Context struct {
	Debug    bool    `env:"LOCALAI_DEBUG,DEBUG" default:"false" hidden:"" help:"DEPRECATED, use --log-level=debug instead. Enable debug logging"`
	LogLevel *string `env:"LOCALAI_LOG_LEVEL" enum:"error,warn,info,debug,trace" help:"Set the level of logs to output [${enum}]"`

	// This field is not a command line argument/flag, the struct tag excludes it from the parsed CLI
	BackendAssets embed.FS `kong:"-"`
}
@@ -4,13 +4,16 @@ import (
 	"encoding/json"
 	"fmt"

+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
+
 	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/go-skynet/LocalAI/pkg/startup"
 	"github.com/rs/zerolog/log"
 	"github.com/schollz/progressbar/v3"
 )

 type ModelsCMDFlags struct {
-	Galleries  string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models"`
+	Galleries  string `env:"LOCALAI_GALLERIES,GALLERIES" help:"JSON list of galleries" group:"models" default:"${galleries}"`
 	ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 }

@@ -29,7 +32,7 @@ type ModelsCMD struct {
 	Install ModelsInstall `cmd:"" help:"Install a model from the gallery"`
 }

-func (ml *ModelsList) Run(ctx *Context) error {
+func (ml *ModelsList) Run(ctx *cliContext.Context) error {
 	var galleries []gallery.Gallery
 	if err := json.Unmarshal([]byte(ml.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")

@@ -49,30 +52,44 @@ func (ml *ModelsList) Run(ctx *Context) error {
 	return nil
 }

-func (mi *ModelsInstall) Run(ctx *Context) error {
-	modelName := mi.ModelArgs[0]
-
+func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 	var galleries []gallery.Gallery
 	if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
 	}

-	progressBar := progressbar.NewOptions(
-		1000,
-		progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)),
-		progressbar.OptionShowBytes(false),
-		progressbar.OptionClearOnFinish(),
-	)
-	progressCallback := func(fileName string, current string, total string, percentage float64) {
-		v := int(percentage * 10)
-		err := progressBar.Set(v)
-		if err != nil {
-			log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
+	for _, modelName := range mi.ModelArgs {
+
+		progressBar := progressbar.NewOptions(
+			1000,
+			progressbar.OptionSetDescription(fmt.Sprintf("downloading model %s", modelName)),
+			progressbar.OptionShowBytes(false),
+			progressbar.OptionClearOnFinish(),
+		)
+		progressCallback := func(fileName string, current string, total string, percentage float64) {
+			v := int(percentage * 10)
+			err := progressBar.Set(v)
+			if err != nil {
+				log.Error().Err(err).Str("filename", fileName).Int("value", v).Msg("error while updating progress bar")
+			}
 		}
-	}
-	err := gallery.InstallModelFromGallery(galleries, modelName, mi.ModelsPath, gallery.GalleryModel{}, progressCallback)
-	if err != nil {
-		return err
-	}
+		//startup.InstallModels()
+		models, err := gallery.AvailableGalleryModels(galleries, mi.ModelsPath)
+		if err != nil {
+			return err
+		}
+
+		model := gallery.FindModel(models, modelName, mi.ModelsPath)
+		if model == nil {
+			log.Error().Str("model", modelName).Msg("model not found")
+			return err
+		}
+
+		log.Info().Str("model", modelName).Str("license", model.License).Msg("installing model")
+		err = startup.InstallModels(galleries, "", mi.ModelsPath, progressCallback, modelName)
+		if err != nil {
+			return err
+		}
+	}
 	return nil
 }
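Since ModelArgs is now iterated in a loop, the install subcommand accepts several models in one invocation; a usage sketch (the model names below are hypothetical):

    # Install more than one gallery model at once
    local-ai models install phi-2 bert-embeddings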
@@ -1,12 +1,15 @@
 package cli

 import (
+	"context"
 	"fmt"
 	"strings"
 	"time"

+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/core/http"
+	"github.com/go-skynet/LocalAI/core/p2p"
 	"github.com/go-skynet/LocalAI/core/startup"
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"

@@ -34,16 +37,20 @@ type RunCMD struct {
 	PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"`

 	F16     bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"`
-	Threads int  `env:"LOCALAI_THREADS,THREADS" short:"t" default:"4" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
+	Threads int  `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
 	ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`

-	Address          string   `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
-	CORS             bool     `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
-	CORSAllowOrigins string   `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
-	UploadLimit      int      `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
-	APIKeys          []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
-	DisableWebUI     bool     `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
+	Address          string   `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
+	CORS             bool     `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
+	CORSAllowOrigins string   `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
+	LibraryPath      string   `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
+	CSRF             bool     `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
+	UploadLimit      int      `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
+	APIKeys          []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
+	DisableWebUI     bool     `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
+	OpaqueErrors     bool     `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"api"`
+	Peer2Peer        bool     `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
+	Peer2PeerToken   string   `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
 	ParallelRequests    bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
 	SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
 	PreloadBackendOnly  bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`

@@ -54,7 +61,7 @@ type RunCMD struct {
 	WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
 }

-func (r *RunCMD) Run(ctx *Context) error {
+func (r *RunCMD) Run(ctx *cliContext.Context) error {
 	opts := []config.AppOption{
 		config.WithConfigFile(r.ModelsConfigFile),
 		config.WithJSONStringPreload(r.PreloadModels),

@@ -73,12 +80,40 @@ func (r *RunCMD) Run(ctx *Context) error {
 		config.WithModelLibraryURL(r.RemoteLibrary),
 		config.WithCors(r.CORS),
 		config.WithCorsAllowOrigins(r.CORSAllowOrigins),
+		config.WithCsrf(r.CSRF),
+		config.WithLibPath(r.LibraryPath),
 		config.WithThreads(r.Threads),
 		config.WithBackendAssets(ctx.BackendAssets),
 		config.WithBackendAssetsOutput(r.BackendAssetsPath),
 		config.WithUploadLimitMB(r.UploadLimit),
 		config.WithApiKeys(r.APIKeys),
 		config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
+		config.WithOpaqueErrors(r.OpaqueErrors),
 	}

+	if r.Peer2Peer || r.Peer2PeerToken != "" {
+		log.Info().Msg("P2P mode enabled")
+		token := r.Peer2PeerToken
+		if token == "" {
+			// IF no token is provided, and p2p is enabled,
+			// we generate one and wait for the user to pick up the token (this is for interactive)
+			log.Info().Msg("No token provided, generating one")
+			token = p2p.GenerateToken()
+			log.Info().Msg("Generated Token:")
+			fmt.Println(token)
+
+			log.Info().Msg("To use the token, you can run the following command in another node or terminal:")
+			fmt.Printf("export TOKEN=\"%s\"\nlocal-ai worker p2p-llama-cpp-rpc\n", token)
+
+			// Ask for user confirmation
+			log.Info().Msg("Press a button to proceed")
+			var input string
+			fmt.Scanln(&input)
+		}
+		log.Info().Msg("Starting P2P server discovery...")
+		if err := p2p.LLamaCPPRPCServerDiscoverer(context.Background(), token); err != nil {
+			return err
+		}
+	}
+
 	idleWatchDog := r.EnableWatchdogIdle
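A minimal sketch of starting the API node with the new P2P support; the flag spellings --p2p and --p2ptoken come from the kong name tags above:

    # Start LocalAI with P2P enabled; without --p2ptoken a fresh token is
    # generated, printed, and the process waits for user confirmation
    local-ai run --p2p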
@@ -6,6 +6,7 @@ import (
 	"fmt"

 	"github.com/go-skynet/LocalAI/core/backend"
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"

@@ -22,14 +23,14 @@ type TranscriptCMD struct {
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
 }

-func (t *TranscriptCMD) Run(ctx *Context) error {
+func (t *TranscriptCMD) Run(ctx *cliContext.Context) error {
 	opts := &config.ApplicationConfig{
 		ModelPath:         t.ModelsPath,
 		Context:           context.Background(),
 		AssetsDestination: t.BackendAssetsPath,
 	}

-	cl := config.NewBackendConfigLoader()
+	cl := config.NewBackendConfigLoader(t.ModelsPath)
 	ml := model.NewModelLoader(opts.ModelPath)
 	if err := cl.LoadBackendConfigsFromPath(t.ModelsPath); err != nil {
 		return err
@@ -8,6 +8,7 @@ import (
 	"strings"

 	"github.com/go-skynet/LocalAI/core/backend"
+	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
 	"github.com/go-skynet/LocalAI/core/config"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"

@@ -19,12 +20,13 @@ type TTSCMD struct {
 	Backend           string `short:"b" default:"piper" help:"Backend to run the TTS model"`
 	Model             string `short:"m" required:"" help:"Model name to run the TTS"`
 	Voice             string `short:"v" help:"Voice name to run the TTS"`
+	Language          string `short:"l" help:"Language to use with the TTS"`
 	OutputFile        string `short:"o" type:"path" help:"The path to write the output wav file"`
 	ModelsPath        string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
 	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
 }

-func (t *TTSCMD) Run(ctx *Context) error {
+func (t *TTSCMD) Run(ctx *cliContext.Context) error {
 	outputFile := t.OutputFile
 	outputDir := t.BackendAssetsPath
 	if outputFile != "" {

@@ -51,7 +53,7 @@ func (t *TTSCMD) Run(ctx *Context) error {
 	options := config.BackendConfig{}
 	options.SetDefaults()

-	filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options)
+	filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options)
 	if err != nil {
 		return err
 	}
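A usage sketch for the new language flag; the model and voice names here are hypothetical, and the text is assumed to be passed as the remaining arguments:

    # Forward a language hint to backends that support it (e.g. the openvoice backend)
    local-ai tts -m my-tts-model -v my-voice --language en -o hello.wav "Hello world"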
55	core/cli/util.go	(new Normal file)
@@ -0,0 +1,55 @@
package cli

import (
	"fmt"

	"github.com/rs/zerolog/log"

	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
	gguf "github.com/thxcode/gguf-parser-go"
)

type UtilCMD struct {
	GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
}

type GGUFInfoCMD struct {
	Args   []string `arg:"" optional:"" name:"args" help:"Arguments to pass to the utility command"`
	Header bool     `optional:"" default:"false" name:"header" help:"Show header information"`
}

func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
	if u.Args == nil || len(u.Args) == 0 {
		return fmt.Errorf("no GGUF file provided")
	}
	// We try to guess only if we don't have a template defined already
	f, err := gguf.ParseGGUFFile(u.Args[0])
	if err != nil {
		// Only valid for gguf files
		log.Error().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
		return err
	}

	log.Info().
		Any("eosTokenID", f.Tokenizer().EOSTokenID).
		Any("bosTokenID", f.Tokenizer().BOSTokenID).
		Any("modelName", f.Model().Name).
		Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])

	log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
	log.Info().Any("architecture", fmt.Sprintf("%+v", f.Architecture())).Msg("Architecture")

	v, exists := f.Header.MetadataKV.Get("tokenizer.chat_template")
	if exists {
		log.Info().Msgf("chat_template: %s", v.ValueString())
	}

	if u.Header {
		for _, metadata := range f.Header.MetadataKV {
			log.Info().Msgf("%s: %+v", metadata.Key, metadata.Value)
		}
		// log.Info().Any("header", fmt.Sprintf("%+v", f.Header)).Msg("Header")
	}

	return nil
}
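Usage sketch for the new utility subcommand (the file path is illustrative):

    # Print tokenizer and architecture details for a GGUF file;
    # --header additionally dumps every metadata key/value pair
    local-ai util gguf-info --header ./models/model.gguf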
10	core/cli/worker/worker.go	(new Normal file)
@@ -0,0 +1,10 @@
package worker

type WorkerFlags struct {
	BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"`
}

type Worker struct {
	P2P      P2P      `cmd:"" name:"p2p-llama-cpp-rpc" help:"Starts a LocalAI llama.cpp worker in P2P mode (requires a token)"`
	LLamaCPP LLamaCPP `cmd:"" name:"llama-cpp-rpc" help:"Starts a llama.cpp worker in standalone mode"`
}
43	core/cli/worker/worker_llamacpp.go	(new Normal file)
@@ -0,0 +1,43 @@
package worker

import (
	"fmt"
	"os"
	"syscall"

	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
	"github.com/go-skynet/LocalAI/pkg/assets"
	"github.com/rs/zerolog/log"
)

type LLamaCPP struct {
	Args        []string `arg:"" optional:"" name:"models" help:"Model configuration URLs to load"`
	WorkerFlags `embed:""`
}

func (r *LLamaCPP) Run(ctx *cliContext.Context) error {
	// Extract files from the embedded FS
	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
	if err != nil {
		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
	}

	if len(os.Args) < 4 {
		return fmt.Errorf("usage: local-ai worker llama-cpp-rpc -- <llama-rpc-server-args>")
	}

	return syscall.Exec(
		assets.ResolvePath(
			r.BackendAssetsPath,
			"util",
			"llama-cpp-rpc-server",
		),
		append([]string{
			assets.ResolvePath(
				r.BackendAssetsPath,
				"util",
				"llama-cpp-rpc-server",
			)}, os.Args[4:]...),
		os.Environ())
}
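The standalone worker simply execs the bundled llama-cpp-rpc-server binary, forwarding everything after the separator; a sketch (host and port values are illustrative):

    # Arguments after -- are passed verbatim to the extracted llama-cpp-rpc-server
    local-ai worker llama-cpp-rpc -- --host 0.0.0.0 --port 50052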
16	core/cli/worker/worker_nop2p.go	(new Normal file)
@@ -0,0 +1,16 @@
//go:build !p2p
// +build !p2p

package worker

import (
	"fmt"

	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
)

type P2P struct{}

func (r *P2P) Run(ctx *cliContext.Context) error {
	return fmt.Errorf("p2p mode is not enabled in this build")
}
104	core/cli/worker/worker_p2p.go	(new Normal file)
@@ -0,0 +1,104 @@
//go:build p2p
// +build p2p

package worker

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"time"

	cliContext "github.com/go-skynet/LocalAI/core/cli/context"
	"github.com/go-skynet/LocalAI/core/p2p"
	"github.com/go-skynet/LocalAI/pkg/assets"
	"github.com/phayes/freeport"
	"github.com/rs/zerolog/log"
)

type P2P struct {
	WorkerFlags       `embed:""`
	Token             string   `env:"LOCALAI_TOKEN,TOKEN" help:"JSON list of galleries"`
	NoRunner          bool     `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"`
	RunnerAddress     string   `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"`
	RunnerPort        string   `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"`
	ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"`
}

func (r *P2P) Run(ctx *cliContext.Context) error {
	// Extract files from the embedded FS
	err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath)
	log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath)
	if err != nil {
		log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
	}

	// Check if the token is set
	// as we always need it.
	if r.Token == "" {
		return fmt.Errorf("Token is required")
	}

	port, err := freeport.GetFreePort()
	if err != nil {
		return err
	}

	address := "127.0.0.1"

	if r.NoRunner {
		// Let override which port and address to bind if the user
		// configure the llama-cpp service on its own
		p := fmt.Sprint(port)
		if r.RunnerAddress != "" {
			address = r.RunnerAddress
		}
		if r.RunnerPort != "" {
			p = r.RunnerPort
		}

		err = p2p.BindLLamaCPPWorker(context.Background(), address, p, r.Token)
		if err != nil {
			return err
		}
		log.Info().Msgf("You need to start llama-cpp-rpc-server on '%s:%s'", address, p)

		return nil
	}

	// Start llama.cpp directly from the version we have pre-packaged
	go func() {
		for {
			log.Info().Msgf("Starting llama-cpp-rpc-server on '%s:%d'", address, port)
			cmd := exec.Command(
				assets.ResolvePath(
					r.BackendAssetsPath,
					"util",
					"llama-cpp-rpc-server",
				),
				append([]string{"--host", address, "--port", fmt.Sprint(port)}, r.ExtraLLamaCPPArgs...)...,
			)

			cmd.Env = os.Environ()

			cmd.Stderr = os.Stdout
			cmd.Stdout = os.Stdout

			if err := cmd.Start(); err != nil {
				log.Error().Err(err).Msg("Failed to start llama-cpp-rpc-server")
			}

			cmd.Wait()
		}
	}()

	err = p2p.BindLLamaCPPWorker(context.Background(), address, fmt.Sprint(port), r.Token)
	if err != nil {
		return err
	}

	for {
		time.Sleep(1 * time.Second)
	}
}
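Putting the two P2P pieces together, the worker side would look roughly like this; the token value is whatever the main node printed at startup:

    # On a worker node: reuse the token generated by the main instance, then
    # let LocalAI spawn and register a local llama-cpp-rpc-server over P2P
    export TOKEN="<token printed by the main node>"
    local-ai worker p2p-llama-cpp-rpc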
@@ -7,6 +7,7 @@ import (
 	"time"

 	"github.com/go-skynet/LocalAI/pkg/gallery"
+	"github.com/go-skynet/LocalAI/pkg/xsysinfo"
 	"github.com/rs/zerolog/log"
 )

@@ -14,6 +15,7 @@ type ApplicationConfig struct {
 	Context    context.Context
 	ConfigFile string
 	ModelPath  string
+	LibPath    string
 	UploadLimitMB, Threads, ContextSize int
 	DisableWebUI bool
 	F16          bool

@@ -25,10 +27,12 @@ type ApplicationConfig struct {
 	DynamicConfigsDir             string
 	DynamicConfigsDirPollInterval time.Duration
 	CORS                          bool
+	CSRF                          bool
 	PreloadJSONModels             string
 	PreloadModelsFromPath         string
 	CORSAllowOrigins              string
 	ApiKeys                       []string
+	OpaqueErrors                  bool

 	ModelLibraryURL string

@@ -59,7 +63,6 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
 	opt := &ApplicationConfig{
 		Context:       context.Background(),
 		UploadLimitMB: 15,
-		Threads:       1,
 		ContextSize:   512,
 		Debug:         true,
 	}

@@ -87,12 +90,24 @@ func WithCors(b bool) AppOption {
 	}
 }

+func WithCsrf(b bool) AppOption {
+	return func(o *ApplicationConfig) {
+		o.CSRF = b
+	}
+}
+
 func WithModelLibraryURL(url string) AppOption {
 	return func(o *ApplicationConfig) {
 		o.ModelLibraryURL = url
 	}
 }

+func WithLibPath(path string) AppOption {
+	return func(o *ApplicationConfig) {
+		o.LibPath = path
+	}
+}
+
 var EnableWatchDog = func(o *ApplicationConfig) {
 	o.WatchDog = true
 }

@@ -213,6 +228,9 @@ func WithUploadLimitMB(limit int) AppOption {

 func WithThreads(threads int) AppOption {
 	return func(o *ApplicationConfig) {
+		if threads == 0 { // 0 is not allowed
+			threads = xsysinfo.CPUPhysicalCores()
+		}
 		o.Threads = threads
 	}
 }

@@ -277,6 +295,12 @@ func WithApiKeys(apiKeys []string) AppOption {
 	}
 }

+func WithOpaqueErrors(opaque bool) AppOption {
+	return func(o *ApplicationConfig) {
+		o.OpaqueErrors = opaque
+	}
+}
+
 // ToConfigLoaderOptions returns a slice of ConfigLoader Option.
 // Some options defined at the application level are going to be passed as defaults for
 // all the configuration for the models.

@@ -289,6 +313,7 @@ func (o *ApplicationConfig) ToConfigLoaderOptions() []ConfigLoaderOption {
 		LoadOptionDebug(o.Debug),
 		LoadOptionF16(o.F16),
 		LoadOptionThreads(o.Threads),
+		ModelPath(o.ModelPath),
 	}
 }
@@ -2,6 +2,8 @@ package config

 import (
+	"os"
+	"regexp"
 	"strings"

 	"github.com/go-skynet/LocalAI/core/schema"
 	"github.com/go-skynet/LocalAI/pkg/downloader"

@@ -13,6 +15,15 @@ const (
 	RAND_SEED = -1
 )

+type TTSConfig struct {
+
+	// Voice wav path or id
+	Voice string `yaml:"voice"`
+
+	// Vall-e-x
+	VallE VallE `yaml:"vall-e"`
+}
+
 type BackendConfig struct {
 	schema.PredictionOptions `yaml:"parameters"`
 	Name                     string `yaml:"name"`

@@ -25,9 +36,11 @@ type BackendConfig struct {
 	Backend        string         `yaml:"backend"`
 	TemplateConfig TemplateConfig `yaml:"template"`

-	PromptStrings, InputStrings                []string `yaml:"-"`
-	InputToken                                 [][]int  `yaml:"-"`
-	functionCallString, functionCallNameString string   `yaml:"-"`
+	PromptStrings, InputStrings                []string               `yaml:"-"`
+	InputToken                                 [][]int                `yaml:"-"`
+	functionCallString, functionCallNameString string                 `yaml:"-"`
+	ResponseFormat                             string                 `yaml:"-"`
+	ResponseFormatMap                          map[string]interface{} `yaml:"-"`

 	FunctionsConfig functions.FunctionsConfig `yaml:"function"`

@@ -45,8 +58,8 @@ type BackendConfig struct {
 	// GRPC Options
 	GRPC GRPC `yaml:"grpc"`

-	// Vall-e-x
-	VallE VallE `yaml:"vall-e"`
+	// TTS specifics
+	TTSConfig `yaml:"tts"`

 	// CUDA
 	// Explicitly enable CUDA or not (some backends might need it)

@@ -93,6 +106,8 @@ type Diffusers struct {
 	ControlNet string `yaml:"control_net"`
 }

+// LLMConfig is a struct that holds the configuration that are
+// generic for most of the LLM backends.
 type LLMConfig struct {
 	SystemPrompt string `yaml:"system_prompt"`
 	TensorSplit  string `yaml:"tensor_split"`

@@ -144,6 +159,7 @@ type LLMConfig struct {
 	YarnBetaSlow float32 `yaml:"yarn_beta_slow"`
 }

+// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
 type AutoGPTQ struct {
 	ModelBaseName    string `yaml:"model_base_name"`
 	Device           string `yaml:"device"`

@@ -151,13 +167,31 @@ type AutoGPTQ struct {
 	UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
 }

+// TemplateConfig is a struct that holds the configuration of the templating system
 type TemplateConfig struct {
-	Chat                 string `yaml:"chat"`
-	ChatMessage          string `yaml:"chat_message"`
-	Completion           string `yaml:"completion"`
-	Edit                 string `yaml:"edit"`
-	Functions            string `yaml:"function"`
-	UseTokenizerTemplate bool   `yaml:"use_tokenizer_template"`
+	// Chat is the template used in the chat completion endpoint
+	Chat string `yaml:"chat"`
+
+	// ChatMessage is the template used for chat messages
+	ChatMessage string `yaml:"chat_message"`
+
+	// Completion is the template used for completion requests
+	Completion string `yaml:"completion"`
+
+	// Edit is the template used for edit completion requests
+	Edit string `yaml:"edit"`
+
+	// Functions is the template used when tools are present in the client requests
+	Functions string `yaml:"function"`
+
+	// UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used.
+	// Note: this is mostly consumed for backends such as vllm and transformers
+	// that can use the tokenizers specified in the JSON config files of the models
+	UseTokenizerTemplate bool `yaml:"use_tokenizer_template"`
+
+	// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
+	// It defaults to \n
+	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
 }

 func (c *BackendConfig) SetFunctionCallString(s string) {

@@ -334,4 +368,41 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
 	if debug {
 		cfg.Debug = &trueV
 	}
+
+	guessDefaultsFromFile(cfg, lo.modelPath)
 }

+func (c *BackendConfig) Validate() bool {
+	downloadedFileNames := []string{}
+	for _, f := range c.DownloadFiles {
+		downloadedFileNames = append(downloadedFileNames, f.Filename)
+	}
+	validationTargets := []string{c.Backend, c.Model, c.MMProj}
+	validationTargets = append(validationTargets, downloadedFileNames...)
+	// Simple validation to make sure the model can be correctly loaded
+	for _, n := range validationTargets {
+		if n == "" {
+			continue
+		}
+		if strings.HasPrefix(n, string(os.PathSeparator)) ||
+			strings.Contains(n, "..") {
+			return false
+		}
+	}
+
+	if c.Name == "" {
+		return false
+	}
+
+	if c.Backend != "" {
+		// a regex that checks that is a string name with no special characters, except '-' and '_'
+		re := regexp.MustCompile(`^[a-zA-Z0-9-_]+$`)
+		return re.MatchString(c.Backend)
+	}
+
+	return true
+}
+
+func (c *BackendConfig) HasTemplate() bool {
+	return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
+}
@@ -19,11 +19,20 @@ import (
|
||||
)
|
||||
|
||||
type BackendConfigLoader struct {
|
||||
configs map[string]BackendConfig
|
||||
configs map[string]BackendConfig
|
||||
modelPath string
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
func NewBackendConfigLoader(modelPath string) *BackendConfigLoader {
|
||||
return &BackendConfigLoader{
|
||||
configs: make(map[string]BackendConfig),
|
||||
modelPath: modelPath,
|
||||
}
|
||||
}
|
||||
|
||||
type LoadOptions struct {
|
||||
modelPath string
|
||||
debug bool
|
||||
threads, ctxSize int
|
||||
f16 bool
|
||||
@@ -47,6 +56,12 @@ func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
|
||||
}
|
||||
}
|
||||
|
||||
func ModelPath(modelPath string) ConfigLoaderOption {
|
||||
return func(o *LoadOptions) {
|
||||
o.modelPath = modelPath
|
||||
}
|
||||
}
|
||||
|
||||
func LoadOptionF16(f16 bool) ConfigLoaderOption {
|
||||
return func(o *LoadOptions) {
|
||||
o.f16 = f16
|
||||
@@ -61,46 +76,8 @@ func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
|
||||
}
|
||||
}
|
||||
|
||||
// Load a config file for a model
|
||||
func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
|
||||
|
||||
// Load a config file if present after the model name
|
||||
cfg := &BackendConfig{
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
Model: modelName,
|
||||
},
|
||||
}
|
||||
|
||||
cfgExisting, exists := cl.GetBackendConfig(modelName)
|
||||
if exists {
|
||||
cfg = &cfgExisting
|
||||
} else {
|
||||
// Try loading a model config file
|
||||
modelConfig := filepath.Join(modelPath, modelName+".yaml")
|
||||
if _, err := os.Stat(modelConfig); err == nil {
|
||||
if err := cl.LoadBackendConfig(
|
||||
modelConfig, opts...,
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||
}
|
||||
cfgExisting, exists = cl.GetBackendConfig(modelName)
|
||||
if exists {
|
||||
cfg = &cfgExisting
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cfg.SetDefaults(opts...)
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func NewBackendConfigLoader() *BackendConfigLoader {
|
||||
return &BackendConfigLoader{
|
||||
configs: make(map[string]BackendConfig),
|
||||
}
|
||||
}
|
||||
func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
|
||||
// TODO: either in the next PR or the next commit, I want to merge these down into a single function that looks at the first few characters of the file to determine if we need to deserialize to []BackendConfig or BackendConfig
|
||||
func readMultipleBackendConfigsFromFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
|
||||
c := &[]*BackendConfig{}
|
||||
f, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
@@ -117,7 +94,7 @@ func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendC
|
||||
return *c, nil
|
||||
}
|
||||
|
||||
func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
|
||||
func readBackendConfigFromFile(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
|
||||
lo := &LoadOptions{}
|
||||
lo.Apply(opts...)
|
||||
|
||||
@@ -134,44 +111,86 @@ func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig,
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (cm *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
c, err := ReadBackendConfigFile(file, opts...)
|
||||
// Load a config file for a model
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
|
||||
|
||||
// Load a config file if present after the model name
|
||||
cfg := &BackendConfig{
|
||||
PredictionOptions: schema.PredictionOptions{
|
||||
Model: modelName,
|
||||
},
|
||||
}
|
||||
|
||||
cfgExisting, exists := bcl.GetBackendConfig(modelName)
|
||||
if exists {
|
||||
cfg = &cfgExisting
|
||||
} else {
|
||||
// Try loading a model config file
|
||||
modelConfig := filepath.Join(modelPath, modelName+".yaml")
|
||||
if _, err := os.Stat(modelConfig); err == nil {
|
||||
if err := bcl.LoadBackendConfig(
|
||||
modelConfig, opts...,
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("failed loading model config (%s) %s", modelConfig, err.Error())
|
||||
}
|
||||
cfgExisting, exists = bcl.GetBackendConfig(modelName)
|
||||
if exists {
|
||||
cfg = &cfgExisting
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cfg.SetDefaults(opts...)
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// This format is currently only used when reading a single file at startup, passed in via ApplicationConfig.ConfigFile
|
||||
func (bcl *BackendConfigLoader) LoadMultipleBackendConfigsSingleFile(file string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
defer bcl.Unlock()
|
||||
c, err := readMultipleBackendConfigsFromFile(file, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot load config file: %w", err)
|
||||
}
|
||||
|
||||
for _, cc := range c {
|
||||
cm.configs[cc.Name] = *cc
|
||||
if cc.Validate() {
|
||||
bcl.configs[cc.Name] = *cc
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
|
||||
cl.Lock()
|
||||
defer cl.Unlock()
|
||||
c, err := ReadBackendConfig(file, opts...)
|
||||
func (bcl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
|
||||
bcl.Lock()
|
||||
defer bcl.Unlock()
|
||||
c, err := readBackendConfigFromFile(file, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read config file: %w", err)
|
||||
}
|
||||
|
||||
cl.configs[c.Name] = *c
|
||||
if c.Validate() {
|
||||
bcl.configs[c.Name] = *c
|
||||
} else {
|
 		return fmt.Errorf("config is not valid")
 	}

 	return nil
 }

-func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
-	cl.Lock()
-	defer cl.Unlock()
-	v, exists := cl.configs[m]
+func (bcl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
+	bcl.Lock()
+	defer bcl.Unlock()
+	v, exists := bcl.configs[m]
 	return v, exists
 }

-func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
-	cl.Lock()
-	defer cl.Unlock()
+func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
+	bcl.Lock()
+	defer bcl.Unlock()
 	var res []BackendConfig
-	for _, v := range cl.configs {
+	for _, v := range bcl.configs {
 		res = append(res, v)
 	}

@@ -182,26 +201,16 @@ func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
 	return res
 }

-func (cl *BackendConfigLoader) RemoveBackendConfig(m string) {
-	cl.Lock()
-	defer cl.Unlock()
-	delete(cl.configs, m)
-}
-
-func (cl *BackendConfigLoader) ListBackendConfigs() []string {
-	cl.Lock()
-	defer cl.Unlock()
-	var res []string
-	for k := range cl.configs {
-		res = append(res, k)
-	}
-	return res
+func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) {
+	bcl.Lock()
+	defer bcl.Unlock()
+	delete(bcl.configs, m)
 }

 // Preload prepare models if they are not local but url or huggingface repositories
-func (cl *BackendConfigLoader) Preload(modelPath string) error {
-	cl.Lock()
-	defer cl.Unlock()
+func (bcl *BackendConfigLoader) Preload(modelPath string) error {
+	bcl.Lock()
+	defer bcl.Unlock()

 	status := func(fileName, current, total string, percent float64) {
 		utils.DisplayDownloadFunction(fileName, current, total, percent)
@@ -223,7 +232,7 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
 		}
 	}

-	for i, config := range cl.configs {
+	for i, config := range bcl.configs {

 		// Download files and verify their SHA
 		for i, file := range config.DownloadFiles {
@@ -252,10 +261,10 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
 			}
 		}

-			cc := cl.configs[i]
+			cc := bcl.configs[i]
 			c := &cc
 			c.PredictionOptions.Model = modelFileName
-			cl.configs[i] = *c
+			bcl.configs[i] = *c
 		}

 		if config.IsMMProjURL() {
@@ -269,22 +278,22 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {
 			}
 		}

-			cc := cl.configs[i]
+			cc := bcl.configs[i]
 			c := &cc
 			c.MMProj = modelFileName
-			cl.configs[i] = *c
+			bcl.configs[i] = *c
 		}

-		if cl.configs[i].Name != "" {
-			glamText(fmt.Sprintf("**Model name**: _%s_", cl.configs[i].Name))
+		if bcl.configs[i].Name != "" {
+			glamText(fmt.Sprintf("**Model name**: _%s_", bcl.configs[i].Name))
 		}
-		if cl.configs[i].Description != "" {
+		if bcl.configs[i].Description != "" {
 			//glamText("**Description**")
-			glamText(cl.configs[i].Description)
+			glamText(bcl.configs[i].Description)
 		}
-		if cl.configs[i].Usage != "" {
+		if bcl.configs[i].Usage != "" {
 			//glamText("**Usage**")
-			glamText(cl.configs[i].Usage)
+			glamText(bcl.configs[i].Usage)
 		}
 	}
 	return nil
@@ -292,12 +301,12 @@ func (cl *BackendConfigLoader) Preload(modelPath string) error {

 // LoadBackendConfigsFromPath reads all the configurations of the models from a path
 // (non-recursive)
-func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
-	cm.Lock()
-	defer cm.Unlock()
+func (bcl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
+	bcl.Lock()
+	defer bcl.Unlock()
 	entries, err := os.ReadDir(path)
 	if err != nil {
-		return err
+		return fmt.Errorf("cannot read directory '%s': %w", path, err)
 	}
 	files := make([]fs.FileInfo, 0, len(entries))
 	for _, entry := range entries {
@@ -313,9 +322,15 @@ func (cm *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...C
 			strings.HasPrefix(file.Name(), ".") {
 			continue
 		}
-		c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
-		if err == nil {
-			cm.configs[c.Name] = *c
+		c, err := readBackendConfigFromFile(filepath.Join(path, file.Name()), opts...)
+		if err != nil {
+			log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
+			continue
+		}
+		if c.Validate() {
+			bcl.configs[c.Name] = *c
+		} else {
+			log.Error().Err(err).Msgf("config is not valid")
 		}
 	}
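Aside (not part of the diff): taken together with the new tests below, the loader behavior after this change can be sketched as a minimal, hypothetical usage example. It assumes only the signatures exercised by the new tests (NewBackendConfigLoader taking the models path, LoadBackendConfigsFromPath, GetAllBackendConfigs); the ./models path is a placeholder.

package main

import (
	"fmt"

	"github.com/go-skynet/LocalAI/core/config"
)

func main() {
	// Load every model YAML under ./models. Per the diff above, files that
	// fail to parse or whose Validate() returns false are now logged and
	// skipped, so one bad YAML no longer poisons the whole directory load.
	bcl := config.NewBackendConfigLoader("./models")
	if err := bcl.LoadBackendConfigsFromPath("./models"); err != nil {
		panic(err)
	}
	for _, cfg := range bcl.GetAllBackendConfigs() {
		fmt.Println(cfg.Name)
	}
}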
core/config/backend_config_test.go (new file, 63 lines)
@@ -0,0 +1,63 @@
package config

import (
	"io"
	"net/http"
	"os"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

var _ = Describe("Test cases for config related functions", func() {
	Context("Test Read configuration functions", func() {
		It("Test Validate", func() {
			tmp, err := os.CreateTemp("", "config.yaml")
			Expect(err).To(BeNil())
			defer os.Remove(tmp.Name())
			_, err = tmp.WriteString(
				`backend: "foo-bar"
parameters:
  model: "foo-bar"`)
			Expect(err).ToNot(HaveOccurred())
			config, err := readBackendConfigFromFile(tmp.Name())
			Expect(err).To(BeNil())
			Expect(config).ToNot(BeNil())
			Expect(config.Validate()).To(BeFalse())
		})
		It("Test Validate", func() {
			tmp, err := os.CreateTemp("", "config.yaml")
			Expect(err).To(BeNil())
			defer os.Remove(tmp.Name())
			_, err = tmp.WriteString(
				`name: bar-baz
backend: "foo-bar"
parameters:
  model: "foo-bar"`)
			Expect(err).ToNot(HaveOccurred())
			config, err := readBackendConfigFromFile(tmp.Name())
			Expect(err).To(BeNil())
			Expect(config).ToNot(BeNil())
			// two configs in config.yaml
			Expect(config.Name).To(Equal("bar-baz"))
			Expect(config.Validate()).To(BeTrue())

			// download https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml
			httpClient := http.Client{}
			resp, err := httpClient.Get("https://raw.githubusercontent.com/mudler/LocalAI/master/embedded/models/hermes-2-pro-mistral.yaml")
			Expect(err).To(BeNil())
			defer resp.Body.Close()
			tmp, err = os.CreateTemp("", "config.yaml")
			Expect(err).To(BeNil())
			defer os.Remove(tmp.Name())
			_, err = io.Copy(tmp, resp.Body)
			Expect(err).To(BeNil())
			config, err = readBackendConfigFromFile(tmp.Name())
			Expect(err).To(BeNil())
			Expect(config).ToNot(BeNil())
			// two configs in config.yaml
			Expect(config.Name).To(Equal("hermes-2-pro-mistral"))
			Expect(config.Validate()).To(BeTrue())
		})
	})
})
core/config/config_suite_test.go (new file, 13 lines)
@@ -0,0 +1,13 @@
package config_test

import (
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

func TestConfig(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Config test suite")
}
@@ -1,10 +1,8 @@
-package config_test
+package config

 import (
 	"os"

-	. "github.com/go-skynet/LocalAI/core/config"
-
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
@@ -17,8 +15,8 @@ var _ = Describe("Test cases for config related functions", func() {

 	Context("Test Read configuration functions", func() {
 		configFile = os.Getenv("CONFIG_FILE")
-		It("Test ReadConfigFile", func() {
-			config, err := ReadBackendConfigFile(configFile)
+		It("Test readConfigFile", func() {
+			config, err := readMultipleBackendConfigsFromFile(configFile)
 			Expect(err).To(BeNil())
 			Expect(config).ToNot(BeNil())
 			// two configs in config.yaml
@@ -27,26 +25,28 @@ var _ = Describe("Test cases for config related functions", func() {
 		})

 		It("Test LoadConfigs", func() {
-			cm := NewBackendConfigLoader()
-			opts := NewApplicationConfig()
-			err := cm.LoadBackendConfigsFromPath(opts.ModelPath)
-
+			bcl := NewBackendConfigLoader(os.Getenv("MODELS_PATH"))
+			err := bcl.LoadBackendConfigsFromPath(os.Getenv("MODELS_PATH"))
+
 			Expect(err).To(BeNil())
-			Expect(cm.ListBackendConfigs()).ToNot(BeNil())
+			configs := bcl.GetAllBackendConfigs()
+			loadedModelNames := []string{}
+			for _, v := range configs {
+				loadedModelNames = append(loadedModelNames, v.Name)
+			}
+			Expect(configs).ToNot(BeNil())

-			// config should includes gpt4all models's api.config
-			Expect(cm.ListBackendConfigs()).To(ContainElements("gpt4all"))
-
-			// config should includes gpt2 models's api.config
-			Expect(cm.ListBackendConfigs()).To(ContainElements("gpt4all-2"))
+			Expect(loadedModelNames).To(ContainElements("code-search-ada-code-001"))

 			// config should includes text-embedding-ada-002 models's api.config
-			Expect(cm.ListBackendConfigs()).To(ContainElements("text-embedding-ada-002"))
+			Expect(loadedModelNames).To(ContainElements("text-embedding-ada-002"))

 			// config should includes rwkv_test models's api.config
-			Expect(cm.ListBackendConfigs()).To(ContainElements("rwkv_test"))
+			Expect(loadedModelNames).To(ContainElements("rwkv_test"))

 			// config should includes whisper-1 models's api.config
-			Expect(cm.ListBackendConfigs()).To(ContainElements("whisper-1"))
+			Expect(loadedModelNames).To(ContainElements("whisper-1"))
 		})
 	})
 })
core/config/guesser.go (new file, 226 lines)
@@ -0,0 +1,226 @@
package config

import (
	"os"
	"path/filepath"
	"strings"

	"github.com/rs/zerolog/log"

	gguf "github.com/thxcode/gguf-parser-go"
)

type familyType uint8

const (
	Unknown familyType = iota
	LLaMa3
	CommandR
	Phi3
	ChatML
	Mistral03
	Gemma
)

type settingsConfig struct {
	StopWords      []string
	TemplateConfig TemplateConfig
}

// default settings to adopt with a given model family
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
	Gemma: {
		StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
		TemplateConfig: TemplateConfig{
			Chat:        "{{.Input }}\n<|start_of_turn|>model\n",
			ChatMessage: "<|start_of_turn|>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<|end_of_turn|>",
			Completion:  "{{.Input}}",
		},
	},
	LLaMa3: {
		StopWords: []string{"<|eot_id|>"},
		TemplateConfig: TemplateConfig{
			Chat:        "<|begin_of_text|>{{.Input }}\n<|start_header_id|>assistant<|end_header_id|>",
			ChatMessage: "<|start_header_id|>{{ .RoleName }}<|end_header_id|>\n\n{{.Content }}<|eot_id|>",
		},
	},
	CommandR: {
		TemplateConfig: TemplateConfig{
			Chat: "{{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
			Functions: `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
You are a function calling AI model, you can call the following functions:
## Available Tools
{{range .Functions}}
- {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
{{end}}
When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}`,
			ChatMessage: `{{if eq .RoleName "user" -}}
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "system" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "assistant" -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "tool" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if .FunctionCall -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}}<|END_OF_TURN_TOKEN|>
{{- end -}}`,
		},
		StopWords: []string{"<|END_OF_TURN_TOKEN|>"},
	},
	Phi3: {
		TemplateConfig: TemplateConfig{
			Chat:        "{{.Input}}\n<|assistant|>",
			ChatMessage: "<|{{ .RoleName }}|>\n{{.Content}}<|end|>",
			Completion:  "{{.Input}}",
		},
		StopWords: []string{"<|end|>", "<|endoftext|>"},
	},
	ChatML: {
		TemplateConfig: TemplateConfig{
			Chat: "{{.Input -}}\n<|im_start|>assistant",
			Functions: `<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant`,
			ChatMessage: `<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>`,
		},
		StopWords: []string{"<|im_end|>", "<dummy32000>", "</s>"},
	},
	Mistral03: {
		TemplateConfig: TemplateConfig{
			Chat:      "{{.Input -}}",
			Functions: `[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}`,
			ChatMessage: `{{if eq .RoleName "user" -}}
[INST] {{.Content }} [/INST]
{{- else if .FunctionCall -}}
[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
{{- else if eq .RoleName "tool" -}}
[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
{{- else -}}
{{ .Content -}}
{{ end -}}`,
		},
		StopWords: []string{"<|im_end|>", "<dummy32000>", "</tool_call>", "<|eot_id|>", "<|end_of_text|>", "</s>", "[/TOOL_CALLS]", "[/ACTIONS]"},
	},
}

// this maps well known template used in HF to model families defined above
var knownTemplates = map[string]familyType{
	`{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}`: ChatML,
	`{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`: Mistral03,
}

func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {

	if os.Getenv("LOCALAI_DISABLE_GUESSING") == "true" {
		log.Debug().Msgf("guessDefaultsFromFile: %s", "guessing disabled with LOCALAI_DISABLE_GUESSING")
		return
	}

	if modelPath == "" {
		log.Debug().Msgf("guessDefaultsFromFile: %s", "modelPath is empty")
		return
	}

	if cfg.HasTemplate() {
		// nothing to guess here
		log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
		return
	}

	// We try to guess only if we don't have a template defined already
	f, err := gguf.ParseGGUFFile(filepath.Join(modelPath, cfg.ModelFileName()))
	if err != nil {
		// Only valid for gguf files
		log.Debug().Msgf("guessDefaultsFromFile: %s", "not a GGUF file")
		return
	}

	log.Debug().
		Any("eosTokenID", f.Tokenizer().EOSTokenID).
		Any("bosTokenID", f.Tokenizer().BOSTokenID).
		Any("modelName", f.Model().Name).
		Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())

	// guess the name
	if cfg.Name == "" {
		cfg.Name = f.Model().Name
	}

	family := identifyFamily(f)

	if family == Unknown {
		log.Debug().Msgf("guessDefaultsFromFile: %s", "family not identified")
		return
	}

	// identify template
	settings, ok := defaultsSettings[family]
	if ok {
		cfg.TemplateConfig = settings.TemplateConfig
		log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: guessed template %+v", cfg.TemplateConfig)
		if len(cfg.StopWords) == 0 {
			cfg.StopWords = settings.StopWords
		}
	} else {
		log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
	}
}

func identifyFamily(f *gguf.GGUFFile) familyType {

	// identify from well known templates first
	chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
	if found && chatTemplate.ValueString() != "" {
		if family, ok := knownTemplates[chatTemplate.ValueString()]; ok {
			return family
		}
	}

	// otherwise try to identify from the model properties
	arch := f.Architecture().Architecture
	eosTokenID := f.Tokenizer().EOSTokenID
	bosTokenID := f.Tokenizer().BOSTokenID

	isYI := arch == "llama" && bosTokenID == 1 && eosTokenID == 2
	// WTF! Mistral0.3 and isYi have same bosTokenID and eosTokenID

	llama3 := arch == "llama" && eosTokenID == 128009
	commandR := arch == "command-r" && eosTokenID == 255001
	qwen2 := arch == "qwen2"
	phi3 := arch == "phi-3"
	gemma := strings.HasPrefix(f.Model().Name, "gemma")

	switch {
	case gemma:
		return Gemma
	case llama3:
		return LLaMa3
	case commandR:
		return CommandR
	case phi3:
		return Phi3
	case qwen2, isYI:
		return ChatML
	default:
		return Unknown
	}
}
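Aside (not part of the diff): the guesser above keys off a handful of GGUF metadata fields. A small, hypothetical sketch that dumps exactly those fields, using only the gguf-parser-go accessors that appear in guesser.go; the model path is a placeholder.

package main

import (
	"fmt"

	gguf "github.com/thxcode/gguf-parser-go"
)

func main() {
	// Inspect the metadata the family identification relies on:
	// architecture, model name, BOS/EOS token ids, and the embedded chat template.
	f, err := gguf.ParseGGUFFile("./models/example.gguf") // placeholder path
	if err != nil {
		panic(err) // only valid for GGUF files
	}
	fmt.Println("architecture:", f.Architecture().Architecture)
	fmt.Println("model name:", f.Model().Name)
	fmt.Println("bos/eos ids:", f.Tokenizer().BOSTokenID, f.Tokenizer().EOSTokenID)
	if tmpl, found := f.Header.MetadataKV.Get("tokenizer.chat_template"); found {
		fmt.Println("chat template:", tmpl.ValueString())
	}
}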
core/dependencies_manager/manager.go (new file, 46 lines)
@@ -0,0 +1,46 @@
package main

import (
	"fmt"
	"os"
	"path/filepath"

	"github.com/go-skynet/LocalAI/pkg/downloader"
	"github.com/go-skynet/LocalAI/pkg/utils"
	"gopkg.in/yaml.v3"
)

type Asset struct {
	FileName string `yaml:"filename"`
	URL      string `yaml:"url"`
	SHA      string `yaml:"sha"`
}

func main() {

	// read the YAML file which contains a list of assets
	// and download them in the asset path
	assets := []Asset{}

	assetFile := os.Args[1]
	destPath := os.Args[2]

	// read the YAML file
	f, err := os.ReadFile(assetFile)
	if err != nil {
		panic(err)
	}
	// unmarshal the YAML data into a struct
	if err := yaml.Unmarshal(f, &assets); err != nil {
		panic(err)
	}

	// download the assets
	for _, asset := range assets {
		if err := downloader.DownloadFile(asset.URL, filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil {
			panic(err)
		}
	}

	fmt.Println("Finished downloading assets")
}
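Aside (not part of the diff): the asset file this manager reads is a YAML list matching the Asset struct tags above, so something along these lines, with placeholder values:

- filename: libfoo.so
  url: https://example.com/libfoo.so
  sha: 0123456789abcdef

Invocation would then look like `go run ./core/dependencies_manager assets.yaml ./backend-assets`, since the program takes the asset file as os.Args[1] and the destination directory as os.Args[2]; both paths here are illustrative, not taken from the repository's build scripts.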
@@ -20,6 +20,7 @@ import (
 	"github.com/gofiber/contrib/fiberzerolog"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
+	"github.com/gofiber/fiber/v2/middleware/csrf"
 	"github.com/gofiber/fiber/v2/middleware/favicon"
 	"github.com/gofiber/fiber/v2/middleware/filesystem"
 	"github.com/gofiber/fiber/v2/middleware/recover"
@@ -65,15 +66,19 @@ var embedDirStatic embed.FS
 // @name Authorization

 func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) (*fiber.App, error) {
-	// Return errors as JSON responses
-	app := fiber.New(fiber.Config{
+	fiberCfg := fiber.Config{
 		Views:     renderEngine(),
 		BodyLimit: appConfig.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
 		// We disable the Fiber startup message as it does not conform to structured logging.
 		// We register a startup log line with connection information in the OnListen hook to keep things user friendly though
 		DisableStartupMessage: true,
-		// Override default error handler
-		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
+	}
+
+	if !appConfig.OpaqueErrors {
+		// Normally, return errors as JSON responses
+		fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, err error) error {
 			// Status code defaults to 500
 			code := fiber.StatusInternalServerError

@@ -89,8 +94,15 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
 					Error: &schema.APIError{Message: err.Error(), Code: code},
 				},
 			)
-		},
-	})
+		}
+	} else {
+		// If OpaqueErrors are required, replace everything with a blank 500.
+		fiberCfg.ErrorHandler = func(ctx *fiber.Ctx, _ error) error {
+			return ctx.Status(500).SendString("")
+		}
+	}
+
+	app := fiber.New(fiberCfg)

 	app.Hooks().OnListen(func(listenData fiber.ListenData) error {
 		scheme := "http"
@@ -167,12 +179,17 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
 		app.Use(c)
 	}

+	if appConfig.CSRF {
+		log.Debug().Msg("Enabling CSRF middleware. Tokens are now required for state-modifying requests")
+		app.Use(csrf.New())
+	}
+
 	// Load config jsons
 	utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles)
 	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants)
 	utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsFileConfigFile, &openai.AssistantFiles)

-	galleryService := services.NewGalleryService(appConfig.ModelPath)
+	galleryService := services.NewGalleryService(appConfig)
 	galleryService.Start(appConfig.Context, cl)

 	routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
@@ -73,7 +73,8 @@ func getModelStatus(url string) (response map[string]interface{}) {
 }

 func getModels(url string) (response []gallery.GalleryModel) {
-	downloader.GetURI(url, func(url string, i []byte) error {
+	// TODO: No tests currently seem to exercise file:// urls. Fix?
+	downloader.GetURI(url, "", func(url string, i []byte) error {
 		// Unmarshal YAML data into a struct
 		return json.Unmarshal(i, &response)
 	})
@@ -221,6 +222,8 @@ var _ = Describe("API test", func() {
 			Expect(err).ToNot(HaveOccurred())

 			modelDir = filepath.Join(tmpdir, "models")
+			err = os.Mkdir(modelDir, 0750)
+			Expect(err).ToNot(HaveOccurred())
 			backendAssetsDir := filepath.Join(tmpdir, "backend-assets")
 			err = os.Mkdir(backendAssetsDir, 0750)
 			Expect(err).ToNot(HaveOccurred())
@@ -241,13 +244,13 @@ var _ = Describe("API test", func() {
 			}
 			out, err := yaml.Marshal(g)
 			Expect(err).ToNot(HaveOccurred())
-			err = os.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0600)
+			err = os.WriteFile(filepath.Join(modelDir, "gallery_simple.yaml"), out, 0600)
 			Expect(err).ToNot(HaveOccurred())

 			galleries := []gallery.Gallery{
 				{
 					Name: "test",
-					URL:  "file://" + filepath.Join(tmpdir, "gallery_simple.yaml"),
+					URL:  "file://" + filepath.Join(modelDir, "gallery_simple.yaml"),
 				},
 			}
@@ -243,13 +243,13 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri
 			},
 			elem.H5(
 				attrs.Props{
-					"class": "mb-2 text-xl font-medium leading-tight",
+					"class": "mb-2 text-xl font-bold leading-tight",
 				},
 				elem.Text(m.Name),
 			),
 			elem.P(
 				attrs.Props{
-					"class": "mb-4 text-base",
+					"class": "mb-4 text-sm [&:not(:hover)]:truncate text-base",
 				},
 				elem.Text(m.Description),
 			),
@@ -52,7 +52,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 	}
 	log.Debug().Msgf("Request for model: %s", modelFile)

-	filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, appConfig, *cfg)
+	filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg)
 	if err != nil {
 		return err
 	}
@@ -12,10 +12,13 @@ import (
 )

 // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
-// @Summary Generates audio from the input text.
-// @Param request body schema.TTSRequest true "query params"
-// @Success 200 {string} binary "Response"
-// @Router /v1/audio/speech [post]
+// @Summary Generates audio from the input text.
+// @Accept json
+// @Produce audio/x-wav
+// @Param request body schema.TTSRequest true "query params"
+// @Success 200 {string} binary "generated audio/wav file"
+// @Router /v1/audio/speech [post]
+// @Router /tts [post]
 func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {

@@ -40,6 +43,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 	)

 	if err != nil {
+		log.Err(err)
 		modelFile = input.Model
 		log.Warn().Msgf("Model not found in context: %s", input.Model)
 	} else {
@@ -51,7 +55,15 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
 		cfg.Backend = input.Backend
 	}

-	filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg)
+	if input.Language != "" {
+		cfg.Language = input.Language
+	}
+
+	if input.Voice != "" {
+		cfg.Voice = input.Voice
+	}
+
+	filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg)
 	if err != nil {
 		return err
 	}
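Aside (not part of the diff): per the hunk above, /v1/audio/speech now honors per-request voice and language overrides. A hedged client sketch follows; the JSON field names are inferred from the schema.TTSRequest fields referenced in the diff (Model, Input, Voice, Language) and may not match the wire format exactly, and the model name is a placeholder.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	// Assumed wire format for schema.TTSRequest; field names are a guess.
	body := []byte(`{"model": "tts-model", "input": "Hello from LocalAI", "voice": "some-voice", "language": "en"}`)
	resp, err := http.Post("http://localhost:8080/v1/audio/speech", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, err := os.Create("speech.wav") // endpoint declares @Produce audio/x-wav
	if err != nil {
		panic(err)
	}
	defer out.Close()
	n, _ := io.Copy(out, resp.Body)
	fmt.Println("wrote", n, "bytes")
}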
@@ -25,7 +25,7 @@ import (
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/chat/completions [post]
 func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error {
-	emptyMessage := ""
+	textContentToReturn := ""
 	id := uuid.New().String()
 	created := int(time.Now().Unix())

@@ -34,7 +34,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			ID:      id,
 			Created: created,
 			Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
+			Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
 			Object:  "chat.completion.chunk",
 		}
 		responses <- initialMessage
@@ -67,7 +67,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			return true
 		})

+		textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
+		result = functions.CleanupLLMResult(result, config.FunctionsConfig)
 		results := functions.ParseFunctionCall(result, config.FunctionsConfig)
+		log.Debug().Msgf("Text content to return: %s", textContentToReturn)
 		noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0

 		switch {
@@ -76,7 +79,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 				ID:      id,
 				Created: created,
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
-				Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
+				Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}},
 				Object:  "chat.completion.chunk",
 			}
 			responses <- initialMessage
@@ -134,7 +137,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 					Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
 					Choices: []schema.Choice{{
 						Delta: &schema.Message{
-							Role: "assistant",
+							Role:    "assistant",
+							Content: &textContentToReturn,
 							ToolCalls: []schema.ToolCall{
 								{
 									Index: i,
@@ -181,8 +185,13 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			noActionDescription = config.FunctionsConfig.NoActionDescriptionName
 		}

-		if input.ResponseFormat.Type == "json_object" {
-			input.Grammar = functions.JSONBNF
+		if config.ResponseFormatMap != nil {
+			d := schema.ChatCompletionResponseFormat{}
+			dat, _ := json.Marshal(config.ResponseFormatMap)
+			_ = json.Unmarshal(dat, &d)
+			if d.Type == "json_object" {
+				input.Grammar = functions.JSONBNF
+			}
 		}

 		config.Grammar = input.Grammar
@@ -192,7 +201,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 		}

 		switch {
-		case !config.FunctionsConfig.NoGrammar && shouldUseFn:
+		case !config.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn:
 			noActionGrammar := functions.Function{
 				Name:        noActionName,
 				Description: noActionDescription,
@@ -219,15 +228,15 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			// Handle if we should return "name" instead of "functions"
 			if config.FunctionsConfig.FunctionName {
 				jsStruct := funcs.ToJSONNameStructure()
-				config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
+				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 			} else {
 				jsStruct := funcs.ToJSONFunctionStructure()
-				config.Grammar = jsStruct.Grammar("", config.FunctionsConfig.ParallelCalls)
+				config.Grammar = jsStruct.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 			}
 		case input.JSONFunctionGrammarObject != nil:
-			config.Grammar = input.JSONFunctionGrammarObject.Grammar("", config.FunctionsConfig.ParallelCalls)
+			config.Grammar = input.JSONFunctionGrammarObject.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		case input.JSONFunctionGrammarObjectName != nil:
-			config.Grammar = input.JSONFunctionGrammarObjectName.Grammar("", config.FunctionsConfig.ParallelCalls)
+			config.Grammar = input.JSONFunctionGrammarObjectName.Grammar(config.FunctionsConfig.GrammarConfig.Options()...)
 		default:
 			// Force picking one of the functions by the request
 			if config.FunctionToCall() != "" {
@@ -349,7 +358,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			mess = append(mess, content)
 		}

-		predInput = strings.Join(mess, "\n")
+		joinCharacter := "\n"
+		if config.TemplateConfig.JoinChatMessagesByCharacter != nil {
+			joinCharacter = *config.TemplateConfig.JoinChatMessagesByCharacter
+		}
+
+		predInput = strings.Join(mess, joinCharacter)
 		log.Debug().Msgf("Prompt (before templating): %s", predInput)

 		templateFile := ""
@@ -423,7 +437,6 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 				if err != nil {
 					log.Debug().Msgf("Sending chunk failed: %v", err)
 					input.Cancel()
-					break
 				}
 				w.Flush()
 			}
@@ -443,7 +456,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			{
 				FinishReason: finishReason,
 				Index:        0,
-				Delta:        &schema.Message{Content: &emptyMessage},
+				Delta:        &schema.Message{Content: &textContentToReturn},
 			}},
 			Object: "chat.completion.chunk",
 			Usage:  *usage,
@@ -465,7 +478,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			return
 		}

+		textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig)
+		s = functions.CleanupLLMResult(s, config.FunctionsConfig)
 		results := functions.ParseFunctionCall(s, config.FunctionsConfig)
+		log.Debug().Msgf("Text content to return: %s", textContentToReturn)
 		noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0

 		switch {
@@ -493,6 +509,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 			if len(input.Tools) > 0 {
 				// If we are using tools, we condense the function calls into
 				// a single response choice with all the tools
+				toolChoice.Message.Content = textContentToReturn
 				toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls,
 					schema.ToolCall{
 						ID: id,
@@ -508,7 +525,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
 				*c = append(*c, schema.Choice{
 					FinishReason: "function_call",
 					Message: &schema.Message{
-						Role: "assistant",
+						Role:         "assistant",
+						Content:      &textContentToReturn,
 						FunctionCall: map[string]interface{}{
 							"name":      name,
 							"arguments": args,
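Aside (not part of the diff): the json_object path above is what OpenAI-style JSON mode maps to; when the resolved response format has type "json_object", the handler swaps the grammar for functions.JSONBNF, constraining output to valid JSON. A hedged request sketch, assuming the request's response_format is what populates config.ResponseFormatMap (the mapping itself is not shown in this diff) and using a placeholder model name:

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// OpenAI-shaped chat completion request asking for JSON mode.
	body := []byte(`{
		"model": "hermes-2-pro-mistral",
		"response_format": {"type": "json_object"},
		"messages": [{"role": "user", "content": "Give me a JSON object with a greeting"}]
	}`)
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}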
Some files were not shown because too many files have changed in this diff.