Compare commits

..

2 Commits

Author SHA1 Message Date
Ettore Di Giacinto
63c5d843b6 chore(gosec): fix CI
Downgrade to the latest known working version of the gosec action.

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:17:27 +02:00
Ettore Di Giacinto
a9b0e264f2 chore(exllama): drop exllama backend
For polishing and cleaning up, it now makes sense to drop exllama, which
is completely unmaintained and only supported the llamav1
architecture (nowadays superseded by llamav2).

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2024-09-13 19:09:43 +02:00
77 changed files with 613 additions and 1455 deletions

View File

@@ -9,7 +9,6 @@
# Param 2: email # Param 2: email
# #
config_user() { config_user() {
echo "Configuring git for $1 <$2>"
local gcn=$(git config --global user.name) local gcn=$(git config --global user.name)
if [ -z "${gcn}" ]; then if [ -z "${gcn}" ]; then
echo "Setting up git user / remote" echo "Setting up git user / remote"
@@ -25,7 +24,6 @@ config_user() {
# Param 2: remote url # Param 2: remote url
# #
config_remote() { config_remote() {
echo "Adding git remote and fetching $2 as $1"
local gr=$(git remote -v | grep $1) local gr=$(git remote -v | grep $1)
if [ -z "${gr}" ]; then if [ -z "${gr}" ]; then
git remote add $1 $2 git remote add $1 $2

View File

@@ -29,14 +29,9 @@ def calculate_sha256(file_path):
def manual_safety_check_hf(repo_id): def manual_safety_check_hf(repo_id):
scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan") scanResponse = requests.get('https://huggingface.co/api/models/' + repo_id + "/scan")
scan = scanResponse.json() scan = scanResponse.json()
# Check if 'hasUnsafeFile' exists in the response if scan['hasUnsafeFile']:
if 'hasUnsafeFile' in scan: return scan
if scan['hasUnsafeFile']: return None
return scan
else:
return None
else:
return None
download_type, repo_id_or_url = parse_uri(uri) download_type, repo_id_or_url = parse_uri(uri)

View File

@@ -13,78 +13,6 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
hipblas-jobs:
uses: ./.github/workflows/image_build.yml
with:
tag-latest: ${{ matrix.tag-latest }}
tag-suffix: ${{ matrix.tag-suffix }}
ffmpeg: ${{ matrix.ffmpeg }}
image-type: ${{ matrix.image-type }}
build-type: ${{ matrix.build-type }}
cuda-major-version: ${{ matrix.cuda-major-version }}
cuda-minor-version: ${{ matrix.cuda-minor-version }}
platforms: ${{ matrix.platforms }}
runs-on: ${{ matrix.runs-on }}
base-image: ${{ matrix.base-image }}
grpc-base-image: ${{ matrix.grpc-base-image }}
aio: ${{ matrix.aio }}
makeflags: ${{ matrix.makeflags }}
latest-image: ${{ matrix.latest-image }}
latest-image-aio: ${{ matrix.latest-image-aio }}
secrets:
dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
strategy:
# Pushing with all jobs in parallel
# eats the bandwidth of all the nodes
max-parallel: 2
matrix:
include:
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
self-hosted-jobs: self-hosted-jobs:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml
with: with:
@@ -111,7 +39,7 @@ jobs:
strategy: strategy:
# Pushing with all jobs in parallel # Pushing with all jobs in parallel
# eats the bandwidth of all the nodes # eats the bandwidth of all the nodes
max-parallel: ${{ github.event_name != 'pull_request' && 5 || 8 }} max-parallel: ${{ github.event_name != 'pull_request' && 6 || 10 }}
matrix: matrix:
include: include:
# Extra images # Extra images
@@ -194,6 +122,29 @@ jobs:
base-image: "ubuntu:22.04" base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'auto'
tag-suffix: '-hipblas-ffmpeg'
ffmpeg: 'true'
image-type: 'extras'
aio: "-aio-gpu-hipblas"
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
latest-image: 'latest-gpu-hipblas'
latest-image-aio: 'latest-aio-gpu-hipblas'
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas'
ffmpeg: 'false'
image-type: 'extras'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'sycl_f16' - build-type: 'sycl_f16'
platforms: 'linux/amd64' platforms: 'linux/amd64'
tag-latest: 'auto' tag-latest: 'auto'
@@ -261,6 +212,26 @@ jobs:
image-type: 'core' image-type: 'core'
runs-on: 'arc-runner-set' runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target" makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-ffmpeg-core'
ffmpeg: 'true'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
- build-type: 'hipblas'
platforms: 'linux/amd64'
tag-latest: 'false'
tag-suffix: '-hipblas-core'
ffmpeg: 'false'
image-type: 'core'
base-image: "rocm/dev-ubuntu-22.04:6.1"
grpc-base-image: "ubuntu:22.04"
runs-on: 'arc-runner-set'
makeflags: "--jobs=3 --output-sync=target"
core-image-build: core-image-build:
uses: ./.github/workflows/image_build.yml uses: ./.github/workflows/image_build.yml

View File

@@ -297,10 +297,10 @@ COPY .git .
RUN make prepare RUN make prepare
## Build the binary ## Build the binary
## If it's CUDA or hipblas, we want to skip some of the llama-compat backends to save space ## If it's CUDA, we want to skip some of the llama-compat backends to save space
## We only leave the most CPU-optimized variant and the fallback for the cublas/hipblas build ## We only leave the most CPU-optimized variant and the fallback for the cublas build
## (both will use CUDA or hipblas for the actual computation) ## (both will use CUDA for the actual computation)
RUN if [ "${BUILD_TYPE}" = "cublas" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \ SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
else \ else \
make build; \ make build; \
@@ -338,8 +338,9 @@ RUN if [ "${FFMPEG}" = "true" ]; then \
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
ssh less wget ssh less && \
# For the devcontainer, leave apt functional in case additional devtools are needed at runtime. apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN go install github.com/go-delve/delve/cmd/dlv@latest RUN go install github.com/go-delve/delve/cmd/dlv@latest

View File

@@ -8,7 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions # llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=70392f1f81470607ba3afef04aa56c9f65587664 CPPLLAMA_VERSION?=e6b7801bd189d102d901d3e72035611a25456ef1
# go-rwkv version # go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
# whisper.cpp version # whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=0d2e2aed80109e8696791083bde3b58e190b7812 WHISPER_CPP_VERSION?=a551933542d956ae84634937acd2942eb40efaaf
# bert.cpp version # bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
@@ -359,9 +359,6 @@ clean-tests:
rm -rf test-dir rm -rf test-dir
rm -rf core/http/backend-assets rm -rf core/http/backend-assets
clean-dc: clean
cp -r /build/backend-assets /workspace/backend-assets
## Build: ## Build:
build: prepare backend-assets grpcs ## Build the project build: prepare backend-assets grpcs ## Build the project
$(info ${GREEN}I local-ai build info:${RESET}) $(info ${GREEN}I local-ai build info:${RESET})

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096 context_size: 4096
f16: true f16: true
mmap: true mmap: true
name: gpt-4o name: gpt-4-vision-preview
roles: roles:
user: "USER:" user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096 context_size: 4096
f16: true f16: true
mmap: true mmap: true
name: gpt-4o name: gpt-4-vision-preview
roles: roles:
user: "USER:" user: "USER:"

View File

@@ -2,7 +2,7 @@ backend: llama-cpp
context_size: 4096 context_size: 4096
mmap: false mmap: false
f16: false f16: false
name: gpt-4o name: gpt-4-vision-preview
roles: roles:
user: "USER:" user: "USER:"

View File

@@ -134,8 +134,6 @@ message PredictOptions {
repeated string Images = 42; repeated string Images = 42;
bool UseTokenizerTemplate = 43; bool UseTokenizerTemplate = 43;
repeated Message Messages = 44; repeated Message Messages = 44;
repeated string Videos = 45;
repeated string Audios = 46;
} }
// The response message containing the result // The response message containing the result

View File

@@ -13,7 +13,6 @@
#include <getopt.h> #include <getopt.h>
#include "clip.h" #include "clip.h"
#include "llava.h" #include "llava.h"
#include "log.h"
#include "stb_image.h" #include "stb_image.h"
#include "common.h" #include "common.h"
#include "json.hpp" #include "json.hpp"
@@ -449,7 +448,7 @@ struct llama_server_context
LOG_INFO("Multi Modal Mode Enabled", {}); LOG_INFO("Multi Modal Mode Enabled", {});
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1); clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
if(clp_ctx == nullptr) { if(clp_ctx == nullptr) {
LOG_ERR("unable to load clip model: %s", params.mmproj.c_str()); LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
return false; return false;
} }
@@ -463,7 +462,7 @@ struct llama_server_context
ctx = llama_init.context; ctx = llama_init.context;
if (model == nullptr) if (model == nullptr)
{ {
LOG_ERR("unable to load model: %s", params.model.c_str()); LOG_ERROR("unable to load model", {{"model", params.model}});
return false; return false;
} }
@@ -471,7 +470,7 @@ struct llama_server_context
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx); const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_n_embd(model); const int n_embd_llm = llama_n_embd(model);
if (n_embd_clip != n_embd_llm) { if (n_embd_clip != n_embd_llm) {
LOG("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm); LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
llama_free(ctx); llama_free(ctx);
llama_free_model(model); llama_free_model(model);
return false; return false;
@@ -490,7 +489,7 @@ struct llama_server_context
std::vector<char> buf(1); std::vector<char> buf(1);
int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size()); int res = llama_chat_apply_template(model, nullptr, chat, 1, true, buf.data(), buf.size());
if (res < 0) { if (res < 0) {
LOG_ERR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", __func__); LOG_ERROR("The chat template comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses", {});
sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template sparams.chat_template = "<|im_start|>"; // llama_chat_apply_template only checks if <|im_start|> exist in the template
} }
} }
@@ -813,11 +812,10 @@ struct llama_server_context
img_sl.img_data = clip_image_u8_init(); img_sl.img_data = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data)) if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
{ {
LOG_ERR("%s: failed to load image, slot_id: %d, img_sl_id: %d", LOG_ERROR("failed to load image", {
__func__, {"slot_id", slot->id},
slot->id, {"img_sl_id", img_sl.id}
img_sl.id });
);
return false; return false;
} }
LOG_VERBOSE("image loaded", { LOG_VERBOSE("image loaded", {
@@ -855,12 +853,12 @@ struct llama_server_context
} }
} }
if (!found) { if (!found) {
LOG("ERROR: Image with id: %i, not found.\n", img_id); LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
slot->images.clear(); slot->images.clear();
return false; return false;
} }
} catch (const std::invalid_argument& e) { } catch (const std::invalid_argument& e) {
LOG("Invalid image number id in prompt\n"); LOG_TEE("Invalid image number id in prompt\n");
slot->images.clear(); slot->images.clear();
return false; return false;
} }
@@ -888,7 +886,7 @@ struct llama_server_context
{"task_id", slot->task_id}, {"task_id", slot->task_id},
}); });
// LOG("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str()); // LOG_TEE("sampling: \n%s\n", llama_sampling_print(slot->sparams).c_str());
return true; return true;
} }
@@ -928,7 +926,7 @@ struct llama_server_context
}; };
if (llama_decode(ctx, batch_view) != 0) if (llama_decode(ctx, batch_view) != 0)
{ {
LOG("%s: llama_decode() failed\n", __func__); LOG_TEE("%s: llama_decode() failed\n", __func__);
return; return;
} }
} }
@@ -940,7 +938,7 @@ struct llama_server_context
} }
} }
LOG("system prompt updated\n"); LOG_TEE("system prompt updated\n");
system_need_update = false; system_need_update = false;
} }
@@ -1122,7 +1120,7 @@ struct llama_server_context
} }
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) { if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG("Error processing the given image"); LOG_TEE("Error processing the given image");
return false; return false;
} }
@@ -1134,7 +1132,7 @@ struct llama_server_context
void send_error(task_server& task, const std::string &error) void send_error(task_server& task, const std::string &error)
{ {
LOG("task %i - error: %s\n", task.id, error.c_str()); LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
task_result res; task_result res;
res.id = task.id; res.id = task.id;
res.multitask_id = task.multitask_id; res.multitask_id = task.multitask_id;
@@ -1373,7 +1371,7 @@ struct llama_server_context
}; };
if (llama_decode(ctx, batch_view)) if (llama_decode(ctx, batch_view))
{ {
LOG("%s : failed to eval\n", __func__); LOG_TEE("%s : failed to eval\n", __func__);
return false; return false;
} }
} }
@@ -1391,7 +1389,7 @@ struct llama_server_context
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, }; llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
if (llama_decode(ctx, batch_img)) if (llama_decode(ctx, batch_img))
{ {
LOG("%s : failed to eval image\n", __func__); LOG_TEE("%s : failed to eval image\n", __func__);
return false; return false;
} }
slot.n_past += n_eval; slot.n_past += n_eval;
@@ -1574,7 +1572,7 @@ struct llama_server_context
slot.n_past = 0; slot.n_past = 0;
slot.truncated = false; slot.truncated = false;
slot.has_next_token = true; slot.has_next_token = true;
LOG("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size()); LOG_TEE("Context exhausted. Slot %d released (%d tokens in cache)\n", slot.id, (int) slot.cache_tokens.size());
continue; continue;
// END LOCALAI changes // END LOCALAI changes
@@ -1822,11 +1820,10 @@ struct llama_server_context
if (has_images && !ingest_images(slot, n_batch)) if (has_images && !ingest_images(slot, n_batch))
{ {
LOG_ERR("%s: failed processing images Slot id : %d, Task id: %d", LOG_ERROR("failed processing images", {
__func__, "slot_id", slot.id,
slot.id, "task_id", slot.task_id,
slot.task_id });
);
// FIXME @phymbert: to be properly tested // FIXME @phymbert: to be properly tested
// early returning without changing the slot state will block the slot for ever // early returning without changing the slot state will block the slot for ever
// no one at the moment is checking the return value // no one at the moment is checking the return value
@@ -1866,10 +1863,10 @@ struct llama_server_context
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1); const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w; const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
LOG("\n"); LOG_TEE("\n");
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd); LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
LOG("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n); LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
LOG("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd); LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd); llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n); llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
@@ -1879,7 +1876,7 @@ struct llama_server_context
slot.ga_i += slot.ga_w / slot.ga_n; slot.ga_i += slot.ga_w / slot.ga_n;
LOG("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i); LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
} }
slot.n_past_se += n_tokens; slot.n_past_se += n_tokens;
} }
@@ -1904,11 +1901,11 @@ struct llama_server_context
if (n_batch == 1 || ret < 0) if (n_batch == 1 || ret < 0)
{ {
// if you get here, it means the KV cache is full - try increasing it via the context size // if you get here, it means the KV cache is full - try increasing it via the context size
LOG("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret); LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
return false; return false;
} }
LOG("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2); LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
// retry with half the batch size to try to find a free slot in the KV cache // retry with half the batch size to try to find a free slot in the KV cache
n_batch /= 2; n_batch /= 2;

View File

@@ -2,4 +2,4 @@
intel-extension-for-pytorch intel-extension-for-pytorch
torch torch
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate

View File

@@ -1,4 +1,4 @@
coqui-tts TTS==0.22.0
grpcio==1.66.1 grpcio==1.66.1
protobuf protobuf
certifi certifi

View File

@@ -3,7 +3,7 @@ intel-extension-for-pytorch
torch torch
torchvision torchvision
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
diffusers diffusers
opencv-python opencv-python
transformers transformers

View File

@@ -15,12 +15,5 @@ installRequirements
# https://github.com/descriptinc/audiotools/issues/101 # https://github.com/descriptinc/audiotools/issues/101
# incompatible protobuf versions. # incompatible protobuf versions.
PYDIR=python3.10 PYDIR=$(ls ${MY_DIR}/venv/lib)
pyenv="${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/" curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${MY_DIR}/venv/lib/${PYDIR}/site-packages/google/protobuf/internal/builder.py
if [ ! -d ${pyenv} ]; then
echo "(parler-tts/install.sh): Error: ${pyenv} does not exist"
exit 1
fi
curl -L https://raw.githubusercontent.com/protocolbuffers/protobuf/main/python/google/protobuf/internal/builder.py -o ${pyenv}/builder.py

View File

@@ -3,6 +3,6 @@ intel-extension-for-pytorch
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406
transformers transformers
accelerate accelerate

View File

@@ -5,4 +5,4 @@ accelerate
torch torch
rerankers[transformers] rerankers[transformers]
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -55,7 +55,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
""" """
model_name = request.Model model_name = request.Model
try: try:
self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) self.model = SentenceTransformer(model_name)
except Exception as err: except Exception as err:
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")

View File

@@ -2,5 +2,5 @@ torch
accelerate accelerate
transformers transformers
bitsandbytes bitsandbytes
sentence-transformers==3.1.1 sentence-transformers==3.0.1
transformers transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu118
torch torch
accelerate accelerate
sentence-transformers==3.1.1 sentence-transformers==3.0.1
transformers transformers

View File

@@ -1,4 +1,4 @@
torch torch
accelerate accelerate
sentence-transformers==3.1.1 sentence-transformers==3.0.1
transformers transformers

View File

@@ -1,5 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/rocm6.0 --extra-index-url https://download.pytorch.org/whl/rocm6.0
torch torch
accelerate accelerate
sentence-transformers==3.1.1 sentence-transformers==3.0.1
transformers transformers

View File

@@ -4,5 +4,5 @@ torch
optimum[openvino] optimum[openvino]
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
accelerate accelerate
sentence-transformers==3.1.1 sentence-transformers==3.0.1
transformers transformers

View File

@@ -1,5 +1,3 @@
grpcio==1.66.1 grpcio==1.66.1
protobuf protobuf
certifi certifi
datasets
einops

View File

@@ -4,4 +4,4 @@ transformers
accelerate accelerate
torch torch
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -4,4 +4,4 @@ accelerate
torch torch
torchaudio torchaudio
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -4,4 +4,4 @@ accelerate
torch torch
transformers transformers
optimum[openvino] optimum[openvino]
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406 setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406

View File

@@ -12,7 +12,7 @@ import (
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) { func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
modelFile := backendConfig.Model modelFile := backendConfig.Model
grpcOpts := GRPCModelOpts(backendConfig) grpcOpts := gRPCModelOpts(backendConfig)
var inferenceModel interface{} var inferenceModel interface{}
var err error var err error

View File

@@ -12,7 +12,7 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
if *threads == 0 && appConfig.Threads != 0 { if *threads == 0 && appConfig.Threads != 0 {
threads = &appConfig.Threads threads = &appConfig.Threads
} }
gRPCOpts := GRPCModelOpts(backendConfig) gRPCOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(backendConfig, appConfig, []model.Option{ opts := modelOpts(backendConfig, appConfig, []model.Option{
model.WithBackendString(backendConfig.Backend), model.WithBackendString(backendConfig.Backend),
model.WithAssetDir(appConfig.AssetsDestination), model.WithAssetDir(appConfig.AssetsDestination),

View File

@@ -31,13 +31,13 @@ type TokenUsage struct {
Completion int Completion int
} }
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) { func ModelInference(ctx context.Context, s string, messages []schema.Message, images []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
modelFile := c.Model modelFile := c.Model
threads := c.Threads threads := c.Threads
if *threads == 0 && o.Threads != 0 { if *threads == 0 && o.Threads != 0 {
threads = &o.Threads threads = &o.Threads
} }
grpcOpts := GRPCModelOpts(c) grpcOpts := gRPCModelOpts(c)
var inferenceModel grpc.Backend var inferenceModel grpc.Backend
var err error var err error
@@ -101,8 +101,6 @@ func ModelInference(ctx context.Context, s string, messages []schema.Message, im
opts.Messages = protoMessages opts.Messages = protoMessages
opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate opts.UseTokenizerTemplate = c.TemplateConfig.UseTokenizerTemplate
opts.Images = images opts.Images = images
opts.Videos = videos
opts.Audios = audios
tokenUsage := TokenUsage{} tokenUsage := TokenUsage{}

View File

@@ -44,7 +44,7 @@ func getSeed(c config.BackendConfig) int32 {
return seed return seed
} }
func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions { func gRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
b := 512 b := 512
if c.Batch != 0 { if c.Batch != 0 {
b = c.Batch b = c.Batch

View File

@@ -15,7 +15,7 @@ func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *mod
return nil, fmt.Errorf("backend is required") return nil, fmt.Errorf("backend is required")
} }
grpcOpts := GRPCModelOpts(backendConfig) grpcOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb), model.WithBackendString(bb),

View File

@@ -29,7 +29,7 @@ func SoundGeneration(
return "", nil, fmt.Errorf("backend is a required parameter") return "", nil, fmt.Errorf("backend is a required parameter")
} }
grpcOpts := GRPCModelOpts(backendConfig) grpcOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(backend), model.WithBackendString(backend),
model.WithModel(modelFile), model.WithModel(modelFile),

View File

@@ -28,7 +28,7 @@ func ModelTTS(
bb = model.PiperBackend bb = model.PiperBackend
} }
grpcOpts := GRPCModelOpts(backendConfig) grpcOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{ opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb), model.WithBackendString(bb),

View File

@@ -41,35 +41,31 @@ type RunCMD struct {
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"` ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`
CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"`
LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"` LibraryPath string `env:"LOCALAI_LIBRARY_PATH,LIBRARY_PATH" help:"Path to the library directory (for e.g. external libraries used by backends)" default:"/usr/share/local-ai/libs" group:"backends"`
CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"`
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"` DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"` Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"`
DisableApiKeyRequirementForHttpGet bool `env:"LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET" default:"false" help:"If true, a valid API key is not required to issue GET requests to portions of the web ui. This should only be enabled in secure testing environments" group:"hardening"` Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"`
HttpGetExemptedEndpoints []string `env:"LOCALAI_HTTP_GET_EXEMPTED_ENDPOINTS" default:"^/$,^/browse/?$,^/talk/?$,^/p2p/?$,^/chat/?$,^/text2image/?$,^/tts/?$,^/static/.*$,^/swagger.*$" help:"If LOCALAI_DISABLE_API_KEY_REQUIREMENT_FOR_HTTP_GET is overriden to true, this is the list of endpoints to exempt. Only adjust this in case of a security incident or as a result of a personal security posture review" group:"hardening"` Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"`
Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"`
Peer2PeerDHTInterval int `env:"LOCALAI_P2P_DHT_INTERVAL,P2P_DHT_INTERVAL" default:"360" name:"p2p-dht-interval" help:"Interval for DHT refresh (used during token generation)" group:"p2p"` Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"`
Peer2PeerOTPInterval int `env:"LOCALAI_P2P_OTP_INTERVAL,P2P_OTP_INTERVAL" default:"9000" name:"p2p-otp-interval" help:"Interval for OTP refresh (used during token generation)" group:"p2p"` ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"`
Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"`
Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"` PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"`
ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"`
SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"`
PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"`
ExternalGRPCBackends []string `env:"LOCALAI_EXTERNAL_GRPC_BACKENDS,EXTERNAL_GRPC_BACKENDS" help:"A list of external grpc backends" group:"backends"` EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"`
EnableWatchdogIdle bool `env:"LOCALAI_WATCHDOG_IDLE,WATCHDOG_IDLE" default:"false" help:"Enable watchdog for stopping backends that are idle longer than the watchdog-idle-timeout" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
WatchdogIdleTimeout string `env:"LOCALAI_WATCHDOG_IDLE_TIMEOUT,WATCHDOG_IDLE_TIMEOUT" default:"15m" help:"Threshold beyond which an idle backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"`
Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"`
DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"`
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
} }
func (r *RunCMD) Run(ctx *cliContext.Context) error { func (r *RunCMD) Run(ctx *cliContext.Context) error {
@@ -101,11 +97,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithModelsURL(append(r.Models, r.ModelArgs...)...), config.WithModelsURL(append(r.Models, r.ModelArgs...)...),
config.WithOpaqueErrors(r.OpaqueErrors), config.WithOpaqueErrors(r.OpaqueErrors),
config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan), config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan),
config.WithSubtleKeyComparison(r.UseSubtleKeyComparison),
config.WithDisableApiKeyRequirementForHttpGet(r.DisableApiKeyRequirementForHttpGet),
config.WithHttpGetExemptedEndpoints(r.HttpGetExemptedEndpoints),
config.WithP2PNetworkID(r.Peer2PeerNetworkID), config.WithP2PNetworkID(r.Peer2PeerNetworkID),
config.WithLoadToMemory(r.LoadToMemory),
} }
token := "" token := ""

View File

@@ -4,7 +4,6 @@ import (
"context" "context"
"embed" "embed"
"encoding/json" "encoding/json"
"regexp"
"time" "time"
"github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/mudler/LocalAI/pkg/xsysinfo"
@@ -17,6 +16,7 @@ type ApplicationConfig struct {
ModelPath string ModelPath string
LibPath string LibPath string
UploadLimitMB, Threads, ContextSize int UploadLimitMB, Threads, ContextSize int
DisableWebUI bool
F16 bool F16 bool
Debug bool Debug bool
ImageDir string ImageDir string
@@ -31,18 +31,11 @@ type ApplicationConfig struct {
PreloadModelsFromPath string PreloadModelsFromPath string
CORSAllowOrigins string CORSAllowOrigins string
ApiKeys []string ApiKeys []string
EnforcePredownloadScans bool
OpaqueErrors bool
P2PToken string P2PToken string
P2PNetworkID string P2PNetworkID string
DisableWebUI bool
EnforcePredownloadScans bool
OpaqueErrors bool
UseSubtleKeyComparison bool
DisableApiKeyRequirementForHttpGet bool
HttpGetExemptedEndpoints []*regexp.Regexp
DisableGalleryEndpoint bool
LoadToMemory []string
ModelLibraryURL string ModelLibraryURL string
Galleries []Gallery Galleries []Gallery
@@ -64,6 +57,8 @@ type ApplicationConfig struct {
ModelsURL []string ModelsURL []string
WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration
DisableGalleryEndpoint bool
} }
type AppOption func(*ApplicationConfig) type AppOption func(*ApplicationConfig)
@@ -332,38 +327,6 @@ func WithOpaqueErrors(opaque bool) AppOption {
} }
} }
// WithLoadToMemory returns an AppOption that records the given model names
// in ApplicationConfig.LoadToMemory.
func WithLoadToMemory(models []string) AppOption {
	return func(cfg *ApplicationConfig) {
		cfg.LoadToMemory = models
	}
}
// WithSubtleKeyComparison returns an AppOption that stores subtle in
// ApplicationConfig.UseSubtleKeyComparison.
func WithSubtleKeyComparison(subtle bool) AppOption {
	return func(cfg *ApplicationConfig) {
		cfg.UseSubtleKeyComparison = subtle
	}
}
// WithDisableApiKeyRequirementForHttpGet returns an AppOption that stores the
// flag in ApplicationConfig.DisableApiKeyRequirementForHttpGet.
//
// NOTE: despite the parameter being called "required", the value is written
// verbatim into the *Disable...* field, so passing true turns the API-key
// requirement off rather than on.
func WithDisableApiKeyRequirementForHttpGet(required bool) AppOption {
	return func(cfg *ApplicationConfig) {
		cfg.DisableApiKeyRequirementForHttpGet = required
	}
}
// WithHttpGetExemptedEndpoints returns an AppOption that compiles each entry
// of endpoints as a regular expression and stores the successfully compiled
// patterns in ApplicationConfig.HttpGetExemptedEndpoints.
//
// Entries that fail to compile are logged at warn level and skipped; they do
// not abort the option. The stored slice is always non-nil, even when every
// entry is rejected or endpoints is empty.
func WithHttpGetExemptedEndpoints(endpoints []string) AppOption {
	return func(cfg *ApplicationConfig) {
		compiled := []*regexp.Regexp{}
		for _, pattern := range endpoints {
			rx, err := regexp.Compile(pattern)
			if err != nil || rx == nil {
				log.Warn().Err(err).Str("regex", pattern).Msg("Error while compiling HTTP Get Exemption regex, skipping this entry.")
				continue
			}
			compiled = append(compiled, rx)
		}
		cfg.HttpGetExemptedEndpoints = compiled
	}
}
// ToConfigLoaderOptions returns a slice of ConfigLoader Option. // ToConfigLoaderOptions returns a slice of ConfigLoader Option.
// Some options defined at the application level are going to be passed as defaults for // Some options defined at the application level are going to be passed as defaults for
// all the configuration for the models. // all the configuration for the models.

View File

@@ -132,7 +132,7 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
var refFile string var refFile string
uri := downloader.URI(url) uri := downloader.URI(url)
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
refFile = string(d) refFile = string(d)
if len(refFile) == 0 { if len(refFile) == 0 {
return fmt.Errorf("invalid reference file at url %s: %s", url, d) return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -156,7 +156,7 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel,
} }
uri := downloader.URI(gallery.URL) uri := downloader.URI(gallery.URL)
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &models) return yaml.Unmarshal(d, &models)
}) })
if err != nil { if err != nil {

View File

@@ -69,7 +69,7 @@ type PromptTemplate struct {
func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { func GetGalleryConfigFromURL(url string, basePath string) (Config, error) {
var config Config var config Config
uri := downloader.URI(url) uri := downloader.URI(url)
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error { err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &config) return yaml.Unmarshal(d, &config)
}) })
if err != nil { if err != nil {

View File

@@ -3,15 +3,13 @@ package http
import ( import (
"embed" "embed"
"errors" "errors"
"fmt"
"net/http" "net/http"
"strings"
"github.com/dave-gray101/v2keyauth"
"github.com/mudler/LocalAI/pkg/utils" "github.com/mudler/LocalAI/pkg/utils"
"github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/http/endpoints/openai" "github.com/mudler/LocalAI/core/http/endpoints/openai"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/http/routes" "github.com/mudler/LocalAI/core/http/routes"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/config"
@@ -31,6 +29,24 @@ import (
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
// readAuthHeader returns the value to treat as the request's Authorization
// header.
//
// It starts from the literal "Authorization" header and then checks the
// vendor-specific key headers in order. Each non-empty one replaces the
// result with "Bearer <key>", so the last non-empty header wins:
// x-api-key over xi-api-key over Authorization. An empty string is returned
// when none of the three headers carries a value.
func readAuthHeader(c *fiber.Ctx) string {
	header := c.Get("Authorization")

	// Order matters: elevenlabs ("xi-api-key") first, then anthropic
	// ("x-api-key"), which therefore takes precedence when both are sent.
	for _, name := range []string{"xi-api-key", "x-api-key"} {
		if key := c.Get(name); key != "" {
			header = "Bearer " + key
		}
	}

	return header
}
// Embed a directory // Embed a directory
// //
//go:embed static/* //go:embed static/*
@@ -121,17 +137,37 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
}) })
} }
// Health Checks should always be exempt from auth, so register these first // Auth middleware checking if API key is valid. If no API key is set, no auth is required.
routes.HealthRoutes(app) auth := func(c *fiber.Ctx) error {
if len(appConfig.ApiKeys) == 0 {
return c.Next()
}
kaConfig, err := middleware.GetKeyAuthConfig(appConfig) if len(appConfig.ApiKeys) == 0 {
if err != nil || kaConfig == nil { return c.Next()
return nil, fmt.Errorf("failed to create key auth config: %w", err) }
authHeader := readAuthHeader(c)
if authHeader == "" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Authorization header missing"})
}
// If it's a bearer token
authHeaderParts := strings.Split(authHeader, " ")
if len(authHeaderParts) != 2 || authHeaderParts[0] != "Bearer" {
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid Authorization header format"})
}
apiKey := authHeaderParts[1]
for _, key := range appConfig.ApiKeys {
if apiKey == key {
return c.Next()
}
}
return c.Status(fiber.StatusUnauthorized).JSON(fiber.Map{"message": "Invalid API key"})
} }
// Auth is applied to _all_ endpoints. No exceptions. Filtering out endpoints to bypass is the role of the Filter property of the KeyAuth Configuration
app.Use(v2keyauth.New(*kaConfig))
if appConfig.CORS { if appConfig.CORS {
var c func(ctx *fiber.Ctx) error var c func(ctx *fiber.Ctx) error
if appConfig.CORSAllowOrigins == "" { if appConfig.CORSAllowOrigins == "" {
@@ -156,13 +192,13 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
galleryService := services.NewGalleryService(appConfig) galleryService := services.NewGalleryService(appConfig)
galleryService.Start(appConfig.Context, cl) galleryService.Start(appConfig.Context, cl)
routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig) routes.RegisterElevenLabsRoutes(app, cl, ml, appConfig, auth)
routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService) routes.RegisterLocalAIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig) routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
if !appConfig.DisableWebUI { if !appConfig.DisableWebUI {
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService) routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
} }
routes.RegisterJINARoutes(app, cl, ml, appConfig) routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)
httpFS := http.FS(embedDirStatic) httpFS := http.FS(embedDirStatic)

View File

@@ -31,9 +31,6 @@ import (
"github.com/sashabaranov/go-openai/jsonschema" "github.com/sashabaranov/go-openai/jsonschema"
) )
const apiKey = "joshua"
const bearerKey = "Bearer " + apiKey
const testPrompt = `### System: const testPrompt = `### System:
You are an AI assistant that follows instruction extremely well. Help as much as you can. You are an AI assistant that follows instruction extremely well. Help as much as you can.
@@ -53,19 +50,11 @@ type modelApplyRequest struct {
func getModelStatus(url string) (response map[string]interface{}) { func getModelStatus(url string) (response map[string]interface{}) {
// Create the HTTP request // Create the HTTP request
req, err := http.NewRequest("GET", url, nil) resp, err := http.Get(url)
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
if err != nil { if err != nil {
fmt.Println("Error creating request:", err) fmt.Println("Error creating request:", err)
return return
} }
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
fmt.Println("Error sending request:", err)
return
}
defer resp.Body.Close() defer resp.Body.Close()
body, err := io.ReadAll(resp.Body) body, err := io.ReadAll(resp.Body)
@@ -83,15 +72,14 @@ func getModelStatus(url string) (response map[string]interface{}) {
return return
} }
func getModels(url string) ([]gallery.GalleryModel, error) { func getModels(url string) (response []gallery.GalleryModel) {
response := []gallery.GalleryModel{}
uri := downloader.URI(url) uri := downloader.URI(url)
// TODO: No tests currently seem to exercise file:// urls. Fix? // TODO: No tests currently seem to exercise file:// urls. Fix?
err := uri.DownloadWithAuthorizationAndCallback("", bearerKey, func(url string, i []byte) error { uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
// Unmarshal YAML data into a struct // Unmarshal YAML data into a struct
return json.Unmarshal(i, &response) return json.Unmarshal(i, &response)
}) })
return response, err return
} }
func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) { func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
@@ -113,7 +101,6 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
return return
} }
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
// Make the request // Make the request
client := &http.Client{} client := &http.Client{}
@@ -153,7 +140,6 @@ func postRequestJSON[B any](url string, bodyJson *B) error {
} }
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
client := &http.Client{} client := &http.Client{}
resp, err := client.Do(req) resp, err := client.Do(req)
@@ -189,7 +175,6 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
} }
req.Header.Set("Content-Type", "application/json") req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", bearerKey)
client := &http.Client{} client := &http.Client{}
resp, err := client.Do(req) resp, err := client.Do(req)
@@ -210,35 +195,6 @@ func postRequestResponseJSON[B1 any, B2 any](url string, reqJson *B1, respJson *
return json.Unmarshal(body, respJson) return json.Unmarshal(body, respJson)
} }
// postInvalidRequest POSTs the fixed body "invalid request" to url with a JSON
// content type and no Authorization header.
//
// It returns (nil, status) when the response status is in [200, 400), and a
// descriptive error together with the status otherwise. If the request cannot
// be built or sent, or the response body cannot be read, the underlying error
// is returned with a status of -1.
//
// Note the (error, int) return order is unusual for Go but is kept because
// callers destructure it in that order.
func postInvalidRequest(url string) (error, int) {
	payload := bytes.NewBufferString("invalid request")
	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return err, -1
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := (&http.Client{}).Do(req)
	if err != nil {
		return err, -1
	}
	defer resp.Body.Close()

	// The body is read before the status check so it can be included in the
	// error message for non-success responses.
	raw, err := io.ReadAll(resp.Body)
	if err != nil {
		return err, -1
	}

	status := resp.StatusCode
	if status < 200 || status >= 400 {
		return fmt.Errorf("unexpected status code: %d, body: %s", status, string(raw)), status
	}
	return nil, status
}
//go:embed backend-assets/* //go:embed backend-assets/*
var backendAssets embed.FS var backendAssets embed.FS
@@ -304,7 +260,6 @@ var _ = Describe("API test", func() {
config.WithContext(c), config.WithContext(c),
config.WithGalleries(galleries), config.WithGalleries(galleries),
config.WithModelPath(modelDir), config.WithModelPath(modelDir),
config.WithApiKeys([]string{apiKey}),
config.WithBackendAssets(backendAssets), config.WithBackendAssets(backendAssets),
config.WithBackendAssetsOutput(backendAssetsDir))...) config.WithBackendAssetsOutput(backendAssetsDir))...)
Expect(err).ToNot(HaveOccurred()) Expect(err).ToNot(HaveOccurred())
@@ -314,7 +269,7 @@ var _ = Describe("API test", func() {
go app.Listen("127.0.0.1:9090") go app.Listen("127.0.0.1:9090")
defaultConfig := openai.DefaultConfig(apiKey) defaultConfig := openai.DefaultConfig("")
defaultConfig.BaseURL = "http://127.0.0.1:9090/v1" defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
client2 = openaigo.NewClient("") client2 = openaigo.NewClient("")
@@ -340,19 +295,10 @@ var _ = Describe("API test", func() {
Expect(err).To(HaveOccurred()) Expect(err).To(HaveOccurred())
}) })
Context("Auth Tests", func() {
It("Should fail if the api key is missing", func() {
err, sc := postInvalidRequest("http://127.0.0.1:9090/models/available")
Expect(err).ToNot(BeNil())
Expect(sc).To(Equal(403))
})
})
Context("Applying models", func() { Context("Applying models", func() {
It("applies models from a gallery", func() { It("applies models from a gallery", func() {
models, err := getModels("http://127.0.0.1:9090/models/available") models := getModels("http://127.0.0.1:9090/models/available")
Expect(err).To(BeNil())
Expect(len(models)).To(Equal(2), fmt.Sprint(models)) Expect(len(models)).To(Equal(2), fmt.Sprint(models))
Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models)) Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models)) Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
@@ -385,8 +331,7 @@ var _ = Describe("API test", func() {
Expect(content["backend"]).To(Equal("bert-embeddings")) Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["foo"]).To(Equal("bar")) Expect(content["foo"]).To(Equal("bar"))
models, err = getModels("http://127.0.0.1:9090/models/available") models = getModels("http://127.0.0.1:9090/models/available")
Expect(err).To(BeNil())
Expect(len(models)).To(Equal(2), fmt.Sprint(models)) Expect(len(models)).To(Equal(2), fmt.Sprint(models))
Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2"))) Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2"))) Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))

View File

@@ -640,16 +640,8 @@ func handleQuestion(config *config.BackendConfig, input *schema.OpenAIRequest, m
for _, m := range input.Messages { for _, m := range input.Messages {
images = append(images, m.StringImages...) images = append(images, m.StringImages...)
} }
videos := []string{}
for _, m := range input.Messages {
videos = append(videos, m.StringVideos...)
}
audios := []string{}
for _, m := range input.Messages {
audios = append(audios, m.StringAudios...)
}
predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, videos, audios, ml, *config, o, nil) predFunc, err := backend.ModelInference(input.Context, prompt, input.Messages, images, ml, *config, o, nil)
if err != nil { if err != nil {
log.Error().Err(err).Msg("model inference failed") log.Error().Err(err).Msg("model inference failed")
return "", err return "", err

View File

@@ -27,17 +27,9 @@ func ComputeChoices(
for _, m := range req.Messages { for _, m := range req.Messages {
images = append(images, m.StringImages...) images = append(images, m.StringImages...)
} }
videos := []string{}
for _, m := range req.Messages {
videos = append(videos, m.StringVideos...)
}
audios := []string{}
for _, m := range req.Messages {
audios = append(audios, m.StringAudios...)
}
// get the model function to call for the result // get the model function to call for the result
predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, videos, audios, loader, *config, o, tokenCallback) predFunc, err := backend.ModelInference(req.Context, predInput, req.Messages, images, loader, *config, o, tokenCallback)
if err != nil { if err != nil {
return result, backend.TokenUsage{}, err return result, backend.TokenUsage{}, err
} }

View File

@@ -135,7 +135,7 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
} }
// Decode each request's message content // Decode each request's message content
imgIndex, vidIndex, audioIndex := 0, 0, 0 index := 0
for i, m := range input.Messages { for i, m := range input.Messages {
switch content := m.Content.(type) { switch content := m.Content.(type) {
case string: case string:
@@ -144,44 +144,20 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
dat, _ := json.Marshal(content) dat, _ := json.Marshal(content)
c := []schema.Content{} c := []schema.Content{}
json.Unmarshal(dat, &c) json.Unmarshal(dat, &c)
CONTENT:
for _, pp := range c { for _, pp := range c {
switch pp.Type { if pp.Type == "text" {
case "text":
input.Messages[i].StringContent = pp.Text input.Messages[i].StringContent = pp.Text
case "video", "video_url": } else if pp.Type == "image_url" {
// Decode content as base64 either if it's an URL or base64 text // Detect if pp.ImageURL is an URL, if it is download the image and encode it in base64:
base64, err := utils.GetContentURIAsBase64(pp.VideoURL.URL) base64, err := utils.GetImageURLAsBase64(pp.ImageURL.URL)
if err != nil { if err == nil {
log.Error().Msgf("Failed encoding video: %s", err) input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
continue CONTENT // set a placeholder for each image
} input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", index) + input.Messages[i].StringContent
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff index++
// set a placeholder for each image } else {
input.Messages[i].StringContent = fmt.Sprintf("[vid-%d]", vidIndex) + input.Messages[i].StringContent
vidIndex++
case "audio_url", "audio":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.AudioURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err) log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
} }
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[audio-%d]", audioIndex) + input.Messages[i].StringContent
audioIndex++
case "image_url", "image":
// Decode content as base64 either if it's an URL or base64 text
base64, err := utils.GetContentURIAsBase64(pp.ImageURL.URL)
if err != nil {
log.Error().Msgf("Failed encoding image: %s", err)
continue CONTENT
}
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
// set a placeholder for each image
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", imgIndex) + input.Messages[i].StringContent
imgIndex++
} }
} }
} }

View File

@@ -1,94 +0,0 @@
package middleware
import (
	"crypto/sha256"
	"crypto/subtle"
	"errors"

	"github.com/dave-gray101/v2keyauth"
	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/keyauth"
	"github.com/mudler/LocalAI/core/config"
)
// This file contains the configuration generators and handler functions that are used along with the fiber/keyauth middleware
// Currently this requires an upstream patch - and feature patches are no longer accepted to v2
// Therefore `dave-gray101/v2keyauth` contains the v2 backport of the middleware until v3 stabilizes and we migrate.
// GetKeyAuthConfig assembles the v2keyauth middleware configuration for the
// given application config.
//
// The API key is looked up in the Authorization, x-api-key and xi-api-key
// headers (in that order of declaration), using keyauth's default auth scheme
// for the lookup. The Next, Validator and ErrorHandler hooks are produced by
// the helper generators in this file. An error is returned only when the
// multi-source lookup cannot be constructed.
func GetKeyAuthConfig(applicationConfig *config.ApplicationConfig) (*v2keyauth.Config, error) {
	keySources := []string{"header:Authorization", "header:x-api-key", "header:xi-api-key"}

	lookup, err := v2keyauth.MultipleKeySourceLookup(keySources, keyauth.ConfigDefault.AuthScheme)
	if err != nil {
		return nil, err
	}

	cfg := &v2keyauth.Config{
		CustomKeyLookup: lookup,
		Next:            getApiKeyRequiredFilterFunction(applicationConfig),
		Validator:       getApiKeyValidationFunction(applicationConfig),
		ErrorHandler:    getApiKeyErrorHandler(applicationConfig),
		AuthScheme:      "Bearer",
	}
	return cfg, nil
}
// getApiKeyErrorHandler builds the fiber.ErrorHandler used by the key-auth
// middleware.
//
// For v2keyauth.ErrMissingOrMalformedAPIKey:
//   - if no API keys are configured, the request is passed on via ctx.Next();
//   - otherwise a 403 is sent, with the error text as body unless
//     OpaqueErrors is set (then the body is left blank).
//
// For any other error, a blank 500 is sent when OpaqueErrors is set;
// otherwise the error is returned unchanged to fiber.
func getApiKeyErrorHandler(applicationConfig *config.ApplicationConfig) fiber.ErrorHandler {
	return func(ctx *fiber.Ctx, err error) error {
		// Anything that is not an auth failure is propagated (or masked).
		if !errors.Is(err, v2keyauth.ErrMissingOrMalformedAPIKey) {
			if applicationConfig.OpaqueErrors {
				return ctx.SendStatus(500)
			}
			return err
		}

		switch {
		case len(applicationConfig.ApiKeys) == 0:
			// if no keys are set up, any error we get here is not an error.
			return ctx.Next()
		case applicationConfig.OpaqueErrors:
			return ctx.SendStatus(403)
		default:
			return ctx.Status(403).SendString(err.Error())
		}
	}
}
// getApiKeyValidationFunction returns the validator that decides whether a
// presented API key is accepted.
//
// The comparison strategy is chosen once, when the validator is built: with
// UseSubtleKeyComparison enabled keys are compared through
// crypto/subtle.ConstantTimeCompare, otherwise with plain string equality.
// The returned validator accepts every request when no keys are configured,
// accepts a request whose key matches any configured key, and otherwise
// reports v2keyauth.ErrMissingOrMalformedAPIKey.
func getApiKeyValidationFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx, string) (bool, error) {
	// Default strategy: ordinary string equality.
	keysMatch := func(presented, configured string) bool {
		return presented == configured
	}
	if applicationConfig.UseSubtleKeyComparison {
		keysMatch = func(presented, configured string) bool {
			return subtle.ConstantTimeCompare([]byte(presented), []byte(configured)) == 1
		}
	}

	return func(_ *fiber.Ctx, apiKey string) (bool, error) {
		// If no keys are setup, accept everything
		if len(applicationConfig.ApiKeys) == 0 {
			return true, nil
		}
		for _, configured := range applicationConfig.ApiKeys {
			if keysMatch(apiKey, configured) {
				return true, nil
			}
		}
		return false, v2keyauth.ErrMissingOrMalformedAPIKey
	}
}
// getApiKeyRequiredFilterFunction returns the function wired in as the
// key-auth config's Next filter; it reports true for requests that match an
// exemption.
//
// Unless DisableApiKeyRequirementForHttpGet is set, the filter always reports
// false. When it is set, the filter reports true only for GET requests whose
// path matches one of the HttpGetExemptedEndpoints patterns.
func getApiKeyRequiredFilterFunction(applicationConfig *config.ApplicationConfig) func(*fiber.Ctx) bool {
	if !applicationConfig.DisableApiKeyRequirementForHttpGet {
		// No exemptions configured: the filter never reports true.
		return func(*fiber.Ctx) bool { return false }
	}

	return func(c *fiber.Ctx) bool {
		if c.Method() == "GET" {
			for _, exempted := range applicationConfig.HttpGetExemptedEndpoints {
				if exempted.MatchString(c.Path()) {
					return true
				}
			}
		}
		return false
	}
}

View File

@@ -10,11 +10,12 @@ import (
func RegisterElevenLabsRoutes(app *fiber.App, func RegisterElevenLabsRoutes(app *fiber.App,
cl *config.BackendConfigLoader, cl *config.BackendConfigLoader,
ml *model.ModelLoader, ml *model.ModelLoader,
appConfig *config.ApplicationConfig) { appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
// Elevenlabs // Elevenlabs
app.Post("/v1/text-to-speech/:voice-id", elevenlabs.TTSEndpoint(cl, ml, appConfig)) app.Post("/v1/text-to-speech/:voice-id", auth, elevenlabs.TTSEndpoint(cl, ml, appConfig))
app.Post("/v1/sound-generation", elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig)) app.Post("/v1/sound-generation", auth, elevenlabs.SoundGenerationEndpoint(cl, ml, appConfig))
} }

View File

@@ -1,13 +0,0 @@
package routes
import "github.com/gofiber/fiber/v2"
// HealthRoutes registers the service health-check endpoints on app.
// Both GET /healthz and GET /readyz reply with a bare HTTP 200.
func HealthRoutes(app *fiber.App) {
	// Shared handler: the checks only confirm that the server responds.
	respondOK := func(ctx *fiber.Ctx) error { return ctx.SendStatus(200) }

	for _, route := range []string{"/healthz", "/readyz"} {
		app.Get(route, respondOK)
	}
}

View File

@@ -11,7 +11,8 @@ import (
func RegisterJINARoutes(app *fiber.App, func RegisterJINARoutes(app *fiber.App,
cl *config.BackendConfigLoader, cl *config.BackendConfigLoader,
ml *model.ModelLoader, ml *model.ModelLoader,
appConfig *config.ApplicationConfig) { appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
// POST endpoint to mimic the reranking // POST endpoint to mimic the reranking
app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig)) app.Post("/v1/rerank", jina.JINARerankEndpoint(cl, ml, appConfig))

View File

@@ -15,52 +15,61 @@ func RegisterLocalAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader, cl *config.BackendConfigLoader,
ml *model.ModelLoader, ml *model.ModelLoader,
appConfig *config.ApplicationConfig, appConfig *config.ApplicationConfig,
galleryService *services.GalleryService) { galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {
app.Get("/swagger/*", swagger.HandlerDefault) // default app.Get("/swagger/*", swagger.HandlerDefault) // default
// LocalAI API endpoints // LocalAI API endpoints
if !appConfig.DisableGalleryEndpoint { if !appConfig.DisableGalleryEndpoint {
modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService)
app.Post("/models/apply", modelGalleryEndpointService.ApplyModelGalleryEndpoint()) app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint())
app.Post("/models/delete/:name", modelGalleryEndpointService.DeleteModelGalleryEndpoint()) app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint())
app.Get("/models/available", modelGalleryEndpointService.ListModelFromGalleryEndpoint()) app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint())
app.Get("/models/galleries", modelGalleryEndpointService.ListModelGalleriesEndpoint()) app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint())
app.Post("/models/galleries", modelGalleryEndpointService.AddModelGalleryEndpoint()) app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint())
app.Delete("/models/galleries", modelGalleryEndpointService.RemoveModelGalleryEndpoint()) app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint())
app.Get("/models/jobs/:uuid", modelGalleryEndpointService.GetOpStatusEndpoint()) app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint())
app.Get("/models/jobs", modelGalleryEndpointService.GetAllStatusEndpoint()) app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint())
} }
app.Post("/tts", localai.TTSEndpoint(cl, ml, appConfig)) app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig))
// Stores // Stores
sl := model.NewModelLoader("") sl := model.NewModelLoader("")
app.Post("/stores/set", localai.StoresSetEndpoint(sl, appConfig)) app.Post("/stores/set", auth, localai.StoresSetEndpoint(sl, appConfig))
app.Post("/stores/delete", localai.StoresDeleteEndpoint(sl, appConfig)) app.Post("/stores/delete", auth, localai.StoresDeleteEndpoint(sl, appConfig))
app.Post("/stores/get", localai.StoresGetEndpoint(sl, appConfig)) app.Post("/stores/get", auth, localai.StoresGetEndpoint(sl, appConfig))
app.Post("/stores/find", localai.StoresFindEndpoint(sl, appConfig)) app.Post("/stores/find", auth, localai.StoresFindEndpoint(sl, appConfig))
app.Get("/metrics", localai.LocalAIMetricsEndpoint()) // Kubernetes health checks
ok := func(c *fiber.Ctx) error {
return c.SendStatus(200)
}
app.Get("/healthz", ok)
app.Get("/readyz", ok)
app.Get("/metrics", auth, localai.LocalAIMetricsEndpoint())
// Experimental Backend Statistics Module // Experimental Backend Statistics Module
backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now backendMonitorService := services.NewBackendMonitorService(ml, cl, appConfig) // Split out for now
app.Get("/backend/monitor", localai.BackendMonitorEndpoint(backendMonitorService)) app.Get("/backend/monitor", auth, localai.BackendMonitorEndpoint(backendMonitorService))
app.Post("/backend/shutdown", localai.BackendShutdownEndpoint(backendMonitorService)) app.Post("/backend/shutdown", auth, localai.BackendShutdownEndpoint(backendMonitorService))
// p2p // p2p
if p2p.IsP2PEnabled() { if p2p.IsP2PEnabled() {
app.Get("/api/p2p", localai.ShowP2PNodes(appConfig)) app.Get("/api/p2p", auth, localai.ShowP2PNodes(appConfig))
app.Get("/api/p2p/token", localai.ShowP2PToken(appConfig)) app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig))
} }
app.Get("/version", func(c *fiber.Ctx) error { app.Get("/version", auth, func(c *fiber.Ctx) error {
return c.JSON(struct { return c.JSON(struct {
Version string `json:"version"` Version string `json:"version"`
}{Version: internal.PrintableVersion()}) }{Version: internal.PrintableVersion()})
}) })
app.Get("/system", localai.SystemInformations(ml, appConfig)) app.Get("/system", auth, localai.SystemInformations(ml, appConfig))
} }

View File

@@ -11,65 +11,66 @@ import (
func RegisterOpenAIRoutes(app *fiber.App, func RegisterOpenAIRoutes(app *fiber.App,
cl *config.BackendConfigLoader, cl *config.BackendConfigLoader,
ml *model.ModelLoader, ml *model.ModelLoader,
appConfig *config.ApplicationConfig) { appConfig *config.ApplicationConfig,
auth func(*fiber.Ctx) error) {
// openAI compatible API endpoint // openAI compatible API endpoint
// chat // chat
app.Post("/v1/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) app.Post("/v1/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
app.Post("/chat/completions", openai.ChatEndpoint(cl, ml, appConfig)) app.Post("/chat/completions", auth, openai.ChatEndpoint(cl, ml, appConfig))
// edit // edit
app.Post("/v1/edits", openai.EditEndpoint(cl, ml, appConfig)) app.Post("/v1/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
app.Post("/edits", openai.EditEndpoint(cl, ml, appConfig)) app.Post("/edits", auth, openai.EditEndpoint(cl, ml, appConfig))
// assistant // assistant
app.Get("/v1/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) app.Get("/v1/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Get("/assistants", openai.ListAssistantsEndpoint(cl, ml, appConfig)) app.Get("/assistants", auth, openai.ListAssistantsEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) app.Post("/v1/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants", openai.CreateAssistantEndpoint(cl, ml, appConfig)) app.Post("/assistants", auth, openai.CreateAssistantEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) app.Delete("/v1/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id", openai.DeleteAssistantEndpoint(cl, ml, appConfig)) app.Delete("/assistants/:assistant_id", auth, openai.DeleteAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) app.Get("/v1/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id", openai.GetAssistantEndpoint(cl, ml, appConfig)) app.Get("/assistants/:assistant_id", auth, openai.GetAssistantEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) app.Post("/v1/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id", openai.ModifyAssistantEndpoint(cl, ml, appConfig)) app.Post("/assistants/:assistant_id", auth, openai.ModifyAssistantEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) app.Get("/v1/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files", openai.ListAssistantFilesEndpoint(cl, ml, appConfig)) app.Get("/assistants/:assistant_id/files", auth, openai.ListAssistantFilesEndpoint(cl, ml, appConfig))
app.Post("/v1/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) app.Post("/v1/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Post("/assistants/:assistant_id/files", openai.CreateAssistantFileEndpoint(cl, ml, appConfig)) app.Post("/assistants/:assistant_id/files", auth, openai.CreateAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/v1/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) app.Delete("/v1/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Delete("/assistants/:assistant_id/files/:file_id", openai.DeleteAssistantFileEndpoint(cl, ml, appConfig)) app.Delete("/assistants/:assistant_id/files/:file_id", auth, openai.DeleteAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/v1/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) app.Get("/v1/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
app.Get("/assistants/:assistant_id/files/:file_id", openai.GetAssistantFileEndpoint(cl, ml, appConfig)) app.Get("/assistants/:assistant_id/files/:file_id", auth, openai.GetAssistantFileEndpoint(cl, ml, appConfig))
// files // files
app.Post("/v1/files", openai.UploadFilesEndpoint(cl, appConfig)) app.Post("/v1/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Post("/files", openai.UploadFilesEndpoint(cl, appConfig)) app.Post("/files", auth, openai.UploadFilesEndpoint(cl, appConfig))
app.Get("/v1/files", openai.ListFilesEndpoint(cl, appConfig)) app.Get("/v1/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/files", openai.ListFilesEndpoint(cl, appConfig)) app.Get("/files", auth, openai.ListFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) app.Get("/v1/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Get("/files/:file_id", openai.GetFilesEndpoint(cl, appConfig)) app.Get("/files/:file_id", auth, openai.GetFilesEndpoint(cl, appConfig))
app.Delete("/v1/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) app.Delete("/v1/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Delete("/files/:file_id", openai.DeleteFilesEndpoint(cl, appConfig)) app.Delete("/files/:file_id", auth, openai.DeleteFilesEndpoint(cl, appConfig))
app.Get("/v1/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) app.Get("/v1/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
app.Get("/files/:file_id/content", openai.GetFilesContentsEndpoint(cl, appConfig)) app.Get("/files/:file_id/content", auth, openai.GetFilesContentsEndpoint(cl, appConfig))
// completion // completion
app.Post("/v1/completions", openai.CompletionEndpoint(cl, ml, appConfig)) app.Post("/v1/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/completions", openai.CompletionEndpoint(cl, ml, appConfig)) app.Post("/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cl, ml, appConfig)) app.Post("/v1/engines/:model/completions", auth, openai.CompletionEndpoint(cl, ml, appConfig))
// embeddings // embeddings
app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) app.Post("/v1/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) app.Post("/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cl, ml, appConfig)) app.Post("/v1/engines/:model/embeddings", auth, openai.EmbeddingsEndpoint(cl, ml, appConfig))
// audio // audio
app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cl, ml, appConfig)) app.Post("/v1/audio/transcriptions", auth, openai.TranscriptEndpoint(cl, ml, appConfig))
app.Post("/v1/audio/speech", localai.TTSEndpoint(cl, ml, appConfig)) app.Post("/v1/audio/speech", auth, localai.TTSEndpoint(cl, ml, appConfig))
// images // images
app.Post("/v1/images/generations", openai.ImageEndpoint(cl, ml, appConfig)) app.Post("/v1/images/generations", auth, openai.ImageEndpoint(cl, ml, appConfig))
if appConfig.ImageDir != "" { if appConfig.ImageDir != "" {
app.Static("/generated-images", appConfig.ImageDir) app.Static("/generated-images", appConfig.ImageDir)
@@ -80,6 +81,6 @@ func RegisterOpenAIRoutes(app *fiber.App,
} }
// List models // List models
app.Get("/v1/models", openai.ListModelsEndpoint(cl, ml)) app.Get("/v1/models", auth, openai.ListModelsEndpoint(cl, ml))
app.Get("/models", openai.ListModelsEndpoint(cl, ml)) app.Get("/models", auth, openai.ListModelsEndpoint(cl, ml))
} }

View File

@@ -59,7 +59,8 @@ func RegisterUIRoutes(app *fiber.App,
cl *config.BackendConfigLoader, cl *config.BackendConfigLoader,
ml *model.ModelLoader, ml *model.ModelLoader,
appConfig *config.ApplicationConfig, appConfig *config.ApplicationConfig,
galleryService *services.GalleryService) { galleryService *services.GalleryService,
auth func(*fiber.Ctx) error) {
// keeps the state of models that are being installed from the UI // keeps the state of models that are being installed from the UI
var processingModels = NewModelOpCache() var processingModels = NewModelOpCache()
@@ -84,10 +85,10 @@ func RegisterUIRoutes(app *fiber.App,
return processingModelsData, taskTypes return processingModelsData, taskTypes
} }
app.Get("/", localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus)) app.Get("/", auth, localai.WelcomeEndpoint(appConfig, cl, ml, modelStatus))
if p2p.IsP2PEnabled() { if p2p.IsP2PEnabled() {
app.Get("/p2p", func(c *fiber.Ctx) error { app.Get("/p2p", auth, func(c *fiber.Ctx) error {
summary := fiber.Map{ summary := fiber.Map{
"Title": "LocalAI - P2P dashboard", "Title": "LocalAI - P2P dashboard",
"Version": internal.PrintableVersion(), "Version": internal.PrintableVersion(),
@@ -103,17 +104,17 @@ func RegisterUIRoutes(app *fiber.App,
}) })
/* show nodes live! */ /* show nodes live! */
app.Get("/p2p/ui/workers", func(c *fiber.Ctx) error { app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
}) })
app.Get("/p2p/ui/workers-federation", func(c *fiber.Ctx) error { app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
}) })
app.Get("/p2p/ui/workers-stats", func(c *fiber.Ctx) error { app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID))))
}) })
app.Get("/p2p/ui/workers-federation-stats", func(c *fiber.Ctx) error { app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error {
return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID))))
}) })
} }
@@ -121,7 +122,7 @@ func RegisterUIRoutes(app *fiber.App,
if !appConfig.DisableGalleryEndpoint { if !appConfig.DisableGalleryEndpoint {
// Show the Models page (all models) // Show the Models page (all models)
app.Get("/browse", func(c *fiber.Ctx) error { app.Get("/browse", auth, func(c *fiber.Ctx) error {
term := c.Query("term") term := c.Query("term")
models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath)
@@ -166,7 +167,7 @@ func RegisterUIRoutes(app *fiber.App,
// Show the models, filtered from the user input // Show the models, filtered from the user input
// https://htmx.org/examples/active-search/ // https://htmx.org/examples/active-search/
app.Post("/browse/search/models", func(c *fiber.Ctx) error { app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error {
form := struct { form := struct {
Search string `form:"search"` Search string `form:"search"`
}{} }{}
@@ -187,7 +188,7 @@ func RegisterUIRoutes(app *fiber.App,
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
// https://htmx.org/examples/progress-bar/ // https://htmx.org/examples/progress-bar/
app.Post("/browse/install/model/:id", func(c *fiber.Ctx) error { app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error {
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID)
@@ -214,7 +215,7 @@ func RegisterUIRoutes(app *fiber.App,
// This route is used when the "Install" button is pressed, we submit here a new job to the gallery service // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service
// https://htmx.org/examples/progress-bar/ // https://htmx.org/examples/progress-bar/
app.Post("/browse/delete/model/:id", func(c *fiber.Ctx) error { app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error {
galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests!
log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID)
var galleryName = galleryID var galleryName = galleryID
@@ -254,7 +255,7 @@ func RegisterUIRoutes(app *fiber.App,
// Display the job current progress status // Display the job current progress status
// If the job is done, we trigger the /browse/job/:uid route // If the job is done, we trigger the /browse/job/:uid route
// https://htmx.org/examples/progress-bar/ // https://htmx.org/examples/progress-bar/
app.Get("/browse/job/progress/:uid", func(c *fiber.Ctx) error { app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error {
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
status := galleryService.GetStatus(jobUID) status := galleryService.GetStatus(jobUID)
@@ -278,7 +279,7 @@ func RegisterUIRoutes(app *fiber.App,
// this route is hit when the job is done, and we display the // this route is hit when the job is done, and we display the
// final state (for now just displays "Installation completed") // final state (for now just displays "Installation completed")
app.Get("/browse/job/:uid", func(c *fiber.Ctx) error { app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error {
jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests!
status := galleryService.GetStatus(jobUID) status := galleryService.GetStatus(jobUID)
@@ -302,7 +303,7 @@ func RegisterUIRoutes(app *fiber.App,
} }
// Show the Chat page // Show the Chat page
app.Get("/chat/:model", func(c *fiber.Ctx) error { app.Get("/chat/:model", auth, func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, "", true) backendConfigs, _ := services.ListModels(cl, ml, "", true)
summary := fiber.Map{ summary := fiber.Map{
@@ -317,7 +318,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/chat", summary) return c.Render("views/chat", summary)
}) })
app.Get("/talk/", func(c *fiber.Ctx) error { app.Get("/talk/", auth, func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, "", true) backendConfigs, _ := services.ListModels(cl, ml, "", true)
if len(backendConfigs) == 0 { if len(backendConfigs) == 0 {
@@ -337,7 +338,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/talk", summary) return c.Render("views/talk", summary)
}) })
app.Get("/chat/", func(c *fiber.Ctx) error { app.Get("/chat/", auth, func(c *fiber.Ctx) error {
backendConfigs, _ := services.ListModels(cl, ml, "", true) backendConfigs, _ := services.ListModels(cl, ml, "", true)
@@ -358,7 +359,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/chat", summary) return c.Render("views/chat", summary)
}) })
app.Get("/text2image/:model", func(c *fiber.Ctx) error { app.Get("/text2image/:model", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs() backendConfigs := cl.GetAllBackendConfigs()
summary := fiber.Map{ summary := fiber.Map{
@@ -373,7 +374,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/text2image", summary) return c.Render("views/text2image", summary)
}) })
app.Get("/text2image/", func(c *fiber.Ctx) error { app.Get("/text2image/", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs() backendConfigs := cl.GetAllBackendConfigs()
@@ -394,7 +395,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/text2image", summary) return c.Render("views/text2image", summary)
}) })
app.Get("/tts/:model", func(c *fiber.Ctx) error { app.Get("/tts/:model", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs() backendConfigs := cl.GetAllBackendConfigs()
summary := fiber.Map{ summary := fiber.Map{
@@ -409,7 +410,7 @@ func RegisterUIRoutes(app *fiber.App,
return c.Render("views/tts", summary) return c.Render("views/tts", summary)
}) })
app.Get("/tts/", func(c *fiber.Ctx) error { app.Get("/tts/", auth, func(c *fiber.Ctx) error {
backendConfigs := cl.GetAllBackendConfigs() backendConfigs := cl.GetAllBackendConfigs()

View File

@@ -58,8 +58,6 @@ type Content struct {
Type string `json:"type" yaml:"type"` Type string `json:"type" yaml:"type"`
Text string `json:"text" yaml:"text"` Text string `json:"text" yaml:"text"`
ImageURL ContentURL `json:"image_url" yaml:"image_url"` ImageURL ContentURL `json:"image_url" yaml:"image_url"`
AudioURL ContentURL `json:"audio_url" yaml:"audio_url"`
VideoURL ContentURL `json:"video_url" yaml:"video_url"`
} }
type ContentURL struct { type ContentURL struct {
@@ -78,8 +76,6 @@ type Message struct {
StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"` StringContent string `json:"string_content,omitempty" yaml:"string_content,omitempty"`
StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"` StringImages []string `json:"string_images,omitempty" yaml:"string_images,omitempty"`
StringVideos []string `json:"string_videos,omitempty" yaml:"string_videos,omitempty"`
StringAudios []string `json:"string_audios,omitempty" yaml:"string_audios,omitempty"`
// A result of a function call // A result of a function call
FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"` FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`

View File

@@ -1,243 +1,206 @@
package startup package startup
import ( import (
"fmt" "fmt"
"os" "os"
"github.com/mudler/LocalAI/core" "github.com/mudler/LocalAI/core"
"github.com/mudler/LocalAI/core/backend" "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/internal"
"github.com/mudler/LocalAI/internal" "github.com/mudler/LocalAI/pkg/assets"
"github.com/mudler/LocalAI/pkg/assets" "github.com/mudler/LocalAI/pkg/library"
"github.com/mudler/LocalAI/pkg/library" "github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/model" pkgStartup "github.com/mudler/LocalAI/pkg/startup"
pkgStartup "github.com/mudler/LocalAI/pkg/startup" "github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/rs/zerolog/log"
"github.com/rs/zerolog/log" )
)
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) {
func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.ModelLoader, *config.ApplicationConfig, error) { options := config.NewApplicationConfig(opts...)
options := config.NewApplicationConfig(opts...)
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath)
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.ModelPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) caps, err := xsysinfo.CPUCapabilities()
caps, err := xsysinfo.CPUCapabilities() if err == nil {
if err == nil { log.Debug().Msgf("CPU capabilities: %v", caps)
log.Debug().Msgf("CPU capabilities: %v", caps) }
} gpus, err := xsysinfo.GPUs()
gpus, err := xsysinfo.GPUs() if err == nil {
if err == nil { log.Debug().Msgf("GPU count: %d", len(gpus))
log.Debug().Msgf("GPU count: %d", len(gpus)) for _, gpu := range gpus {
for _, gpu := range gpus { log.Debug().Msgf("GPU: %s", gpu.String())
log.Debug().Msgf("GPU: %s", gpu.String()) }
} }
}
// Make sure directories exists
// Make sure directories exists if options.ModelPath == "" {
if options.ModelPath == "" { return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty")
return nil, nil, nil, fmt.Errorf("options.ModelPath cannot be empty") }
} err = os.MkdirAll(options.ModelPath, 0750)
err = os.MkdirAll(options.ModelPath, 0750) if err != nil {
if err != nil { return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err)
return nil, nil, nil, fmt.Errorf("unable to create ModelPath: %q", err) }
} if options.ImageDir != "" {
if options.ImageDir != "" { err := os.MkdirAll(options.ImageDir, 0750)
err := os.MkdirAll(options.ImageDir, 0750) if err != nil {
if err != nil { return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err)
return nil, nil, nil, fmt.Errorf("unable to create ImageDir: %q", err) }
} }
} if options.AudioDir != "" {
if options.AudioDir != "" { err := os.MkdirAll(options.AudioDir, 0750)
err := os.MkdirAll(options.AudioDir, 0750) if err != nil {
if err != nil { return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err)
return nil, nil, nil, fmt.Errorf("unable to create AudioDir: %q", err) }
} }
} if options.UploadDir != "" {
if options.UploadDir != "" { err := os.MkdirAll(options.UploadDir, 0750)
err := os.MkdirAll(options.UploadDir, 0750) if err != nil {
if err != nil { return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err)
return nil, nil, nil, fmt.Errorf("unable to create UploadDir: %q", err) }
} }
}
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil {
if err := pkgStartup.InstallModels(options.Galleries, options.ModelLibraryURL, options.ModelPath, options.EnforcePredownloadScans, nil, options.ModelsURL...); err != nil { log.Error().Err(err).Msg("error installing models")
log.Error().Err(err).Msg("error installing models") }
}
cl := config.NewBackendConfigLoader(options.ModelPath)
cl := config.NewBackendConfigLoader(options.ModelPath) ml := model.NewModelLoader(options.ModelPath)
ml := model.NewModelLoader(options.ModelPath)
configLoaderOpts := options.ToConfigLoaderOptions()
configLoaderOpts := options.ToConfigLoaderOptions()
if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil {
if err := cl.LoadBackendConfigsFromPath(options.ModelPath, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config files")
log.Error().Err(err).Msg("error loading config files") }
}
if options.ConfigFile != "" {
if options.ConfigFile != "" { if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil {
if err := cl.LoadMultipleBackendConfigsSingleFile(options.ConfigFile, configLoaderOpts...); err != nil { log.Error().Err(err).Msg("error loading config file")
log.Error().Err(err).Msg("error loading config file") }
} }
}
if err := cl.Preload(options.ModelPath); err != nil {
if err := cl.Preload(options.ModelPath); err != nil { log.Error().Err(err).Msg("error downloading models")
log.Error().Err(err).Msg("error downloading models") }
}
if options.PreloadJSONModels != "" {
if options.PreloadJSONModels != "" { if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil {
if err := services.ApplyGalleryFromString(options.ModelPath, options.PreloadJSONModels, options.EnforcePredownloadScans, options.Galleries); err != nil { return nil, nil, nil, err
return nil, nil, nil, err }
} }
}
if options.PreloadModelsFromPath != "" {
if options.PreloadModelsFromPath != "" { if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil {
if err := services.ApplyGalleryFromFile(options.ModelPath, options.PreloadModelsFromPath, options.EnforcePredownloadScans, options.Galleries); err != nil { return nil, nil, nil, err
return nil, nil, nil, err }
} }
}
if options.Debug {
if options.Debug { for _, v := range cl.GetAllBackendConfigs() {
for _, v := range cl.GetAllBackendConfigs() { log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v)
log.Debug().Msgf("Model: %s (config: %+v)", v.Name, v) }
} }
}
if options.AssetsDestination != "" {
if options.AssetsDestination != "" { // Extract files from the embedded FS
// Extract files from the embedded FS err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination) log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination) if err != nil {
if err != nil { log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err)
log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) }
} }
}
if options.LibPath != "" {
if options.LibPath != "" { // If there is a lib directory, set LD_LIBRARY_PATH to include it
// If there is a lib directory, set LD_LIBRARY_PATH to include it err := library.LoadExternal(options.LibPath)
err := library.LoadExternal(options.LibPath) if err != nil {
if err != nil { log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries")
log.Error().Err(err).Str("LibPath", options.LibPath).Msg("Error while loading external libraries") }
} }
}
// turn off any process that was started by GRPC if the context is canceled
// turn off any process that was started by GRPC if the context is canceled go func() {
go func() { <-options.Context.Done()
<-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down")
log.Debug().Msgf("Context canceled, shutting down") err := ml.StopAllGRPC()
err := ml.StopAllGRPC() if err != nil {
if err != nil { log.Error().Err(err).Msg("error while stopping all grpc backends")
log.Error().Err(err).Msg("error while stopping all grpc backends") }
} }()
}()
if options.WatchDog {
if options.WatchDog { wd := model.NewWatchDog(
wd := model.NewWatchDog( ml,
ml, options.WatchDogBusyTimeout,
options.WatchDogBusyTimeout, options.WatchDogIdleTimeout,
options.WatchDogIdleTimeout, options.WatchDogBusy,
options.WatchDogBusy, options.WatchDogIdle)
options.WatchDogIdle) ml.SetWatchDog(wd)
ml.SetWatchDog(wd) go wd.Run()
go wd.Run() go func() {
go func() { <-options.Context.Done()
<-options.Context.Done() log.Debug().Msgf("Context canceled, shutting down")
log.Debug().Msgf("Context canceled, shutting down") wd.Shutdown()
wd.Shutdown() }()
}() }
}
// Watch the configuration directory
if options.LoadToMemory != nil { startWatcher(options)
for _, m := range options.LoadToMemory {
cfg, err := cl.LoadBackendConfigFileByName(m, options.ModelPath, log.Info().Msg("core/startup process completed!")
config.LoadOptionDebug(options.Debug), return cl, ml, options, nil
config.LoadOptionThreads(options.Threads), }
config.LoadOptionContextSize(options.ContextSize),
config.LoadOptionF16(options.F16), func startWatcher(options *config.ApplicationConfig) {
config.ModelPath(options.ModelPath), if options.DynamicConfigsDir == "" {
) // No need to start the watcher if the directory is not set
if err != nil { return
return nil, nil, nil, err }
}
if _, err := os.Stat(options.DynamicConfigsDir); err != nil {
log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model) if os.IsNotExist(err) {
// We try to create the directory if it does not exist and was specified
grpcOpts := backend.GRPCModelOpts(*cfg) if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil {
o := []model.Option{ log.Error().Err(err).Msg("failed creating DynamicConfigsDir")
model.WithModel(cfg.Model), }
model.WithAssetDir(options.AssetsDestination), } else {
model.WithThreads(uint32(options.Threads)), // something else happened, we log the error and don't start the watcher
model.WithLoadGRPCLoadModelOpts(grpcOpts), log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started")
} return
}
var backendErr error }
if cfg.Backend != "" {
o = append(o, model.WithBackendString(cfg.Backend)) configHandler := newConfigFileHandler(options)
_, backendErr = ml.BackendLoader(o...) if err := configHandler.Watch(); err != nil {
} else { log.Error().Err(err).Msg("failed creating watcher")
_, backendErr = ml.GreedyLoader(o...) }
} }
if backendErr != nil {
return nil, nil, nil, err // In Lieu of a proper DI framework, this function wires up the Application manually.
} // This is in core/startup rather than core/state.go to keep package references clean!
} func createApplication(appConfig *config.ApplicationConfig) *core.Application {
} app := &core.Application{
ApplicationConfig: appConfig,
// Watch the configuration directory BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
startWatcher(options) ModelLoader: model.NewModelLoader(appConfig.ModelPath),
}
log.Info().Msg("core/startup process completed!")
return cl, ml, options, nil var err error
}
// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
func startWatcher(options *config.ApplicationConfig) { // app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
if options.DynamicConfigsDir == "" { // app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// No need to start the watcher if the directory is not set // app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
return // app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
}
app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
if _, err := os.Stat(options.DynamicConfigsDir); err != nil { app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
if os.IsNotExist(err) { // app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
// We try to create the directory if it does not exist and was specified
if err := os.MkdirAll(options.DynamicConfigsDir, 0700); err != nil { app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
log.Error().Err(err).Msg("failed creating DynamicConfigsDir") if err != nil {
} log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
} else { }
// something else happened, we log the error and don't start the watcher
log.Error().Err(err).Msg("failed to read DynamicConfigsDir, watcher will not be started") return app
return }
}
}
configHandler := newConfigFileHandler(options)
if err := configHandler.Watch(); err != nil {
log.Error().Err(err).Msg("failed creating watcher")
}
}
// In Lieu of a proper DI framework, this function wires up the Application manually.
// This is in core/startup rather than core/state.go to keep package references clean!
func createApplication(appConfig *config.ApplicationConfig) *core.Application {
app := &core.Application{
ApplicationConfig: appConfig,
BackendConfigLoader: config.NewBackendConfigLoader(appConfig.ModelPath),
ModelLoader: model.NewModelLoader(appConfig.ModelPath),
}
var err error
// app.EmbeddingsBackendService = backend.NewEmbeddingsBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.ImageGenerationBackendService = backend.NewImageGenerationBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.LLMBackendService = backend.NewLLMBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.TranscriptionBackendService = backend.NewTranscriptionBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
// app.TextToSpeechBackendService = backend.NewTextToSpeechBackendService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
app.BackendMonitorService = services.NewBackendMonitorService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig)
app.GalleryService = services.NewGalleryService(app.ApplicationConfig)
// app.OpenAIService = services.NewOpenAIService(app.ModelLoader, app.BackendConfigLoader, app.ApplicationConfig, app.LLMBackendService)
app.LocalAIMetricsService, err = services.NewLocalAIMetricsService()
if err != nil {
log.Error().Err(err).Msg("encountered an error initializing metrics service, startup will continue but metrics will not be tracked.")
}
return app
}

View File

@@ -154,7 +154,7 @@ Images are available with and without python dependencies. Note that images with
Images with `core` in the tag are smaller and do not contain any python dependencies. Images with `core` in the tag are smaller and do not contain any python dependencies.
{{< tabs tabTotal="7" >}} {{< tabs tabTotal="6" >}}
{{% tab tabName="Vanilla / CPU Images" %}} {{% tab tabName="Vanilla / CPU Images" %}}
| Description | Quay | Docker Hub | | Description | Quay | Docker Hub |
@@ -227,15 +227,6 @@ Images with `core` in the tag are smaller and do not contain any python dependen
{{% /tab %}} {{% /tab %}}
{{% tab tabName="Vulkan Images" %}}
| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai: master-vulkan-ffmpeg-core ` | `localai/localai: master-vulkan-ffmpeg-core ` |
| Latest tag | `quay.io/go-skynet/local-ai: latest-vulkan-ffmpeg-core ` | `localai/localai: latest-vulkan-ffmpeg-core` |
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-vulkan-fmpeg-core` | `localai/localai:{{< version >}}-vulkan-fmpeg-core` |
{{% /tab %}}
{{< /tabs >}} {{< /tabs >}}
## See Also ## See Also

View File

@@ -1,3 +1,3 @@
{ {
"version": "v2.21.0" "version": "v2.20.1"
} }

View File

@@ -39,7 +39,7 @@ func init() {
func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) { func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) {
remoteLibrary := map[string]string{} remoteLibrary := map[string]string{}
uri := downloader.URI(url) uri := downloader.URI(url)
err := uri.DownloadWithCallback(basePath, func(_ string, i []byte) error { err := uri.DownloadAndUnmarshal(basePath, func(_ string, i []byte) error {
return yaml.Unmarshal(i, &remoteLibrary) return yaml.Unmarshal(i, &remoteLibrary)
}) })
if err != nil { if err != nil {

View File

@@ -1,6 +1,6 @@
llama_index==0.11.12 llama_index==0.11.7
requests==2.32.3 requests==2.32.3
weaviate_client==4.8.1 weaviate_client==4.6.7
transformers transformers
torch torch
chainlit chainlit

View File

@@ -1,2 +1,2 @@
langchain==0.3.0 langchain==0.2.16
openai==1.47.1 openai==1.44.0

View File

@@ -1,4 +1,4 @@
langchain==0.3.0 langchain==0.2.16
openai==1.47.1 openai==1.44.1
chromadb==0.5.7 chromadb==0.5.5
llama-index==0.11.12 llama-index==0.11.7

View File

@@ -8,9 +8,9 @@ colorama==0.4.6
dataclasses-json==0.6.7 dataclasses-json==0.6.7
debugpy==1.8.2 debugpy==1.8.2
frozenlist==1.4.1 frozenlist==1.4.1
greenlet==3.1.0 greenlet==3.0.3
idna==3.10 idna==3.8
langchain==0.3.0 langchain==0.2.16
langchain-community==0.2.16 langchain-community==0.2.16
marshmallow==3.22.0 marshmallow==3.22.0
marshmallow-enum==1.5.1 marshmallow-enum==1.5.1
@@ -18,16 +18,16 @@ multidict==6.0.5
mypy-extensions==1.0.0 mypy-extensions==1.0.0
numexpr==2.10.1 numexpr==2.10.1
numpy==2.1.1 numpy==2.1.1
openai==1.45.1 openai==1.44.0
openapi-schema-pydantic==1.2.4 openapi-schema-pydantic==1.2.4
packaging>=23.2 packaging>=23.2
pydantic==2.9.2 pydantic==2.8.2
PyYAML==6.0.2 PyYAML==6.0.2
requests==2.32.3 requests==2.32.3
SQLAlchemy==2.0.35 SQLAlchemy==2.0.32
tenacity==8.5.0 tenacity==8.5.0
tqdm==4.66.5 tqdm==4.66.5
typing-inspect==0.9.0 typing-inspect==0.9.0
typing_extensions==4.12.2 typing_extensions==4.12.2
urllib3==2.2.3 urllib3==2.2.2
yarl==1.11.1 yarl==1.11.0

View File

@@ -1,166 +1,4 @@
--- ---
## Qwen2.5
- &qwen25
name: "qwen2.5-14b-instruct"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
license: apache-2.0
description: |
Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.
tags:
- llm
- gguf
- gpu
- qwen
- qwen2.5
- cpu
urls:
- https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF
- https://huggingface.co/Qwen/Qwen2.5-7B-Instruct
overrides:
parameters:
model: Qwen2.5-14B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-14B-Instruct-Q4_K_M.gguf
sha256: e47ad95dad6ff848b431053b375adb5d39321290ea2c638682577dafca87c008
uri: huggingface://bartowski/Qwen2.5-14B-Instruct-GGUF/Qwen2.5-14B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-math-7b-instruct"
urls:
- https://huggingface.co/bartowski/Qwen2.5-Math-7B-Instruct-GGUF
- https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct
description: |
In August 2024, we released the first series of mathematical LLMs - Qwen2-Math - of our Qwen family. A month later, we have upgraded it and open-sourced Qwen2.5-Math series, including base models Qwen2.5-Math-1.5B/7B/72B, instruction-tuned models Qwen2.5-Math-1.5B/7B/72B-Instruct, and mathematical reward model Qwen2.5-Math-RM-72B.
Unlike Qwen2-Math series which only supports using Chain-of-Thught (CoT) to solve English math problems, Qwen2.5-Math series is expanded to support using both CoT and Tool-integrated Reasoning (TIR) to solve math problems in both Chinese and English. The Qwen2.5-Math series models have achieved significant performance improvements compared to the Qwen2-Math series models on the Chinese and English mathematics benchmarks with CoT.
The base models of Qwen2-Math are initialized with Qwen2-1.5B/7B/72B, and then pretrained on a meticulously designed Mathematics-specific Corpus. This corpus contains large-scale high-quality mathematical web texts, books, codes, exam questions, and mathematical pre-training data synthesized by Qwen2.
overrides:
parameters:
model: Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf
sha256: 7e03cee8c65b9ebf9ca14ddb010aca27b6b18e6c70f2779e94e7451d9529c091
uri: huggingface://bartowski/Qwen2.5-Math-7B-Instruct-GGUF/Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-14b_uncencored"
icon: https://huggingface.co/SicariusSicariiStuff/Phi-3.5-mini-instruct_Uncensored/resolve/main/Misc/Uncensored.png
urls:
- https://huggingface.co/SicariusSicariiStuff/Qwen2.5-14B_Uncencored
- https://huggingface.co/bartowski/Qwen2.5-14B_Uncencored-GGUF
description: |
Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.
Uncensored qwen2.5
tags:
- llm
- gguf
- gpu
- qwen
- qwen2.5
- cpu
- uncensored
overrides:
parameters:
model: Qwen2.5-14B_Uncencored-Q4_K_M.gguf
files:
- filename: Qwen2.5-14B_Uncencored-Q4_K_M.gguf
sha256: 066b9341b67e0fd0956de3576a3b7988574a5b9a0028aef2b9c8edeadd6dbbd1
uri: huggingface://bartowski/Qwen2.5-14B_Uncencored-GGUF/Qwen2.5-14B_Uncencored-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-coder-7b-instruct"
urls:
- https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct
- https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF
description: |
Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). For Qwen2.5-Coder, we release three base language models and instruction-tuned language models, 1.5, 7 and 32 (coming soon) billion parameters. Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:
Significantly improvements in code generation, code reasoning and code fixing. Base on the strong Qwen2.5, we scale up the training tokens into 5.5 trillion including source code, text-code grounding, Synthetic data, etc.
A more comprehensive foundation for real-world applications such as Code Agents. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.
Long-context Support up to 128K tokens.
overrides:
parameters:
model: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
sha256: 1664fccab734674a50763490a8c6931b70e3f2f8ec10031b54806d30e5f956b6
uri: huggingface://bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-math-72b-instruct"
icon: http://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2.5/qwen2.5-math-pipeline.jpeg
urls:
- https://huggingface.co/Qwen/Qwen2.5-Math-72B-Instruct
- https://huggingface.co/bartowski/Qwen2.5-Math-72B-Instruct-GGUF
description: |
In August 2024, we released the first series of mathematical LLMs - Qwen2-Math - of our Qwen family. A month later, we have upgraded it and open-sourced Qwen2.5-Math series, including base models Qwen2.5-Math-1.5B/7B/72B, instruction-tuned models Qwen2.5-Math-1.5B/7B/72B-Instruct, and mathematical reward model Qwen2.5-Math-RM-72B.
Unlike Qwen2-Math series which only supports using Chain-of-Thught (CoT) to solve English math problems, Qwen2.5-Math series is expanded to support using both CoT and Tool-integrated Reasoning (TIR) to solve math problems in both Chinese and English. The Qwen2.5-Math series models have achieved significant performance improvements compared to the Qwen2-Math series models on the Chinese and English mathematics benchmarks with CoT
overrides:
parameters:
model: Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf
sha256: 5dee8a6e21d555577712b4f65565a3c3737a0d5d92f5a82970728c6d8e237f17
uri: huggingface://bartowski/Qwen2.5-Math-72B-Instruct-GGUF/Qwen2.5-Math-72B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-0.5b-instruct"
urls:
- https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct
- https://huggingface.co/bartowski/Qwen2.5-0.5B-Instruct-GGUF
overrides:
parameters:
model: Qwen2.5-0.5B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-0.5B-Instruct-Q4_K_M.gguf
sha256: 6eb923e7d26e9cea28811e1a8e852009b21242fb157b26149d3b188f3a8c8653
uri: huggingface://bartowski/Qwen2.5-0.5B-Instruct-GGUF/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-1.5b-instruct"
urls:
- https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct
- https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF
overrides:
parameters:
model: Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
sha256: 1adf0b11065d8ad2e8123ea110d1ec956dab4ab038eab665614adba04b6c3370
uri: huggingface://bartowski/Qwen2.5-1.5B-Instruct-GGUF/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-32b"
urls:
- https://huggingface.co/Qwen/Qwen2.5-32B
- https://huggingface.co/mradermacher/Qwen2.5-32B-GGUF
overrides:
parameters:
model: Qwen2.5-32B.Q4_K_M.gguf
files:
- filename: Qwen2.5-32B.Q4_K_M.gguf
sha256: 02703e27c8b964db445444581a6937ad7538f0c32a100b26b49fa0e8ff527155
uri: huggingface://mradermacher/Qwen2.5-32B-GGUF/Qwen2.5-32B.Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-32b-instruct"
urls:
- https://huggingface.co/Qwen/Qwen2.5-32B-Instruct
- https://huggingface.co/bartowski/Qwen2.5-32B-Instruct-GGUF
overrides:
parameters:
model: Qwen2.5-32B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-32B-Instruct-Q4_K_M.gguf
sha256: 2e5f6daea180dbc59f65a40641e94d3973b5dbaa32b3c0acf54647fa874e519e
uri: huggingface://bartowski/Qwen2.5-32B-Instruct-GGUF/Qwen2.5-32B-Instruct-Q4_K_M.gguf
- !!merge <<: *qwen25
name: "qwen2.5-72b-instruct"
urls:
- https://huggingface.co/Qwen/Qwen2.5-72B-Instruct
- https://huggingface.co/bartowski/Qwen2.5-72B-Instruct-GGUF
overrides:
parameters:
model: Qwen2.5-72B-Instruct-Q4_K_M.gguf
files:
- filename: Qwen2.5-72B-Instruct-Q4_K_M.gguf
sha256: e4c8fad16946be8cf0bbf67eb8f4e18fc7415a5a6d2854b4cda453edb4082545
uri: huggingface://bartowski/Qwen2.5-72B-Instruct-GGUF/Qwen2.5-72B-Instruct-Q4_K_M.gguf
## SmolLM ## SmolLM
- &smollm - &smollm
url: "github:mudler/LocalAI/gallery/chatml.yaml@master" url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -601,75 +439,6 @@
- filename: Reflection-Llama-3.1-70B-q4_k_m.gguf - filename: Reflection-Llama-3.1-70B-q4_k_m.gguf
sha256: 16064e07037883a750cfeae9a7be41143aa857dbac81c2e93c68e2f941dee7b2 sha256: 16064e07037883a750cfeae9a7be41143aa857dbac81c2e93c68e2f941dee7b2
uri: huggingface://senseable/Reflection-Llama-3.1-70B-gguf/Reflection-Llama-3.1-70B-q4_k_m.gguf uri: huggingface://senseable/Reflection-Llama-3.1-70B-gguf/Reflection-Llama-3.1-70B-q4_k_m.gguf
- !!merge <<: *llama31
name: "llama-3.1-supernova-lite-reflection-v1.0-i1"
url: "github:mudler/LocalAI/gallery/llama3.1-reflective.yaml@master"
icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
urls:
- https://huggingface.co/SE6446/Llama-3.1-SuperNova-Lite-Reflection-V1.0
- https://huggingface.co/mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF
description: |
This model is a LoRA adaptation of arcee-ai/Llama-3.1-SuperNova-Lite on thesven/Reflective-MAGLLAMA-v0.1.1. This has been a simple experiment into reflection and the model appears to perform adequately, though I am unsure if it is a large improvement.
overrides:
parameters:
model: Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
files:
- filename: Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
sha256: 0c4531fe553d00142808e1bc7348ae92d400794c5b64d2db1a974718324dfe9a
uri: huggingface://mradermacher/Llama-3.1-SuperNova-Lite-Reflection-V1.0-i1-GGUF/Llama-3.1-SuperNova-Lite-Reflection-V1.0.i1-Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama-3.1-supernova-lite"
icon: https://i.ibb.co/r072p7j/eopi-ZVu-SQ0-G-Cav78-Byq-Tg.png
urls:
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite
- https://huggingface.co/arcee-ai/Llama-3.1-SuperNova-Lite-GGUF
description: |
Llama-3.1-SuperNova-Lite is an 8B parameter model developed by Arcee.ai, based on the Llama-3.1-8B-Instruct architecture. It is a distilled version of the larger Llama-3.1-405B-Instruct model, leveraging offline logits extracted from the 405B parameter variant. This 8B variation of Llama-3.1-SuperNova maintains high performance while offering exceptional instruction-following capabilities and domain-specific adaptability.
The model was trained using a state-of-the-art distillation pipeline and an instruction dataset generated with EvolKit, ensuring accuracy and efficiency across a wide range of tasks. For more information on its training, visit blog.arcee.ai.
Llama-3.1-SuperNova-Lite excels in both benchmark performance and real-world applications, providing the power of large-scale models in a more compact, efficient form ideal for organizations seeking high performance with reduced resource requirements.
overrides:
parameters:
model: supernova-lite-v1.Q4_K_M.gguf
files:
- filename: supernova-lite-v1.Q4_K_M.gguf
sha256: 237b7b0b704d294f92f36c576cc8fdc10592f95168a5ad0f075a2d8edf20da4d
uri: huggingface://arcee-ai/Llama-3.1-SuperNova-Lite-GGUF/supernova-lite-v1.Q4_K_M.gguf
- !!merge <<: *llama31
name: "llama3.1-8b-shiningvaliant2"
icon: https://cdn-uploads.huggingface.co/production/uploads/63444f2687964b331809eb55/EXX7TKbB-R6arxww2mk0R.jpeg
urls:
- https://huggingface.co/ValiantLabs/Llama3.1-8B-ShiningValiant2
- https://huggingface.co/bartowski/Llama3.1-8B-ShiningValiant2-GGUF
description: |
Shining Valiant 2 is a chat model built on Llama 3.1 8b, finetuned on our data for friendship, insight, knowledge and enthusiasm.
Finetuned on meta-llama/Meta-Llama-3.1-8B-Instruct for best available general performance
Trained on a variety of high quality data; focused on science, engineering, technical knowledge, and structured reasoning
overrides:
parameters:
model: Llama3.1-8B-ShiningValiant2-Q4_K_M.gguf
files:
- filename: Llama3.1-8B-ShiningValiant2-Q4_K_M.gguf
sha256: 9369eb97922a9f01e4eae610e3d7aaeca30762d78d9239884179451d60bdbdd2
uri: huggingface://bartowski/Llama3.1-8B-ShiningValiant2-GGUF/Llama3.1-8B-ShiningValiant2-Q4_K_M.gguf
- !!merge <<: *llama31
name: "nightygurps-14b-v1.1"
icon: https://cdn-uploads.huggingface.co/production/uploads/6336c5b3e3ac69e6a90581da/FvfjK7bKqsWdaBkB3eWgP.png
urls:
- https://huggingface.co/AlexBefest/NightyGurps-14b-v1.1
- https://huggingface.co/bartowski/NightyGurps-14b-v1.1-GGUF
description: |
This model works with Russian only.
This model is designed to run GURPS roleplaying games, as well as consult and assist. This model was trained on an augmented dataset of the GURPS Basic Set rulebook. Its primary purpose was initially to become an assistant consultant and assistant Game Master for the GURPS roleplaying system, but it can also be used as a GM for running solo games as a player.
overrides:
parameters:
model: NightyGurps-14b-v1.1-Q4_K_M.gguf
files:
- filename: NightyGurps-14b-v1.1-Q4_K_M.gguf
sha256: d09d53259ad2c0298150fa8c2db98fe42f11731af89fdc80ad0e255a19adc4b0
uri: huggingface://bartowski/NightyGurps-14b-v1.1-GGUF/NightyGurps-14b-v1.1-Q4_K_M.gguf
## Uncensored models ## Uncensored models
- !!merge <<: *llama31 - !!merge <<: *llama31
name: "humanish-roleplay-llama-3.1-8b-i1" name: "humanish-roleplay-llama-3.1-8b-i1"
@@ -889,103 +658,6 @@
- filename: Mahou-1.3-llama3.1-8B.Q4_K_M.gguf - filename: Mahou-1.3-llama3.1-8B.Q4_K_M.gguf
sha256: 88bfdca2f6077d789d3e0f161d19711aa208a6d9a02cce96a2276c69413b3594 sha256: 88bfdca2f6077d789d3e0f161d19711aa208a6d9a02cce96a2276c69413b3594
uri: huggingface://mradermacher/Mahou-1.3-llama3.1-8B-GGUF/Mahou-1.3-llama3.1-8B.Q4_K_M.gguf uri: huggingface://mradermacher/Mahou-1.3-llama3.1-8B-GGUF/Mahou-1.3-llama3.1-8B.Q4_K_M.gguf
- !!merge <<: *llama31
name: "azure_dusk-v0.2-iq-imatrix"
# chatml
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/n3-g_YTk3FY-DBzxXd28E.png
urls:
- https://huggingface.co/Lewdiculous/Azure_Dusk-v0.2-GGUF-IQ-Imatrix
description: |
"Following up on Crimson_Dawn-v0.2 we have Azure_Dusk-v0.2! Training on Mistral-Nemo-Base-2407 this time I've added significantly more data, as well as trained using RSLoRA as opposed to regular LoRA. Another key change is training on ChatML as opposed to Mistral Formatting."
by Author.
overrides:
parameters:
model: Azure_Dusk-v0.2-Q4_K_M-imat.gguf
files:
- filename: Azure_Dusk-v0.2-Q4_K_M-imat.gguf
sha256: c03a670c00976d14c267a0322374ed488b2a5f4790eb509136ca4e75cbc10cf4
uri: huggingface://Lewdiculous/Azure_Dusk-v0.2-GGUF-IQ-Imatrix/Azure_Dusk-v0.2-Q4_K_M-imat.gguf
- !!merge <<: *llama31
name: "l3.1-8b-niitama-v1.1-iq-imatrix"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/2Q5ky8TvP0vLS1ulMXnrn.png
urls:
- https://huggingface.co/Sao10K/L3.1-8B-Niitama-v1.1
- https://huggingface.co/Lewdiculous/L3.1-8B-Niitama-v1.1-GGUF-IQ-Imatrix
description: |
GGUF-IQ-Imatrix quants for Sao10K/L3.1-8B-Niitama-v1.1
Here's the subjectively superior L3 version: L3-8B-Niitama-v1
An experimental model using experimental methods.
More detail on it:
Tamamo and Niitama are made from the same data. Literally. The only thing that's changed is how theyre shuffled and formatted. Yet, I get wildly different results.
Interesting, eh? Feels kinda not as good compared to the l3 version, but it's aight.
overrides:
parameters:
model: L3.1-8B-Niitama-v1.1-Q4_K_M-imat.gguf
files:
- filename: L3.1-8B-Niitama-v1.1-Q4_K_M-imat.gguf
sha256: 524163bd0f1d43c9284b09118abcc192f3250b13dd3bb79d60c28321108b6748
uri: huggingface://Lewdiculous/L3.1-8B-Niitama-v1.1-GGUF-IQ-Imatrix/L3.1-8B-Niitama-v1.1-Q4_K_M-imat.gguf
- !!merge <<: *llama31
name: "llama-3.1-8b-stheno-v3.4-iq-imatrix"
icon: https://huggingface.co/Sao10K/Llama-3.1-8B-Stheno-v3.4/resolve/main/meneno.jpg
urls:
- https://huggingface.co/Sao10K/Llama-3.1-8B-Stheno-v3.4
- https://huggingface.co/Lewdiculous/Llama-3.1-8B-Stheno-v3.4-GGUF-IQ-Imatrix
description: |
This model has went through a multi-stage finetuning process.
- 1st, over a multi-turn Conversational-Instruct
- 2nd, over a Creative Writing / Roleplay along with some Creative-based Instruct Datasets.
- - Dataset consists of a mixture of Human and Claude Data.
Prompting Format:
- Use the L3 Instruct Formatting - Euryale 2.1 Preset Works Well
- Temperature + min_p as per usual, I recommend 1.4 Temp + 0.2 min_p.
- Has a different vibe to previous versions. Tinker around.
Changes since previous Stheno Datasets:
- Included Multi-turn Conversation-based Instruct Datasets to boost multi-turn coherency. # This is a seperate set, not the ones made by Kalomaze and Nopm, that are used in Magnum. They're completely different data.
- Replaced Single-Turn Instruct with Better Prompts and Answers by Claude 3.5 Sonnet and Claude 3 Opus.
- Removed c2 Samples -> Underway of re-filtering and masking to use with custom prefills. TBD
- Included 55% more Roleplaying Examples based of [Gryphe's](https://huggingface.co/datasets/Gryphe/Sonnet3.5-Charcard-Roleplay) Charcard RP Sets. Further filtered and cleaned on.
- Included 40% More Creative Writing Examples.
- Included Datasets Targeting System Prompt Adherence.
- Included Datasets targeting Reasoning / Spatial Awareness.
- Filtered for the usual errors, slop and stuff at the end. Some may have slipped through, but I removed nearly all of it.
Personal Opinions:
- Llama3.1 was more disappointing, in the Instruct Tune? It felt overbaked, atleast. Likely due to the DPO being done after their SFT Stage.
- Tuning on L3.1 base did not give good results, unlike when I tested with Nemo base. unfortunate.
- Still though, I think I did an okay job. It does feel a bit more distinctive.
- It took a lot of tinkering, like a LOT to wrangle this.
overrides:
parameters:
model: Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
files:
- filename: Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
sha256: 830d4858aa11a654f82f69fa40dee819edf9ecf54213057648304eb84b8dd5eb
uri: huggingface://Lewdiculous/Llama-3.1-8B-Stheno-v3.4-GGUF-IQ-Imatrix/Llama-3.1-8B-Stheno-v3.4-Q4_K_M-imat.gguf
- !!merge <<: *llama31
name: "llama-3.1-8b-arliai-rpmax-v1.1"
urls:
- https://huggingface.co/ArliAI/Llama-3.1-8B-ArliAI-RPMax-v1.1
- https://huggingface.co/bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.1-GGUF
description: |
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.
overrides:
parameters:
model: Llama-3.1-8B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
files:
- filename: Llama-3.1-8B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
sha256: 0a601c7341228d9160332965298d799369a1dc2b7080771fb8051bdeb556b30c
uri: huggingface://bartowski/Llama-3.1-8B-ArliAI-RPMax-v1.1-GGUF/Llama-3.1-8B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
- &deepseek - &deepseek
## Deepseek ## Deepseek
url: "github:mudler/LocalAI/gallery/deepseek.yaml@master" url: "github:mudler/LocalAI/gallery/deepseek.yaml@master"
@@ -1523,38 +1195,6 @@
- filename: Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf - filename: Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf
sha256: cf3465c183bf4ecbccd1b6b480f687e0160475b04c87e2f1e5ebc8baa0f4c7aa sha256: cf3465c183bf4ecbccd1b6b480f687e0160475b04c87e2f1e5ebc8baa0f4c7aa
uri: huggingface://bartowski/Pantheon-RP-1.6-12b-Nemo-GGUF/Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf uri: huggingface://bartowski/Pantheon-RP-1.6-12b-Nemo-GGUF/Pantheon-RP-1.6-12b-Nemo-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "acolyte-22b-i1"
icon: https://cdn-uploads.huggingface.co/production/uploads/6569a4ed2419be6072890cf8/3dcGMcrWK2-2vQh9QBt3o.png
urls:
- https://huggingface.co/rAIfle/Acolyte-22B
- https://huggingface.co/mradermacher/Acolyte-22B-i1-GGUF
description: |
LoRA of a bunch of random datasets on top of Mistral-Small-Instruct-2409, then SLERPed onto base at 0.5. Decent enough for its size. Check the LoRA for dataset info.
overrides:
parameters:
model: Acolyte-22B.i1-Q4_K_M.gguf
files:
- filename: Acolyte-22B.i1-Q4_K_M.gguf
sha256: 5a454405b98b6f886e8e4c695488d8ea098162bb8c46f2a7723fc2553c6e2f6e
uri: huggingface://mradermacher/Acolyte-22B-i1-GGUF/Acolyte-22B.i1-Q4_K_M.gguf
- !!merge <<: *mistral03
name: "mn-12b-lyra-v4-iq-imatrix"
icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/dVoru83WOpwVjMlgZ_xhA.png
# chatml
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
urls:
- https://huggingface.co/Lewdiculous/MN-12B-Lyra-v4-GGUF-IQ-Imatrix
description: |
A finetune of Mistral Nemo by Sao10K.
Uses the ChatML prompt format.
overrides:
parameters:
model: MN-12B-Lyra-v4-Q4_K_M-imat.gguf
files:
- filename: MN-12B-Lyra-v4-Q4_K_M-imat.gguf
sha256: 1989123481ca1936c8a2cbe278ff5d1d2b0ae63dbdc838bb36a6d7547b8087b3
uri: huggingface://Lewdiculous/MN-12B-Lyra-v4-GGUF-IQ-Imatrix/MN-12B-Lyra-v4-Q4_K_M-imat.gguf
- &mudler - &mudler
### START mudler's LocalAI specific-models ### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master" url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
@@ -2082,75 +1722,6 @@
- filename: Athena-codegemma-2-2b-it.Q4_K_M.gguf - filename: Athena-codegemma-2-2b-it.Q4_K_M.gguf
sha256: 59ce17023438b0da603dd211c7d39f78e7acac4108258ac0818a97a4ca7d64e3 sha256: 59ce17023438b0da603dd211c7d39f78e7acac4108258ac0818a97a4ca7d64e3
uri: huggingface://mradermacher/Athena-codegemma-2-2b-it-GGUF/Athena-codegemma-2-2b-it.Q4_K_M.gguf uri: huggingface://mradermacher/Athena-codegemma-2-2b-it-GGUF/Athena-codegemma-2-2b-it.Q4_K_M.gguf
- !!merge <<: *gemma
name: "datagemma-rag-27b-it"
urls:
- https://huggingface.co/google/datagemma-rag-27b-it
- https://huggingface.co/bartowski/datagemma-rag-27b-it-GGUF
description: |
DataGemma is a series of fine-tuned Gemma 2 models used to help LLMs access and incorporate reliable public statistical data from Data Commons into their responses. DataGemma RAG is used with Retrieval Augmented Generation, where it is trained to take a user query and generate natural language queries that can be understood by Data Commons' existing natural language interface. More information can be found in this research paper.
overrides:
parameters:
model: datagemma-rag-27b-it-Q4_K_M.gguf
files:
- filename: datagemma-rag-27b-it-Q4_K_M.gguf
sha256: 3dfcf51b05e3f0ab0979ad194de350edea71cb14444efa0a9f2ef5bfc80753f8
uri: huggingface://bartowski/datagemma-rag-27b-it-GGUF/datagemma-rag-27b-it-Q4_K_M.gguf
- !!merge <<: *gemma
name: "datagemma-rig-27b-it"
urls:
- https://huggingface.co/google/datagemma-rig-27b-it
- https://huggingface.co/bartowski/datagemma-rig-27b-it-GGUF
description: |
DataGemma is a series of fine-tuned Gemma 2 models used to help LLMs access and incorporate reliable public statistical data from Data Commons into their responses. DataGemma RIG is used in the retrieval interleaved generation approach (based off of tool-use approaches), where it is trained to annotate a response with natural language queries to Data Commons existing natural language interface wherever there are statistics. More information can be found in this research paper.
overrides:
parameters:
model: datagemma-rig-27b-it-Q4_K_M.gguf
files:
- filename: datagemma-rig-27b-it-Q4_K_M.gguf
sha256: a6738ffbb49b6c46d220e2793df85c0538e9ac72398e32a0914ee5e55c3096ad
uri: huggingface://bartowski/datagemma-rig-27b-it-GGUF/datagemma-rig-27b-it-Q4_K_M.gguf
- !!merge <<: *gemma
name: "buddy-2b-v1"
urls:
- https://huggingface.co/TheDrummer/Buddy-2B-v1
- https://huggingface.co/bartowski/Buddy-2B-v1-GGUF
description: |
Buddy is designed as an empathetic language model, aimed at fostering introspection, self-reflection, and personal growth through thoughtful conversation. Buddy won't judge and it won't dismiss your concerns. Get some self-care with Buddy.
overrides:
parameters:
model: Buddy-2B-v1-Q4_K_M.gguf
files:
- filename: Buddy-2B-v1-Q4_K_M.gguf
sha256: 9bd25ed907d1a3c2e07fe09399a9b3aec107d368c29896e2c46facede5b7e3d5
uri: huggingface://bartowski/Buddy-2B-v1-GGUF/Buddy-2B-v1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemma-2-9b-arliai-rpmax-v1.1"
urls:
- https://huggingface.co/ArliAI/Gemma-2-9B-ArliAI-RPMax-v1.1
- https://huggingface.co/bartowski/Gemma-2-9B-ArliAI-RPMax-v1.1-GGUF
description: |
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.
overrides:
parameters:
model: Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
files:
- filename: Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
sha256: 1724aff0ad6f71bf4371d839aca55578f7ec6f030d8d25c0254126088e4c6250
uri: huggingface://bartowski/Gemma-2-9B-ArliAI-RPMax-v1.1-GGUF/Gemma-2-9B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
- !!merge <<: *gemma
name: "gemma-2-2b-arliai-rpmax-v1.1"
urls:
- https://huggingface.co/bartowski/Gemma-2-2B-ArliAI-RPMax-v1.1-GGUF
description: |
RPMax is a series of models that are trained on a diverse set of curated creative writing and RP datasets with a focus on variety and deduplication. This model is designed to be highly creative and non-repetitive by making sure no two entries in the dataset have repeated characters or situations, which makes sure the model does not latch on to a certain personality and be capable of understanding and acting appropriately to any characters or situations.
overrides:
parameters:
model: Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
files:
- filename: Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
sha256: 89fe35345754d7e9de8d0c0d5bf35b2be9b12a09811b365b712b8b27112f7712
uri: huggingface://bartowski/Gemma-2-2B-ArliAI-RPMax-v1.1-GGUF/Gemma-2-2B-ArliAI-RPMax-v1.1-Q4_K_M.gguf
- &llama3 - &llama3
url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master"
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
@@ -4030,7 +3601,7 @@
files: files:
- filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf - filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf
sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05 sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05
uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/NeuralDaredevil-8B-abliterated.Q4_K_M.gguf uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf
- !!merge <<: *llama3 - !!merge <<: *llama3
name: "llama-3-8b-instruct-mopeymule" name: "llama-3-8b-instruct-mopeymule"
urls: urls:

View File

@@ -1,65 +0,0 @@
---
name: "llama3-instruct"
config_file: |
mmap: true
cutstrings:
- (.*?)</thinking>
function:
disable_no_action: true
grammar:
disable: true
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
template:
chat_message: |
<|start_header_id|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}<|end_header_id|>
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content -}}
{{ else if .FunctionCall -}}
{{ toJson .FunctionCall -}}
{{ end -}}
<|eot_id|>
function: |
<|start_header_id|>system<|end_header_id|>
You have access to the following functions:
{{range .Functions}}
Use the function '{{.Name}}' to '{{.Description}}'
{{toJson .Parameters}}
{{end}}
Think very carefully before calling functions.
If a you choose to call a function ONLY reply in the following format with no prefix or suffix:
<function=example_function_name>{{`{{"example_name": "example_value"}}`}}</function>
Reminder:
- If looking for real time information use relevant functions before falling back to searching on internet
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
<|eot_id|>
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
chat: |
{{.Input }}
<|start_header_id|>assistant<|end_header_id|>
<thinking>
completion: |
{{.Input}}
context_size: 8192
f16: true
stopwords:
- <|im_end|>
- <dummy32000>
- "<|eot_id|>"
- <|end_of_text|>

5
go.mod
View File

@@ -1,8 +1,8 @@
module github.com/mudler/LocalAI module github.com/mudler/LocalAI
go 1.23 go 1.22.0
toolchain go1.23.1 toolchain go1.22.4
require ( require (
dario.cat/mergo v1.0.0 dario.cat/mergo v1.0.0
@@ -74,7 +74,6 @@ require (
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
cloud.google.com/go/compute/metadata v0.3.0 // indirect cloud.google.com/go/compute/metadata v0.3.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 // indirect
github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect github.com/envoyproxy/protoc-gen-validate v1.0.4 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect

2
go.sum
View File

@@ -110,8 +110,6 @@ github.com/creachadair/otp v0.4.2 h1:ngNMaD6Tzd7UUNRFyed7ykZFn/Wr5sSs5ffqZWm9pu8
github.com/creachadair/otp v0.4.2/go.mod h1:DqV9hJyUbcUme0pooYfiFvvMe72Aua5sfhNzwfZvk40= github.com/creachadair/otp v0.4.2/go.mod h1:DqV9hJyUbcUme0pooYfiFvvMe72Aua5sfhNzwfZvk40=
github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2 h1:flLYmnQFZNo04x2NPehMbf30m7Pli57xwZ0NFqR/hb0=
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2/go.mod h1:NtWqRzAp/1tw+twkW8uuBenEVVYndEAZACWU3F3xdoQ=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

View File

@@ -31,11 +31,7 @@ const (
type URI string type URI string
func (uri URI) DownloadWithCallback(basePath string, f func(url string, i []byte) error) error { func (uri URI) DownloadAndUnmarshal(basePath string, f func(url string, i []byte) error) error {
return uri.DownloadWithAuthorizationAndCallback(basePath, "", f)
}
func (uri URI) DownloadWithAuthorizationAndCallback(basePath string, authorization string, f func(url string, i []byte) error) error {
url := uri.ResolveURL() url := uri.ResolveURL()
if strings.HasPrefix(url, LocalPrefix) { if strings.HasPrefix(url, LocalPrefix) {
@@ -45,6 +41,7 @@ func (uri URI) DownloadWithAuthorizationAndCallback(basePath string, authorizati
if err != nil { if err != nil {
return err return err
} }
// ???
resolvedBasePath, err := filepath.EvalSymlinks(basePath) resolvedBasePath, err := filepath.EvalSymlinks(basePath)
if err != nil { if err != nil {
return err return err
@@ -66,16 +63,7 @@ func (uri URI) DownloadWithAuthorizationAndCallback(basePath string, authorizati
} }
// Send a GET request to the URL // Send a GET request to the URL
response, err := http.Get(url)
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return err
}
if authorization != "" {
req.Header.Add("Authorization", authorization)
}
response, err := http.DefaultClient.Do(req)
if err != nil { if err != nil {
return err return err
} }

View File

@@ -11,7 +11,7 @@ var _ = Describe("Gallery API tests", func() {
It("parses github with a branch", func() { It("parses github with a branch", func() {
uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml") uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml")
Expect( Expect(
uri.DownloadWithCallback("", func(url string, i []byte) error { uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
return nil return nil
}), }),
@@ -21,7 +21,7 @@ var _ = Describe("Gallery API tests", func() {
uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml@main") uri := URI("github:go-skynet/model-gallery/gpt4all-j.yaml@main")
Expect( Expect(
uri.DownloadWithCallback("", func(url string, i []byte) error { uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
return nil return nil
}), }),
@@ -30,7 +30,7 @@ var _ = Describe("Gallery API tests", func() {
It("parses github with urls", func() { It("parses github with urls", func() {
uri := URI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml") uri := URI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")
Expect( Expect(
uri.DownloadWithCallback("", func(url string, i []byte) error { uri.DownloadAndUnmarshal("", func(url string, i []byte) error {
Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
return nil return nil
}), }),

View File

@@ -1,17 +0,0 @@
package model
import (
process "github.com/mudler/go-processmanager"
)
type GRPCProcessFilter = func(id string, p *process.Process) bool
func all(_ string, _ *process.Process) bool {
return true
}
func allExcept(s string) GRPCProcessFilter {
return func(id string, p *process.Process) bool {
return id != s
}
}

View File

@@ -320,7 +320,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
} else { } else {
grpcProcess := backendPath(o.assetDir, backend) grpcProcess := backendPath(o.assetDir, backend)
if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil { if err := utils.VerifyPath(grpcProcess, o.assetDir); err != nil {
return nil, fmt.Errorf("refering to a backend not in asset dir: %s", err.Error()) return nil, fmt.Errorf("grpc process not found in assetdir: %s", err.Error())
} }
if autoDetect { if autoDetect {
@@ -332,7 +332,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
// Check if the file exists // Check if the file exists
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) { if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
return nil, fmt.Errorf("backend not found: %s", grpcProcess) return nil, fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
} }
serverAddress, err := getFreeAddress() serverAddress, err := getFreeAddress()
@@ -355,8 +355,6 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
client = NewModel(serverAddress) client = NewModel(serverAddress)
} }
log.Debug().Msgf("Wait for the service to start up")
// Wait for the service to start up // Wait for the service to start up
ready := false ready := false
for i := 0; i < o.grpcAttempts; i++ { for i := 0; i < o.grpcAttempts; i++ {
@@ -415,8 +413,10 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
} }
if o.singleActiveBackend { if o.singleActiveBackend {
ml.mu.Lock()
log.Debug().Msgf("Stopping all backends except '%s'", o.model) log.Debug().Msgf("Stopping all backends except '%s'", o.model)
err := ml.StopGRPC(allExcept(o.model)) err := ml.StopAllExcept(o.model)
ml.mu.Unlock()
if err != nil { if err != nil {
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel") log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel")
return nil, err return nil, err
@@ -444,10 +444,13 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) { func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
o := NewOptions(opts...) o := NewOptions(opts...)
ml.mu.Lock()
// Return earlier if we have a model already loaded // Return earlier if we have a model already loaded
// (avoid looping through all the backends) // (avoid looping through all the backends)
if m := ml.CheckIsLoaded(o.model); m != nil { if m := ml.CheckIsLoaded(o.model); m != nil {
log.Debug().Msgf("Model '%s' already loaded", o.model) log.Debug().Msgf("Model '%s' already loaded", o.model)
ml.mu.Unlock()
return m.GRPC(o.parallelRequests, ml.wd), nil return m.GRPC(o.parallelRequests, ml.wd), nil
} }
@@ -455,11 +458,12 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
// If we can have only one backend active, kill all the others (except external backends) // If we can have only one backend active, kill all the others (except external backends)
if o.singleActiveBackend { if o.singleActiveBackend {
log.Debug().Msgf("Stopping all backends except '%s'", o.model) log.Debug().Msgf("Stopping all backends except '%s'", o.model)
err := ml.StopGRPC(allExcept(o.model)) err := ml.StopAllExcept(o.model)
if err != nil { if err != nil {
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing") log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
} }
} }
ml.mu.Unlock()
var err error var err error

View File

@@ -69,8 +69,6 @@ var knownModelsNameSuffixToSkip []string = []string{
".tar.gz", ".tar.gz",
} }
const retryTimeout = time.Duration(2 * time.Minute)
func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) { func (ml *ModelLoader) ListFilesInModelPath() ([]string, error) {
files, err := os.ReadDir(ml.ModelPath) files, err := os.ReadDir(ml.ModelPath)
if err != nil { if err != nil {
@@ -118,6 +116,9 @@ func (ml *ModelLoader) ListModels() []*Model {
} }
func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*Model, error)) (*Model, error) { func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*Model, error)) (*Model, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
// Check if we already have a loaded model // Check if we already have a loaded model
if model := ml.CheckIsLoaded(modelName); model != nil { if model := ml.CheckIsLoaded(modelName); model != nil {
return model, nil return model, nil
@@ -136,8 +137,6 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
return nil, fmt.Errorf("loader didn't return a model") return nil, fmt.Errorf("loader didn't return a model")
} }
ml.mu.Lock()
defer ml.mu.Unlock()
ml.models[modelName] = model ml.models[modelName] = model
return model, nil return model, nil
@@ -147,28 +146,18 @@ func (ml *ModelLoader) ShutdownModel(modelName string) error {
ml.mu.Lock() ml.mu.Lock()
defer ml.mu.Unlock() defer ml.mu.Unlock()
_, ok := ml.models[modelName] return ml.stopModel(modelName)
if !ok { }
func (ml *ModelLoader) stopModel(modelName string) error {
defer ml.deleteProcess(modelName)
if _, ok := ml.models[modelName]; !ok {
return fmt.Errorf("model %s not found", modelName) return fmt.Errorf("model %s not found", modelName)
} }
return nil
retries := 1
for ml.models[modelName].GRPC(false, ml.wd).IsBusy() {
log.Debug().Msgf("%s busy. Waiting.", modelName)
dur := time.Duration(retries*2) * time.Second
if dur > retryTimeout {
dur = retryTimeout
}
time.Sleep(dur)
retries++
}
return ml.deleteProcess(modelName)
} }
func (ml *ModelLoader) CheckIsLoaded(s string) *Model { func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
ml.mu.Lock()
defer ml.mu.Unlock()
m, ok := ml.models[s] m, ok := ml.models[s]
if !ok { if !ok {
return nil return nil

View File

@@ -9,12 +9,27 @@ import (
"strconv" "strconv"
"strings" "strings"
"syscall" "syscall"
"time"
"github.com/hpcloud/tail" "github.com/hpcloud/tail"
process "github.com/mudler/go-processmanager" process "github.com/mudler/go-processmanager"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
) )
func (ml *ModelLoader) StopAllExcept(s string) error {
return ml.StopGRPC(func(id string, p *process.Process) bool {
if id != s {
for ml.models[id].GRPC(false, ml.wd).IsBusy() {
log.Debug().Msgf("%s busy. Waiting.", id)
time.Sleep(2 * time.Second)
}
log.Debug().Msgf("[single-backend] Stopping %s", id)
return true
}
return false
})
}
func (ml *ModelLoader) deleteProcess(s string) error { func (ml *ModelLoader) deleteProcess(s string) error {
if _, exists := ml.grpcProcesses[s]; exists { if _, exists := ml.grpcProcesses[s]; exists {
if err := ml.grpcProcesses[s].Stop(); err != nil { if err := ml.grpcProcesses[s].Stop(); err != nil {
@@ -26,11 +41,17 @@ func (ml *ModelLoader) deleteProcess(s string) error {
return nil return nil
} }
type GRPCProcessFilter = func(id string, p *process.Process) bool
func includeAllProcesses(_ string, _ *process.Process) bool {
return true
}
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error { func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
var err error = nil var err error = nil
for k, p := range ml.grpcProcesses { for k, p := range ml.grpcProcesses {
if filter(k, p) { if filter(k, p) {
e := ml.ShutdownModel(k) e := ml.deleteProcess(k)
err = errors.Join(err, e) err = errors.Join(err, e)
} }
} }
@@ -38,12 +59,10 @@ func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
} }
func (ml *ModelLoader) StopAllGRPC() error { func (ml *ModelLoader) StopAllGRPC() error {
return ml.StopGRPC(all) return ml.StopGRPC(includeAllProcesses)
} }
func (ml *ModelLoader) GetGRPCPID(id string) (int, error) { func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
ml.mu.Lock()
defer ml.mu.Unlock()
p, exists := ml.grpcProcesses[id] p, exists := ml.grpcProcesses[id]
if !exists { if !exists {
return -1, fmt.Errorf("no grpc backend found for %s", id) return -1, fmt.Errorf("no grpc backend found for %s", id)

View File

@@ -13,8 +13,14 @@ var base64DownloadClient http.Client = http.Client{
Timeout: 30 * time.Second, Timeout: 30 * time.Second,
} }
// GetContentURIAsBase64 checks if the string is an URL, if it's an URL downloads the content in memory encodes it in base64 and returns the base64 string, otherwise returns the string by stripping base64 data headers // this function check if the string is an URL, if it's an URL downloads the image in memory
func GetContentURIAsBase64(s string) (string, error) { // encodes it in base64 and returns the base64 string
// This may look weird down in pkg/utils while it is currently only used in core/config
//
// but I believe it may be useful for MQTT as well in the near future, so I'm
// extracting it while I'm thinking of it.
func GetImageURLAsBase64(s string) (string, error) {
if strings.HasPrefix(s, "http") { if strings.HasPrefix(s, "http") {
// download the image // download the image
resp, err := base64DownloadClient.Get(s) resp, err := base64DownloadClient.Get(s)

View File

@@ -10,20 +10,20 @@ var _ = Describe("utils/base64 tests", func() {
It("GetImageURLAsBase64 can strip jpeg data url prefixes", func() { It("GetImageURLAsBase64 can strip jpeg data url prefixes", func() {
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes. // This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
input := "data:image/jpeg;base64,FOO" input := "data:image/jpeg;base64,FOO"
b64, err := GetContentURIAsBase64(input) b64, err := GetImageURLAsBase64(input)
Expect(err).To(BeNil()) Expect(err).To(BeNil())
Expect(b64).To(Equal("FOO")) Expect(b64).To(Equal("FOO"))
}) })
It("GetImageURLAsBase64 can strip png data url prefixes", func() { It("GetImageURLAsBase64 can strip png data url prefixes", func() {
// This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes. // This one doesn't actually _care_ that it's base64, so feed "bad" data in this test in order to catch a change in that behavior for informational purposes.
input := "data:image/png;base64,BAR" input := "data:image/png;base64,BAR"
b64, err := GetContentURIAsBase64(input) b64, err := GetImageURLAsBase64(input)
Expect(err).To(BeNil()) Expect(err).To(BeNil())
Expect(b64).To(Equal("BAR")) Expect(b64).To(Equal("BAR"))
}) })
It("GetImageURLAsBase64 returns an error for bogus data", func() { It("GetImageURLAsBase64 returns an error for bogus data", func() {
input := "FOO" input := "FOO"
b64, err := GetContentURIAsBase64(input) b64, err := GetImageURLAsBase64(input)
Expect(b64).To(Equal("")) Expect(b64).To(Equal(""))
Expect(err).ToNot(BeNil()) Expect(err).ToNot(BeNil())
Expect(err).To(MatchError("not valid string")) Expect(err).To(MatchError("not valid string"))
@@ -31,7 +31,7 @@ var _ = Describe("utils/base64 tests", func() {
It("GetImageURLAsBase64 can actually download images and calculates something", func() { It("GetImageURLAsBase64 can actually download images and calculates something", func() {
// This test doesn't actually _check_ the results at this time, which is bad, but there wasn't a test at all before... // This test doesn't actually _check_ the results at this time, which is bad, but there wasn't a test at all before...
input := "https://upload.wikimedia.org/wikipedia/en/2/29/Wargames.jpg" input := "https://upload.wikimedia.org/wikipedia/en/2/29/Wargames.jpg"
b64, err := GetContentURIAsBase64(input) b64, err := GetImageURLAsBase64(input)
Expect(err).To(BeNil()) Expect(err).To(BeNil())
Expect(b64).ToNot(BeNil()) Expect(b64).ToNot(BeNil())
}) })

View File

@@ -1394,12 +1394,6 @@ const docTemplate = `{
"description": "The message role", "description": "The message role",
"type": "string" "type": "string"
}, },
"string_audios": {
"type": "array",
"items": {
"type": "string"
}
},
"string_content": { "string_content": {
"type": "string" "type": "string"
}, },
@@ -1409,12 +1403,6 @@ const docTemplate = `{
"type": "string" "type": "string"
} }
}, },
"string_videos": {
"type": "array",
"items": {
"type": "string"
}
},
"tool_calls": { "tool_calls": {
"type": "array", "type": "array",
"items": { "items": {

View File

@@ -1387,12 +1387,6 @@
"description": "The message role", "description": "The message role",
"type": "string" "type": "string"
}, },
"string_audios": {
"type": "array",
"items": {
"type": "string"
}
},
"string_content": { "string_content": {
"type": "string" "type": "string"
}, },
@@ -1402,12 +1396,6 @@
"type": "string" "type": "string"
} }
}, },
"string_videos": {
"type": "array",
"items": {
"type": "string"
}
},
"tool_calls": { "tool_calls": {
"type": "array", "type": "array",
"items": { "items": {

View File

@@ -453,20 +453,12 @@ definitions:
role: role:
description: The message role description: The message role
type: string type: string
string_audios:
items:
type: string
type: array
string_content: string_content:
type: string type: string
string_images: string_images:
items: items:
type: string type: string
type: array type: array
string_videos:
items:
type: string
type: array
tool_calls: tool_calls:
items: items:
$ref: '#/definitions/schema.ToolCall' $ref: '#/definitions/schema.ToolCall'

View File

@@ -171,7 +171,7 @@ var _ = Describe("E2E test", func() {
}) })
Context("vision", func() { Context("vision", func() {
It("correctly", func() { It("correctly", func() {
model := "gpt-4o" model := "gpt-4-vision-preview"
resp, err := client.CreateChatCompletion(context.TODO(), resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{ openai.ChatCompletionRequest{
Model: model, Messages: []openai.ChatCompletionMessage{ Model: model, Messages: []openai.ChatCompletionMessage{