mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-24 08:38:02 -04:00
Compare commits
55 Commits
v2.21.1
...
fix/closed
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
83110891fd | ||
|
|
2553de0187 | ||
|
|
408dfe62ee | ||
|
|
648ffdf449 | ||
|
|
04c0841ca9 | ||
|
|
43144c4743 | ||
|
|
a778668bcd | ||
|
|
4b131a7090 | ||
|
|
d06a052d54 | ||
|
|
b5115903bf | ||
|
|
afaff175d0 | ||
|
|
4686877c6d | ||
|
|
e5586e8781 | ||
|
|
3acd767ac4 | ||
|
|
5488fc3bc1 | ||
|
|
0965c6cd68 | ||
|
|
db704199dc | ||
|
|
2cc3b7128e | ||
|
|
88b99d30bb | ||
|
|
307a835199 | ||
|
|
f84b55d1ef | ||
|
|
139209353f | ||
|
|
a30058b80f | ||
|
|
53f406dc35 | ||
|
|
2649407f44 | ||
|
|
0a8f627cce | ||
|
|
76d4e88e0c | ||
|
|
d4d2a76f8f | ||
|
|
7d306c6431 | ||
|
|
44bdacac61 | ||
|
|
6bd6e2bdeb | ||
|
|
2908ff3f6b | ||
|
|
f19277b8e2 | ||
|
|
32de75c683 | ||
|
|
164a9e972f | ||
|
|
d747f2c89b | ||
|
|
58662db48e | ||
|
|
078942fc9f | ||
|
|
6dfee99575 | ||
|
|
ad62156d54 | ||
|
|
1689740269 | ||
|
|
50a3b54e34 | ||
|
|
e94a50e9db | ||
|
|
4e0f3cc980 | ||
|
|
2a8cbad122 | ||
|
|
453c45d022 | ||
|
|
4550abbfce | ||
|
|
f2ba1cfb01 | ||
|
|
8c4196faf3 | ||
|
|
b0f4556c0f | ||
|
|
fa5c98549a | ||
|
|
3d12d2037c | ||
|
|
d6522e69ca | ||
|
|
ef1507d000 | ||
|
|
a3d69872e3 |
2
.github/workflows/secscan.yaml
vendored
2
.github/workflows/secscan.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
- name: Run Gosec Security Scanner
|
- name: Run Gosec Security Scanner
|
||||||
if: ${{ github.actor != 'dependabot[bot]' }}
|
if: ${{ github.actor != 'dependabot[bot]' }}
|
||||||
uses: securego/gosec@v2.21.0
|
uses: securego/gosec@v2.21.4
|
||||||
with:
|
with:
|
||||||
# we let the report trigger content trigger a failure using the GitHub Security features.
|
# we let the report trigger content trigger a failure using the GitHub Security features.
|
||||||
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
args: '-no-fail -fmt sarif -out results.sarif ./...'
|
||||||
|
|||||||
11
.github/workflows/test.yml
vendored
11
.github/workflows/test.yml
vendored
@@ -178,13 +178,22 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: true
|
submodules: true
|
||||||
|
- name: Dependencies
|
||||||
|
run: |
|
||||||
|
# Install protoc
|
||||||
|
curl -L -s https://github.com/protocolbuffers/protobuf/releases/download/v26.1/protoc-26.1-linux-x86_64.zip -o protoc.zip && \
|
||||||
|
unzip -j -d /usr/local/bin protoc.zip bin/protoc && \
|
||||||
|
rm protoc.zip
|
||||||
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
|
||||||
|
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
|
||||||
|
PATH="$PATH:$HOME/go/bin" make protogen-go
|
||||||
- name: Build images
|
- name: Build images
|
||||||
run: |
|
run: |
|
||||||
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=extras --build-arg EXTRA_BACKENDS=rerankers --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
|
||||||
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
|
||||||
- name: Test
|
- name: Test
|
||||||
run: |
|
run: |
|
||||||
LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
PATH="$PATH:$HOME/go/bin" LOCALAI_MODELS_DIR=$PWD/models LOCALAI_IMAGE_TAG=test LOCALAI_IMAGE=local-ai-aio \
|
||||||
make run-e2e-aio
|
make run-e2e-aio
|
||||||
- name: Setup tmate session if tests fail
|
- name: Setup tmate session if tests fail
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
|
|||||||
@@ -15,8 +15,6 @@ Thank you for your interest in contributing to LocalAI! We appreciate your time
|
|||||||
- [Documentation](#documentation)
|
- [Documentation](#documentation)
|
||||||
- [Community and Communication](#community-and-communication)
|
- [Community and Communication](#community-and-communication)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
@@ -54,7 +52,7 @@ If you find a bug, have a feature request, or encounter any issues, please check
|
|||||||
|
|
||||||
## Coding Guidelines
|
## Coding Guidelines
|
||||||
|
|
||||||
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like []`golangci-lint`](https://golangci-lint.run) can help you here.
|
- No specific coding guidelines at the moment. Please make sure the code can be tested. The most popular lint tools like [`golangci-lint`](https://golangci-lint.run) can help you here.
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
|
||||||
@@ -84,5 +82,3 @@ We are welcome the contribution of the documents, please open new PR or create a
|
|||||||
- You can reach out via the Github issue tracker.
|
- You can reach out via the Github issue tracker.
|
||||||
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
- Open a new discussion at [Discussion](https://github.com/go-skynet/LocalAI/discussions)
|
||||||
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
- Join the Discord channel [Discord](https://discord.gg/uJAeKSAGDy)
|
||||||
|
|
||||||
---
|
|
||||||
|
|||||||
6
Makefile
6
Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
|
|||||||
# llama.cpp versions
|
# llama.cpp versions
|
||||||
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
|
||||||
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
|
||||||
CPPLLAMA_VERSION?=70392f1f81470607ba3afef04aa56c9f65587664
|
CPPLLAMA_VERSION?=d5ed2b929d85bbd7dbeecb690880f07d9d7a6077
|
||||||
|
|
||||||
# go-rwkv version
|
# go-rwkv version
|
||||||
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
|
||||||
@@ -16,7 +16,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
|
|||||||
|
|
||||||
# whisper.cpp version
|
# whisper.cpp version
|
||||||
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
|
||||||
WHISPER_CPP_VERSION?=0d2e2aed80109e8696791083bde3b58e190b7812
|
WHISPER_CPP_VERSION?=ccc2547210e09e3a1785817383ab770389bb442b
|
||||||
|
|
||||||
# bert.cpp version
|
# bert.cpp version
|
||||||
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
|
||||||
@@ -468,7 +468,7 @@ run-e2e-image:
|
|||||||
ls -liah $(abspath ./tests/e2e-fixtures)
|
ls -liah $(abspath ./tests/e2e-fixtures)
|
||||||
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
|
||||||
|
|
||||||
run-e2e-aio:
|
run-e2e-aio: protogen-go
|
||||||
@echo 'Running e2e AIO tests'
|
@echo 'Running e2e AIO tests'
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e-aio
|
||||||
|
|
||||||
|
|||||||
10
README.md
10
README.md
@@ -68,9 +68,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
|
|
||||||
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
[💻 Getting started](https://localai.io/basics/getting_started/index.html)
|
||||||
|
|
||||||
## 🔥🔥 Hot topics / Roadmap
|
## 📰 Latest project news
|
||||||
|
|
||||||
[Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
|
||||||
|
|
||||||
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
- Aug 2024: 🆕 FLUX-1, [P2P Explorer](https://explorer.localai.io)
|
||||||
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
- July 2024: 🔥🔥 🆕 P2P Dashboard, LocalAI Federated mode and AI Swarms: https://github.com/mudler/LocalAI/pull/2723
|
||||||
@@ -83,8 +81,12 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu
|
|||||||
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
- May 2024: Chat, TTS, and Image generation in the WebUI: https://github.com/mudler/LocalAI/pull/2222
|
||||||
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
- April 2024: Reranker API: https://github.com/mudler/LocalAI/pull/2121
|
||||||
|
|
||||||
Hot topics (looking for contributors):
|
Roadmap items: [List of issues](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
|
||||||
|
|
||||||
|
## 🔥🔥 Hot topics (looking for help):
|
||||||
|
|
||||||
|
- Multimodal with vLLM and Video understanding: https://github.com/mudler/LocalAI/pull/3729
|
||||||
|
- Realtime API: https://github.com/mudler/LocalAI/issues/3714
|
||||||
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
- 🔥🔥 Distributed, P2P Global community pools: https://github.com/mudler/LocalAI/issues/3113
|
||||||
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
- WebUI improvements: https://github.com/mudler/LocalAI/issues/2156
|
||||||
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
- Backends v2: https://github.com/mudler/LocalAI/issues/1126
|
||||||
|
|||||||
@@ -26,6 +26,19 @@ service Backend {
|
|||||||
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}
|
||||||
|
|
||||||
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
rpc Rerank(RerankRequest) returns (RerankResult) {}
|
||||||
|
|
||||||
|
rpc GetMetrics(MetricsRequest) returns (MetricsResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define the empty request
|
||||||
|
message MetricsRequest {}
|
||||||
|
|
||||||
|
message MetricsResponse {
|
||||||
|
int32 slot_id = 1;
|
||||||
|
string prompt_json_for_slot = 2; // Stores the prompt as a JSON string.
|
||||||
|
float tokens_per_second = 3;
|
||||||
|
int32 tokens_generated = 4;
|
||||||
|
int32 prompt_tokens_processed = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message RerankRequest {
|
message RerankRequest {
|
||||||
@@ -136,6 +149,7 @@ message PredictOptions {
|
|||||||
repeated Message Messages = 44;
|
repeated Message Messages = 44;
|
||||||
repeated string Videos = 45;
|
repeated string Videos = 45;
|
||||||
repeated string Audios = 46;
|
repeated string Audios = 46;
|
||||||
|
string CorrelationId = 47;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The response message containing the result
|
// The response message containing the result
|
||||||
|
|||||||
@@ -495,6 +495,16 @@ struct llama_server_context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a pointer to the first entry of `slots` (in iteration order) whose
// is_processing() reports true, or nullptr when no slot is processing.
// The pointer refers to the element stored in `slots`, not to a copy.
llama_client_slot* get_active_slot() {
    for (llama_client_slot& candidate : slots) {
        if (!candidate.is_processing()) {
            continue; // not processing, keep scanning
        }
        return &candidate;
    }
    return nullptr;
}
|
||||||
|
|
||||||
void initialize() {
|
void initialize() {
|
||||||
// create slots
|
// create slots
|
||||||
all_slots_are_idle = true;
|
all_slots_are_idle = true;
|
||||||
@@ -2106,6 +2116,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, llama
|
|||||||
data["ignore_eos"] = predict->ignoreeos();
|
data["ignore_eos"] = predict->ignoreeos();
|
||||||
data["embeddings"] = predict->embeddings();
|
data["embeddings"] = predict->embeddings();
|
||||||
|
|
||||||
|
// Add the correlationid to json data
|
||||||
|
data["correlation_id"] = predict->correlationid();
|
||||||
|
|
||||||
// for each image in the request, add the image data
|
// for each image in the request, add the image data
|
||||||
//
|
//
|
||||||
for (int i = 0; i < predict->images_size(); i++) {
|
for (int i = 0; i < predict->images_size(); i++) {
|
||||||
@@ -2344,6 +2357,11 @@ public:
|
|||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
reply.set_prompt_tokens(tokens_evaluated);
|
reply.set_prompt_tokens(tokens_evaluated);
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
// Send the reply
|
// Send the reply
|
||||||
writer->Write(reply);
|
writer->Write(reply);
|
||||||
|
|
||||||
@@ -2367,6 +2385,12 @@ public:
|
|||||||
std::string completion_text;
|
std::string completion_text;
|
||||||
task_result result = llama.queue_results.recv(task_id);
|
task_result result = llama.queue_results.recv(task_id);
|
||||||
if (!result.error && result.stop) {
|
if (!result.error && result.stop) {
|
||||||
|
|
||||||
|
// Log Request Correlation Id
|
||||||
|
LOG_VERBOSE("correlation:", {
|
||||||
|
{ "id", data["correlation_id"] }
|
||||||
|
});
|
||||||
|
|
||||||
completion_text = result.result_json.value("content", "");
|
completion_text = result.result_json.value("content", "");
|
||||||
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
int32_t tokens_predicted = result.result_json.value("tokens_predicted", 0);
|
||||||
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
int32_t tokens_evaluated = result.result_json.value("tokens_evaluated", 0);
|
||||||
@@ -2406,6 +2430,31 @@ public:
|
|||||||
|
|
||||||
return grpc::Status::OK;
|
return grpc::Status::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fills `response` with metrics for the slot returned by
// llama.get_active_slot(). When no slot is processing, every field is set to
// its zero/empty value. Always returns grpc::Status::OK.
//
// `context` and `request` are not read; MetricsRequest carries no fields.
grpc::Status GetMetrics(ServerContext* context, const backend::MetricsRequest* request, backend::MetricsResponse* response) {
    llama_client_slot* active_slot = llama.get_active_slot();

    if (active_slot == nullptr) {
        // No slot is processing: report an all-zero snapshot.
        // NOTE(review): slot_id 0 is presumably also a valid slot id, so
        // callers cannot tell "no active slot" from "slot 0" by id alone.
        response->set_slot_id(0);
        response->set_prompt_json_for_slot("");
        response->set_tokens_per_second(0);
        response->set_tokens_generated(0);
        response->set_prompt_tokens_processed(0);
        return grpc::Status::OK;
    }

    // Throughput is 1e3 / t_token_generation * n_decoded. Without a guard, a
    // zero elapsed time yields +inf (or NaN when n_decoded is also 0), which
    // would be sent to the client as tokens_per_second. Report 0 instead
    // until a positive generation time is available.
    double tokens_per_second = 0.0;
    if (active_slot->t_token_generation > 0) {
        tokens_per_second = 1e3 / active_slot->t_token_generation * active_slot->n_decoded;
    }

    // Populate the response with the slot's metrics.
    response->set_slot_id(active_slot->id);
    response->set_prompt_json_for_slot(active_slot->prompt.dump());
    response->set_tokens_per_second(tokens_per_second);
    response->set_tokens_generated(active_slot->n_decoded);
    response->set_prompt_tokens_processed(active_slot->num_prompt_tokens_processed);

    return grpc::Status::OK;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void RunServer(const std::string& server_address) {
|
void RunServer(const std::string& server_address) {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
accelerate
|
accelerate
|
||||||
auto-gptq==0.7.1
|
auto-gptq==0.7.1
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
transformers
|
transformers
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
bark==0.1.5
|
bark==0.1.5
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,2 +1,2 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
coqui-tts
|
coqui-tts
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
setuptools
|
setuptools
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
pillow
|
pillow
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
wheel
|
wheel
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
intel-extension-for-pytorch
|
intel-extension-for-pytorch
|
||||||
torch
|
torch
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa==0.9.1
|
librosa==0.9.1
|
||||||
faster-whisper==1.0.3
|
faster-whisper==1.0.3
|
||||||
@@ -18,6 +18,6 @@ python-dotenv
|
|||||||
pypinyin==0.50.0
|
pypinyin==0.50.0
|
||||||
cn2an==0.5.22
|
cn2an==0.5.22
|
||||||
jieba==0.42.1
|
jieba==0.42.1
|
||||||
gradio==4.38.1
|
gradio==4.44.1
|
||||||
langid==1.1.6
|
langid==1.1.6
|
||||||
git+https://github.com/myshell-ai/MeloTTS.git
|
git+https://github.com/myshell-ai/MeloTTS.git
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
librosa
|
librosa
|
||||||
faster-whisper
|
faster-whisper
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
llvmlite==0.43.0
|
llvmlite==0.43.0
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
datasets
|
datasets
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
scipy==1.14.0
|
scipy==1.14.0
|
||||||
certifi
|
certifi
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
@@ -5,6 +5,8 @@ import argparse
|
|||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
from typing import List
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
import backend_pb2
|
import backend_pb2
|
||||||
import backend_pb2_grpc
|
import backend_pb2_grpc
|
||||||
@@ -15,6 +17,8 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine
|
|||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
from vllm.utils import random_uuid
|
from vllm.utils import random_uuid
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.transformers_utils.tokenizer import get_tokenizer
|
||||||
|
from vllm.multimodal.utils import fetch_image
|
||||||
|
from vllm.assets.video import VideoAsset
|
||||||
|
|
||||||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
||||||
|
|
||||||
@@ -105,6 +109,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
try:
|
try:
|
||||||
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
self.llm = AsyncLLMEngine.from_engine_args(engine_args)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
|
print(f"Unexpected {err=}, {type(err)=}", file=sys.stderr)
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -117,7 +122,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
)
|
)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
||||||
|
print("Model loaded successfully", file=sys.stderr)
|
||||||
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
||||||
|
|
||||||
async def Predict(self, request, context):
|
async def Predict(self, request, context):
|
||||||
@@ -196,15 +201,33 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if request.Seed != 0:
|
if request.Seed != 0:
|
||||||
sampling_params.seed = request.Seed
|
sampling_params.seed = request.Seed
|
||||||
|
|
||||||
|
# Extract image paths and process images
|
||||||
prompt = request.Prompt
|
prompt = request.Prompt
|
||||||
|
|
||||||
# If tokenizer template is enabled and messages are provided instead of prompt apply the tokenizer template
|
image_paths = request.Images
|
||||||
|
image_data = [self.load_image(img_path) for img_path in image_paths]
|
||||||
|
|
||||||
|
videos_path = request.Videos
|
||||||
|
video_data = [self.load_video(video_path) for video_path in videos_path]
|
||||||
|
|
||||||
|
# If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
|
||||||
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
|
if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
|
||||||
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
|
prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True)
|
||||||
|
|
||||||
# Generate text
|
# Generate text using the LLM engine
|
||||||
request_id = random_uuid()
|
request_id = random_uuid()
|
||||||
outputs = self.llm.generate(prompt, sampling_params, request_id)
|
print(f"Generating text with request_id: {request_id}", file=sys.stderr)
|
||||||
|
outputs = self.llm.generate(
|
||||||
|
{
|
||||||
|
"prompt": prompt,
|
||||||
|
"multi_modal_data": {
|
||||||
|
"image": image_data if image_data else None,
|
||||||
|
"video": video_data if video_data else None,
|
||||||
|
} if image_data or video_data else None,
|
||||||
|
},
|
||||||
|
sampling_params=sampling_params,
|
||||||
|
request_id=request_id,
|
||||||
|
)
|
||||||
|
|
||||||
# Stream the results
|
# Stream the results
|
||||||
generated_text = ""
|
generated_text = ""
|
||||||
@@ -227,9 +250,49 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|||||||
if streaming:
|
if streaming:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Remove the image files from /tmp folder
|
||||||
|
for img_path in image_paths:
|
||||||
|
try:
|
||||||
|
os.remove(img_path)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error removing image file: {img_path}, {e}", file=sys.stderr)
|
||||||
|
|
||||||
# Sending the final generated text
|
# Sending the final generated text
|
||||||
yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
yield backend_pb2.Reply(message=bytes(generated_text, encoding='utf-8'))
|
||||||
|
|
||||||
|
def load_image(self, image_path: str):
    """
    Open the file at ``image_path`` as an image.

    Args:
        image_path (str): The path to the image file.

    Returns:
        The object returned by ``Image.open(image_path)``. If opening raises
        any exception, the error is printed to stderr and the result of
        ``self.load_video(image_path)`` is returned instead.
    """
    try:
        opened = Image.open(image_path)
    except Exception as e:
        print(f"Error loading image {image_path}: {e}", file=sys.stderr)
        # Fall back to treating the same path as a video.
        return self.load_video(image_path)
    else:
        return opened
|
||||||
|
|
||||||
|
def load_video(self, video_path: str):
    """
    Load a video from the given file path.

    Args:
        video_path (str): The path to the video file.

    Returns:
        The value of ``VideoAsset(name=video_path).np_ndarrays``, or ``None``
        if loading raised an exception (the error is printed to stderr).
    """
    try:
        return VideoAsset(name=video_path).np_ndarrays
    except Exception as e:
        # The message must use video_path: this method has no `image_path`
        # name, so referencing it here raised NameError from inside the
        # handler and the caller never received the None fallback.
        print(f"Error loading video {video_path}: {e}", file=sys.stderr)
        return None
|
||||||
|
|
||||||
async def serve(address):
|
async def serve(address):
|
||||||
# Start asyncio gRPC server
|
# Start asyncio gRPC server
|
||||||
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
server = grpc.aio.server(migration_thread_pool=futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
||||||
|
|||||||
@@ -13,4 +13,18 @@ if [ "x${BUILD_PROFILE}" == "xintel" ]; then
|
|||||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
installRequirements
|
# When BUILD_TYPE is set, install the regular requirements. When it is empty
# or unset, build vLLM from a fresh checkout for the CPU target instead, see
# https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
if [ "x${BUILD_TYPE}" != "x" ]; then
    installRequirements
else
    ensureVenv
    # Reuse an existing checkout if one is already present.
    [ -d vllm ] || git clone https://github.com/vllm-project/vllm
    pushd vllm
        uv pip install wheel packaging ninja "setuptools>=49.4.0" numpy typing-extensions pillow setuptools-scm grpcio==1.66.2 protobuf bitsandbytes
        uv pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
        VLLM_TARGET_DEVICE=cpu python setup.py install
    popd
    # The checkout is only needed for the build; remove it afterwards.
    rm -rf vllm
fi
|
||||||
|
|||||||
@@ -2,3 +2,4 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
|
bitsandbytes
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
|
bitsandbytes
|
||||||
@@ -2,3 +2,4 @@
|
|||||||
accelerate
|
accelerate
|
||||||
torch
|
torch
|
||||||
transformers
|
transformers
|
||||||
|
bitsandbytes
|
||||||
@@ -5,3 +5,4 @@ torch
|
|||||||
transformers
|
transformers
|
||||||
optimum[openvino]
|
optimum[openvino]
|
||||||
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
setuptools==75.1.0 # https://github.com/mudler/LocalAI/issues/2406
|
||||||
|
bitsandbytes
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
grpcio==1.66.1
|
grpcio==1.66.2
|
||||||
protobuf
|
protobuf
|
||||||
certifi
|
certifi
|
||||||
setuptools
|
setuptools
|
||||||
@@ -10,20 +10,11 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() ([]float32, error), error) {
|
||||||
modelFile := backendConfig.Model
|
|
||||||
|
|
||||||
grpcOpts := GRPCModelOpts(backendConfig)
|
|
||||||
|
|
||||||
var inferenceModel interface{}
|
var inferenceModel interface{}
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
model.WithThreads(uint32(*backendConfig.Threads)),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
})
|
|
||||||
|
|
||||||
if backendConfig.Backend == "" {
|
if backendConfig.Backend == "" {
|
||||||
inferenceModel, err = loader.GreedyLoader(opts...)
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
|
|||||||
@@ -8,19 +8,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (func() error, error) {
|
||||||
threads := backendConfig.Threads
|
|
||||||
if *threads == 0 && appConfig.Threads != 0 {
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
threads = &appConfig.Threads
|
|
||||||
}
|
|
||||||
gRPCOpts := GRPCModelOpts(backendConfig)
|
|
||||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(backendConfig.Backend),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithThreads(uint32(*threads)),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithModel(backendConfig.Model),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(gRPCOpts),
|
|
||||||
})
|
|
||||||
|
|
||||||
inferenceModel, err := loader.BackendLoader(
|
inferenceModel, err := loader.BackendLoader(
|
||||||
opts...,
|
opts...,
|
||||||
|
|||||||
@@ -33,22 +33,11 @@ type TokenUsage struct {
|
|||||||
|
|
||||||
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
func ModelInference(ctx context.Context, s string, messages []schema.Message, images, videos, audios []string, loader *model.ModelLoader, c config.BackendConfig, o *config.ApplicationConfig, tokenCallback func(string, TokenUsage) bool) (func() (LLMResponse, error), error) {
|
||||||
modelFile := c.Model
|
modelFile := c.Model
|
||||||
threads := c.Threads
|
|
||||||
if *threads == 0 && o.Threads != 0 {
|
|
||||||
threads = &o.Threads
|
|
||||||
}
|
|
||||||
grpcOpts := GRPCModelOpts(c)
|
|
||||||
|
|
||||||
var inferenceModel grpc.Backend
|
var inferenceModel grpc.Backend
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opts := modelOpts(c, o, []model.Option{
|
opts := ModelOptions(c, o, []model.Option{})
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
model.WithThreads(uint32(*threads)), // some models uses this to allocate threads during startup
|
|
||||||
model.WithAssetDir(o.AssetsDestination),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(o.Context),
|
|
||||||
})
|
|
||||||
|
|
||||||
if c.Backend != "" {
|
if c.Backend != "" {
|
||||||
opts = append(opts, model.WithBackendString(c.Backend))
|
opts = append(opts, model.WithBackendString(c.Backend))
|
||||||
|
|||||||
@@ -11,32 +11,65 @@ import (
|
|||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
func modelOpts(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
|
func ModelOptions(c config.BackendConfig, so *config.ApplicationConfig, opts []model.Option) []model.Option {
|
||||||
|
name := c.Name
|
||||||
|
if name == "" {
|
||||||
|
name = c.Model
|
||||||
|
}
|
||||||
|
|
||||||
|
defOpts := []model.Option{
|
||||||
|
model.WithBackendString(c.Backend),
|
||||||
|
model.WithModel(c.Model),
|
||||||
|
model.WithAssetDir(so.AssetsDestination),
|
||||||
|
model.WithContext(so.Context),
|
||||||
|
model.WithModelID(name),
|
||||||
|
}
|
||||||
|
|
||||||
|
threads := 1
|
||||||
|
|
||||||
|
if c.Threads != nil {
|
||||||
|
threads = *c.Threads
|
||||||
|
}
|
||||||
|
|
||||||
|
if so.Threads != 0 {
|
||||||
|
threads = so.Threads
|
||||||
|
}
|
||||||
|
|
||||||
|
c.Threads = &threads
|
||||||
|
|
||||||
|
grpcOpts := grpcModelOpts(c)
|
||||||
|
defOpts = append(defOpts, model.WithLoadGRPCLoadModelOpts(grpcOpts))
|
||||||
|
|
||||||
if so.SingleBackend {
|
if so.SingleBackend {
|
||||||
opts = append(opts, model.WithSingleActiveBackend())
|
defOpts = append(defOpts, model.WithSingleActiveBackend())
|
||||||
}
|
}
|
||||||
|
|
||||||
if so.ParallelBackendRequests {
|
if so.ParallelBackendRequests {
|
||||||
opts = append(opts, model.EnableParallelRequests)
|
defOpts = append(defOpts, model.EnableParallelRequests)
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.GRPC.Attempts != 0 {
|
if c.GRPC.Attempts != 0 {
|
||||||
opts = append(opts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
defOpts = append(defOpts, model.WithGRPCAttempts(c.GRPC.Attempts))
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.GRPC.AttemptsSleepTime != 0 {
|
if c.GRPC.AttemptsSleepTime != 0 {
|
||||||
opts = append(opts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
|
defOpts = append(defOpts, model.WithGRPCAttemptsDelay(c.GRPC.AttemptsSleepTime))
|
||||||
}
|
}
|
||||||
|
|
||||||
for k, v := range so.ExternalGRPCBackends {
|
for k, v := range so.ExternalGRPCBackends {
|
||||||
opts = append(opts, model.WithExternalBackend(k, v))
|
defOpts = append(defOpts, model.WithExternalBackend(k, v))
|
||||||
}
|
}
|
||||||
|
|
||||||
return opts
|
return append(defOpts, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSeed(c config.BackendConfig) int32 {
|
func getSeed(c config.BackendConfig) int32 {
|
||||||
seed := int32(*c.Seed)
|
var seed int32 = config.RAND_SEED
|
||||||
|
|
||||||
|
if c.Seed != nil {
|
||||||
|
seed = int32(*c.Seed)
|
||||||
|
}
|
||||||
|
|
||||||
if seed == config.RAND_SEED {
|
if seed == config.RAND_SEED {
|
||||||
seed = rand.Int31()
|
seed = rand.Int31()
|
||||||
}
|
}
|
||||||
@@ -44,11 +77,47 @@ func getSeed(c config.BackendConfig) int32 {
|
|||||||
return seed
|
return seed
|
||||||
}
|
}
|
||||||
|
|
||||||
func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
||||||
b := 512
|
b := 512
|
||||||
if c.Batch != 0 {
|
if c.Batch != 0 {
|
||||||
b = c.Batch
|
b = c.Batch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
f16 := false
|
||||||
|
if c.F16 != nil {
|
||||||
|
f16 = *c.F16
|
||||||
|
}
|
||||||
|
|
||||||
|
embeddings := false
|
||||||
|
if c.Embeddings != nil {
|
||||||
|
embeddings = *c.Embeddings
|
||||||
|
}
|
||||||
|
|
||||||
|
lowVRAM := false
|
||||||
|
if c.LowVRAM != nil {
|
||||||
|
lowVRAM = *c.LowVRAM
|
||||||
|
}
|
||||||
|
|
||||||
|
mmap := false
|
||||||
|
if c.MMap != nil {
|
||||||
|
mmap = *c.MMap
|
||||||
|
}
|
||||||
|
|
||||||
|
ctxSize := 1024
|
||||||
|
if c.ContextSize != nil {
|
||||||
|
ctxSize = *c.ContextSize
|
||||||
|
}
|
||||||
|
|
||||||
|
mmlock := false
|
||||||
|
if c.MMlock != nil {
|
||||||
|
mmlock = *c.MMlock
|
||||||
|
}
|
||||||
|
|
||||||
|
nGPULayers := 9999999
|
||||||
|
if c.NGPULayers != nil {
|
||||||
|
nGPULayers = *c.NGPULayers
|
||||||
|
}
|
||||||
|
|
||||||
return &pb.ModelOptions{
|
return &pb.ModelOptions{
|
||||||
CUDA: c.CUDA || c.Diffusers.CUDA,
|
CUDA: c.CUDA || c.Diffusers.CUDA,
|
||||||
SchedulerType: c.Diffusers.SchedulerType,
|
SchedulerType: c.Diffusers.SchedulerType,
|
||||||
@@ -56,14 +125,14 @@ func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
CFGScale: c.Diffusers.CFGScale,
|
CFGScale: c.Diffusers.CFGScale,
|
||||||
LoraAdapter: c.LoraAdapter,
|
LoraAdapter: c.LoraAdapter,
|
||||||
LoraScale: c.LoraScale,
|
LoraScale: c.LoraScale,
|
||||||
F16Memory: *c.F16,
|
F16Memory: f16,
|
||||||
LoraBase: c.LoraBase,
|
LoraBase: c.LoraBase,
|
||||||
IMG2IMG: c.Diffusers.IMG2IMG,
|
IMG2IMG: c.Diffusers.IMG2IMG,
|
||||||
CLIPModel: c.Diffusers.ClipModel,
|
CLIPModel: c.Diffusers.ClipModel,
|
||||||
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
CLIPSubfolder: c.Diffusers.ClipSubFolder,
|
||||||
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
CLIPSkip: int32(c.Diffusers.ClipSkip),
|
||||||
ControlNet: c.Diffusers.ControlNet,
|
ControlNet: c.Diffusers.ControlNet,
|
||||||
ContextSize: int32(*c.ContextSize),
|
ContextSize: int32(ctxSize),
|
||||||
Seed: getSeed(c),
|
Seed: getSeed(c),
|
||||||
NBatch: int32(b),
|
NBatch: int32(b),
|
||||||
NoMulMatQ: c.NoMulMatQ,
|
NoMulMatQ: c.NoMulMatQ,
|
||||||
@@ -85,16 +154,16 @@ func GRPCModelOpts(c config.BackendConfig) *pb.ModelOptions {
|
|||||||
YarnBetaSlow: c.YarnBetaSlow,
|
YarnBetaSlow: c.YarnBetaSlow,
|
||||||
NGQA: c.NGQA,
|
NGQA: c.NGQA,
|
||||||
RMSNormEps: c.RMSNormEps,
|
RMSNormEps: c.RMSNormEps,
|
||||||
MLock: *c.MMlock,
|
MLock: mmlock,
|
||||||
RopeFreqBase: c.RopeFreqBase,
|
RopeFreqBase: c.RopeFreqBase,
|
||||||
RopeScaling: c.RopeScaling,
|
RopeScaling: c.RopeScaling,
|
||||||
Type: c.ModelType,
|
Type: c.ModelType,
|
||||||
RopeFreqScale: c.RopeFreqScale,
|
RopeFreqScale: c.RopeFreqScale,
|
||||||
NUMA: c.NUMA,
|
NUMA: c.NUMA,
|
||||||
Embeddings: *c.Embeddings,
|
Embeddings: embeddings,
|
||||||
LowVRAM: *c.LowVRAM,
|
LowVRAM: lowVRAM,
|
||||||
NGPULayers: int32(*c.NGPULayers),
|
NGPULayers: int32(nGPULayers),
|
||||||
MMap: *c.MMap,
|
MMap: mmap,
|
||||||
MainGPU: c.MainGPU,
|
MainGPU: c.MainGPU,
|
||||||
Threads: int32(*c.Threads),
|
Threads: int32(*c.Threads),
|
||||||
TensorSplit: c.TensorSplit,
|
TensorSplit: c.TensorSplit,
|
||||||
|
|||||||
@@ -9,21 +9,9 @@ import (
|
|||||||
model "github.com/mudler/LocalAI/pkg/model"
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
func Rerank(modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
|
||||||
bb := backend
|
|
||||||
if bb == "" {
|
|
||||||
return nil, fmt.Errorf("backend is required")
|
|
||||||
}
|
|
||||||
|
|
||||||
grpcOpts := GRPCModelOpts(backendConfig)
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
|
||||||
|
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(bb),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
})
|
|
||||||
rerankModel, err := loader.BackendLoader(opts...)
|
rerankModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func SoundGeneration(
|
func SoundGeneration(
|
||||||
backend string,
|
|
||||||
modelFile string,
|
modelFile string,
|
||||||
text string,
|
text string,
|
||||||
duration *float32,
|
duration *float32,
|
||||||
@@ -25,18 +24,8 @@ func SoundGeneration(
|
|||||||
appConfig *config.ApplicationConfig,
|
appConfig *config.ApplicationConfig,
|
||||||
backendConfig config.BackendConfig,
|
backendConfig config.BackendConfig,
|
||||||
) (string, *proto.Result, error) {
|
) (string, *proto.Result, error) {
|
||||||
if backend == "" {
|
|
||||||
return "", nil, fmt.Errorf("backend is a required parameter")
|
|
||||||
}
|
|
||||||
|
|
||||||
grpcOpts := GRPCModelOpts(backendConfig)
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{model.WithModel(modelFile)})
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(backend),
|
|
||||||
model.WithModel(modelFile),
|
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
})
|
|
||||||
|
|
||||||
soundGenModel, err := loader.BackendLoader(opts...)
|
soundGenModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
33
core/backend/token_metrics.go
Normal file
33
core/backend/token_metrics.go
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc/proto"
|
||||||
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TokenMetrics(
|
||||||
|
modelFile string,
|
||||||
|
loader *model.ModelLoader,
|
||||||
|
appConfig *config.ApplicationConfig,
|
||||||
|
backendConfig config.BackendConfig) (*proto.MetricsResponse, error) {
|
||||||
|
|
||||||
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{
|
||||||
|
model.WithModel(modelFile),
|
||||||
|
})
|
||||||
|
model, err := loader.BackendLoader(opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if model == nil {
|
||||||
|
return nil, fmt.Errorf("could not loadmodel model")
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := model.GetTokenMetrics(context.Background(), &proto.MetricsRequest{})
|
||||||
|
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
44
core/backend/tokenize.go
Normal file
44
core/backend/tokenize.go
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
package backend
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
model "github.com/mudler/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ModelTokenize(s string, loader *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (schema.TokenizeResponse, error) {
|
||||||
|
|
||||||
|
modelFile := backendConfig.Model
|
||||||
|
|
||||||
|
var inferenceModel grpc.Backend
|
||||||
|
var err error
|
||||||
|
|
||||||
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{
|
||||||
|
model.WithModel(modelFile),
|
||||||
|
})
|
||||||
|
|
||||||
|
if backendConfig.Backend == "" {
|
||||||
|
inferenceModel, err = loader.GreedyLoader(opts...)
|
||||||
|
} else {
|
||||||
|
opts = append(opts, model.WithBackendString(backendConfig.Backend))
|
||||||
|
inferenceModel, err = loader.BackendLoader(opts...)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return schema.TokenizeResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
predictOptions := gRPCPredictOpts(backendConfig, loader.ModelPath)
|
||||||
|
predictOptions.Prompt = s
|
||||||
|
|
||||||
|
// tokenize the string
|
||||||
|
resp, err := inferenceModel.TokenizeString(appConfig.Context, predictOptions)
|
||||||
|
if err != nil {
|
||||||
|
return schema.TokenizeResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return schema.TokenizeResponse{
|
||||||
|
Tokens: resp.Tokens,
|
||||||
|
}, nil
|
||||||
|
|
||||||
|
}
|
||||||
@@ -14,13 +14,11 @@ import (
|
|||||||
|
|
||||||
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
func ModelTranscription(audio, language string, translate bool, ml *model.ModelLoader, backendConfig config.BackendConfig, appConfig *config.ApplicationConfig) (*schema.TranscriptionResult, error) {
|
||||||
|
|
||||||
opts := modelOpts(backendConfig, appConfig, []model.Option{
|
if backendConfig.Backend == "" {
|
||||||
model.WithBackendString(model.WhisperBackend),
|
backendConfig.Backend = model.WhisperBackend
|
||||||
model.WithModel(backendConfig.Model),
|
}
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithThreads(uint32(*backendConfig.Threads)),
|
opts := ModelOptions(backendConfig, appConfig, []model.Option{})
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
})
|
|
||||||
|
|
||||||
transcriptionModel, err := ml.BackendLoader(opts...)
|
transcriptionModel, err := ml.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -28,14 +28,9 @@ func ModelTTS(
|
|||||||
bb = model.PiperBackend
|
bb = model.PiperBackend
|
||||||
}
|
}
|
||||||
|
|
||||||
grpcOpts := GRPCModelOpts(backendConfig)
|
opts := ModelOptions(config.BackendConfig{}, appConfig, []model.Option{
|
||||||
|
|
||||||
opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
|
|
||||||
model.WithBackendString(bb),
|
model.WithBackendString(bb),
|
||||||
model.WithModel(modelFile),
|
model.WithModel(modelFile),
|
||||||
model.WithContext(appConfig.Context),
|
|
||||||
model.WithAssetDir(appConfig.AssetsDestination),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
})
|
})
|
||||||
ttsModel, err := loader.BackendLoader(opts...)
|
ttsModel, err := loader.BackendLoader(opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -85,13 +85,14 @@ func (t *SoundGenerationCMD) Run(ctx *cliContext.Context) error {
|
|||||||
|
|
||||||
options := config.BackendConfig{}
|
options := config.BackendConfig{}
|
||||||
options.SetDefaults()
|
options.SetDefaults()
|
||||||
|
options.Backend = t.Backend
|
||||||
|
|
||||||
var inputFile *string
|
var inputFile *string
|
||||||
if t.InputFile != "" {
|
if t.InputFile != "" {
|
||||||
inputFile = &t.InputFile
|
inputFile = &t.InputFile
|
||||||
}
|
}
|
||||||
|
|
||||||
filePath, _, err := backend.SoundGeneration(t.Backend, t.Model, text,
|
filePath, _, err := backend.SoundGeneration(t.Model, text,
|
||||||
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
parseToFloat32Ptr(t.Duration), parseToFloat32Ptr(t.Temperature), &t.DoSample,
|
||||||
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
inputFile, parseToInt32Ptr(t.InputFileSampleDivisor), ml, opts, options)
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ import (
|
|||||||
type UtilCMD struct {
|
type UtilCMD struct {
|
||||||
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
|
GGUFInfo GGUFInfoCMD `cmd:"" name:"gguf-info" help:"Get information about a GGUF file"`
|
||||||
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
|
HFScan HFScanCMD `cmd:"" name:"hf-scan" help:"Checks installed models for known security issues. WARNING: this is a best-effort feature and may not catch everything!"`
|
||||||
|
UsecaseHeuristic UsecaseHeuristicCMD `cmd:"" name:"usecase-heuristic" help:"Checks a specific model config and prints what usecase LocalAI will offer for it."`
|
||||||
}
|
}
|
||||||
|
|
||||||
type GGUFInfoCMD struct {
|
type GGUFInfoCMD struct {
|
||||||
@@ -30,6 +31,11 @@ type HFScanCMD struct {
|
|||||||
ToScan []string `arg:""`
|
ToScan []string `arg:""`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type UsecaseHeuristicCMD struct {
|
||||||
|
ConfigName string `name:"The config file to check"`
|
||||||
|
ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"`
|
||||||
|
}
|
||||||
|
|
||||||
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
|
||||||
if u.Args == nil || len(u.Args) == 0 {
|
if u.Args == nil || len(u.Args) == 0 {
|
||||||
return fmt.Errorf("no GGUF file provided")
|
return fmt.Errorf("no GGUF file provided")
|
||||||
@@ -99,3 +105,31 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (uhcmd *UsecaseHeuristicCMD) Run(ctx *cliContext.Context) error {
|
||||||
|
if len(uhcmd.ConfigName) == 0 {
|
||||||
|
log.Error().Msg("ConfigName is a required parameter")
|
||||||
|
return fmt.Errorf("config name is a required parameter")
|
||||||
|
}
|
||||||
|
if len(uhcmd.ModelsPath) == 0 {
|
||||||
|
log.Error().Msg("ModelsPath is a required parameter")
|
||||||
|
return fmt.Errorf("model path is a required parameter")
|
||||||
|
}
|
||||||
|
bcl := config.NewBackendConfigLoader(uhcmd.ModelsPath)
|
||||||
|
err := bcl.LoadBackendConfig(uhcmd.ConfigName)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Str("ConfigName", uhcmd.ConfigName).Msg("error while loading backend")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
bc, exists := bcl.GetBackendConfig(uhcmd.ConfigName)
|
||||||
|
if !exists {
|
||||||
|
log.Error().Str("ConfigName", uhcmd.ConfigName).Msg("ConfigName not found")
|
||||||
|
}
|
||||||
|
for name, uc := range config.GetAllBackendConfigUsecases() {
|
||||||
|
if bc.HasUsecases(uc) {
|
||||||
|
log.Info().Str("Usecase", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Info().Msg("---")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,11 +3,13 @@ package config
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/downloader"
|
"github.com/mudler/LocalAI/pkg/downloader"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -34,6 +36,8 @@ type BackendConfig struct {
|
|||||||
Embeddings *bool `yaml:"embeddings"`
|
Embeddings *bool `yaml:"embeddings"`
|
||||||
Backend string `yaml:"backend"`
|
Backend string `yaml:"backend"`
|
||||||
TemplateConfig TemplateConfig `yaml:"template"`
|
TemplateConfig TemplateConfig `yaml:"template"`
|
||||||
|
KnownUsecaseStrings []string `yaml:"known_usecases"`
|
||||||
|
KnownUsecases *BackendConfigUsecases `yaml:"-"`
|
||||||
|
|
||||||
PromptStrings, InputStrings []string `yaml:"-"`
|
PromptStrings, InputStrings []string `yaml:"-"`
|
||||||
InputToken [][]int `yaml:"-"`
|
InputToken [][]int `yaml:"-"`
|
||||||
@@ -192,6 +196,21 @@ type TemplateConfig struct {
|
|||||||
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
|
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
|
||||||
// It defaults to \n
|
// It defaults to \n
|
||||||
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
|
JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character"`
|
||||||
|
|
||||||
|
Video string `yaml:"video"`
|
||||||
|
Image string `yaml:"image"`
|
||||||
|
Audio string `yaml:"audio"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *BackendConfig) UnmarshalYAML(value *yaml.Node) error {
|
||||||
|
type BCAlias BackendConfig
|
||||||
|
var aux BCAlias
|
||||||
|
if err := value.Decode(&aux); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
*c = BackendConfig(aux)
|
||||||
|
c.KnownUsecases = GetUsecasesFromYAML(c.KnownUsecaseStrings)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *BackendConfig) SetFunctionCallString(s string) {
|
func (c *BackendConfig) SetFunctionCallString(s string) {
|
||||||
@@ -410,3 +429,121 @@ func (c *BackendConfig) Validate() bool {
|
|||||||
func (c *BackendConfig) HasTemplate() bool {
|
func (c *BackendConfig) HasTemplate() bool {
|
||||||
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
|
return c.TemplateConfig.Completion != "" || c.TemplateConfig.Edit != "" || c.TemplateConfig.Chat != "" || c.TemplateConfig.ChatMessage != ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type BackendConfigUsecases int
|
||||||
|
|
||||||
|
const (
|
||||||
|
FLAG_ANY BackendConfigUsecases = 0b000000000
|
||||||
|
FLAG_CHAT BackendConfigUsecases = 0b000000001
|
||||||
|
FLAG_COMPLETION BackendConfigUsecases = 0b000000010
|
||||||
|
FLAG_EDIT BackendConfigUsecases = 0b000000100
|
||||||
|
FLAG_EMBEDDINGS BackendConfigUsecases = 0b000001000
|
||||||
|
FLAG_RERANK BackendConfigUsecases = 0b000010000
|
||||||
|
FLAG_IMAGE BackendConfigUsecases = 0b000100000
|
||||||
|
FLAG_TRANSCRIPT BackendConfigUsecases = 0b001000000
|
||||||
|
FLAG_TTS BackendConfigUsecases = 0b010000000
|
||||||
|
FLAG_SOUND_GENERATION BackendConfigUsecases = 0b100000000
|
||||||
|
|
||||||
|
// Common Subsets
|
||||||
|
FLAG_LLM BackendConfigUsecases = FLAG_CHAT & FLAG_COMPLETION & FLAG_EDIT
|
||||||
|
)
|
||||||
|
|
||||||
|
func GetAllBackendConfigUsecases() map[string]BackendConfigUsecases {
|
||||||
|
return map[string]BackendConfigUsecases{
|
||||||
|
"FLAG_ANY": FLAG_ANY,
|
||||||
|
"FLAG_CHAT": FLAG_CHAT,
|
||||||
|
"FLAG_COMPLETION": FLAG_COMPLETION,
|
||||||
|
"FLAG_EDIT": FLAG_EDIT,
|
||||||
|
"FLAG_EMBEDDINGS": FLAG_EMBEDDINGS,
|
||||||
|
"FLAG_RERANK": FLAG_RERANK,
|
||||||
|
"FLAG_IMAGE": FLAG_IMAGE,
|
||||||
|
"FLAG_TRANSCRIPT": FLAG_TRANSCRIPT,
|
||||||
|
"FLAG_TTS": FLAG_TTS,
|
||||||
|
"FLAG_SOUND_GENERATION": FLAG_SOUND_GENERATION,
|
||||||
|
"FLAG_LLM": FLAG_LLM,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetUsecasesFromYAML(input []string) *BackendConfigUsecases {
|
||||||
|
if len(input) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
result := FLAG_ANY
|
||||||
|
flags := GetAllBackendConfigUsecases()
|
||||||
|
for _, str := range input {
|
||||||
|
flag, exists := flags["FLAG_"+strings.ToUpper(str)]
|
||||||
|
if exists {
|
||||||
|
result |= flag
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &result
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasUsecases examines a BackendConfig and determines which endpoints have a chance of success.
|
||||||
|
func (c *BackendConfig) HasUsecases(u BackendConfigUsecases) bool {
|
||||||
|
if (c.KnownUsecases != nil) && ((u & *c.KnownUsecases) == u) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return c.GuessUsecases(u)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GuessUsecases is a **heuristic based** function, as the backend in question may not be loaded yet, and the config may not record what it's useful at.
|
||||||
|
// In its current state, this function should ideally check for properties of the config like templates, rather than the direct backend name checks for the lower half.
|
||||||
|
// This avoids the maintenance burden of updating this list for each new backend - but unfortunately, that's the best option for some services currently.
|
||||||
|
func (c *BackendConfig) GuessUsecases(u BackendConfigUsecases) bool {
|
||||||
|
if (u & FLAG_CHAT) == FLAG_CHAT {
|
||||||
|
if c.TemplateConfig.Chat == "" && c.TemplateConfig.ChatMessage == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_COMPLETION) == FLAG_COMPLETION {
|
||||||
|
if c.TemplateConfig.Completion == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_EDIT) == FLAG_EDIT {
|
||||||
|
if c.TemplateConfig.Edit == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_EMBEDDINGS) == FLAG_EMBEDDINGS {
|
||||||
|
if c.Embeddings == nil || !*c.Embeddings {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_IMAGE) == FLAG_IMAGE {
|
||||||
|
imageBackends := []string{"diffusers", "tinydream", "stablediffusion"}
|
||||||
|
if !slices.Contains(imageBackends, c.Backend) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Backend == "diffusers" && c.Diffusers.PipelineType == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if (u & FLAG_RERANK) == FLAG_RERANK {
|
||||||
|
if c.Backend != "rerankers" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_TRANSCRIPT) == FLAG_TRANSCRIPT {
|
||||||
|
if c.Backend != "whisper" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (u & FLAG_TTS) == FLAG_TTS {
|
||||||
|
ttsBackends := []string{"piper", "transformers-musicgen", "parler-tts"}
|
||||||
|
if !slices.Contains(ttsBackends, c.Backend) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (u & FLAG_SOUND_GENERATION) == FLAG_SOUND_GENERATION {
|
||||||
|
if c.Backend != "transformers-musicgen" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|||||||
35
core/config/backend_config_filter.go
Normal file
35
core/config/backend_config_filter.go
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
package config
|
||||||
|
|
||||||
|
import "regexp"
|
||||||
|
|
||||||
|
type BackendConfigFilterFn func(string, *BackendConfig) bool
|
||||||
|
|
||||||
|
func NoFilterFn(_ string, _ *BackendConfig) bool { return true }
|
||||||
|
|
||||||
|
func BuildNameFilterFn(filter string) (BackendConfigFilterFn, error) {
|
||||||
|
if filter == "" {
|
||||||
|
return NoFilterFn, nil
|
||||||
|
}
|
||||||
|
rxp, err := regexp.Compile(filter)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return func(name string, config *BackendConfig) bool {
|
||||||
|
if config != nil {
|
||||||
|
return rxp.MatchString(config.Name)
|
||||||
|
}
|
||||||
|
return rxp.MatchString(name)
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func BuildUsecaseFilterFn(usecases BackendConfigUsecases) BackendConfigFilterFn {
|
||||||
|
if usecases == FLAG_ANY {
|
||||||
|
return NoFilterFn
|
||||||
|
}
|
||||||
|
return func(name string, config *BackendConfig) bool {
|
||||||
|
if config == nil {
|
||||||
|
return false // TODO: Potentially make this a param, for now, no known usecase to include
|
||||||
|
}
|
||||||
|
return config.HasUsecases(usecases)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -201,6 +201,26 @@ func (bcl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
|
|||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (bcl *BackendConfigLoader) GetBackendConfigsByFilter(filter BackendConfigFilterFn) []BackendConfig {
|
||||||
|
bcl.Lock()
|
||||||
|
defer bcl.Unlock()
|
||||||
|
var res []BackendConfig
|
||||||
|
|
||||||
|
if filter == nil {
|
||||||
|
filter = NoFilterFn
|
||||||
|
}
|
||||||
|
|
||||||
|
for n, v := range bcl.configs {
|
||||||
|
if filter(n, &v) {
|
||||||
|
res = append(res, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: I don't think this one needs to Sort on name... but we'll see what breaks.
|
||||||
|
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) {
|
func (bcl *BackendConfigLoader) RemoveBackendConfig(m string) {
|
||||||
bcl.Lock()
|
bcl.Lock()
|
||||||
defer bcl.Unlock()
|
defer bcl.Unlock()
|
||||||
|
|||||||
@@ -19,12 +19,17 @@ var _ = Describe("Test cases for config related functions", func() {
|
|||||||
`backend: "../foo-bar"
|
`backend: "../foo-bar"
|
||||||
name: "foo"
|
name: "foo"
|
||||||
parameters:
|
parameters:
|
||||||
model: "foo-bar"`)
|
model: "foo-bar"
|
||||||
|
known_usecases:
|
||||||
|
- chat
|
||||||
|
- COMPLETION
|
||||||
|
`)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
config, err := readBackendConfigFromFile(tmp.Name())
|
config, err := readBackendConfigFromFile(tmp.Name())
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(config).ToNot(BeNil())
|
Expect(config).ToNot(BeNil())
|
||||||
Expect(config.Validate()).To(BeFalse())
|
Expect(config.Validate()).To(BeFalse())
|
||||||
|
Expect(config.KnownUsecases).ToNot(BeNil())
|
||||||
})
|
})
|
||||||
It("Test Validate", func() {
|
It("Test Validate", func() {
|
||||||
tmp, err := os.CreateTemp("", "config.yaml")
|
tmp, err := os.CreateTemp("", "config.yaml")
|
||||||
@@ -61,4 +66,99 @@ parameters:
|
|||||||
Expect(config.Validate()).To(BeTrue())
|
Expect(config.Validate()).To(BeTrue())
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
It("Properly handles backend usecase matching", func() {
|
||||||
|
|
||||||
|
a := BackendConfig{
|
||||||
|
Name: "a",
|
||||||
|
}
|
||||||
|
Expect(a.HasUsecases(FLAG_ANY)).To(BeTrue()) // FLAG_ANY just means the config _exists_ essentially.
|
||||||
|
|
||||||
|
b := BackendConfig{
|
||||||
|
Name: "b",
|
||||||
|
Backend: "stablediffusion",
|
||||||
|
}
|
||||||
|
Expect(b.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(b.HasUsecases(FLAG_IMAGE)).To(BeTrue())
|
||||||
|
Expect(b.HasUsecases(FLAG_CHAT)).To(BeFalse())
|
||||||
|
|
||||||
|
c := BackendConfig{
|
||||||
|
Name: "c",
|
||||||
|
Backend: "llama-cpp",
|
||||||
|
TemplateConfig: TemplateConfig{
|
||||||
|
Chat: "chat",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
Expect(c.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(c.HasUsecases(FLAG_IMAGE)).To(BeFalse())
|
||||||
|
Expect(c.HasUsecases(FLAG_COMPLETION)).To(BeFalse())
|
||||||
|
Expect(c.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
||||||
|
|
||||||
|
d := BackendConfig{
|
||||||
|
Name: "d",
|
||||||
|
Backend: "llama-cpp",
|
||||||
|
TemplateConfig: TemplateConfig{
|
||||||
|
Chat: "chat",
|
||||||
|
Completion: "completion",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
Expect(d.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(d.HasUsecases(FLAG_IMAGE)).To(BeFalse())
|
||||||
|
Expect(d.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
|
||||||
|
Expect(d.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
||||||
|
|
||||||
|
trueValue := true
|
||||||
|
e := BackendConfig{
|
||||||
|
Name: "e",
|
||||||
|
Backend: "llama-cpp",
|
||||||
|
TemplateConfig: TemplateConfig{
|
||||||
|
Completion: "completion",
|
||||||
|
},
|
||||||
|
Embeddings: &trueValue,
|
||||||
|
}
|
||||||
|
|
||||||
|
Expect(e.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(e.HasUsecases(FLAG_IMAGE)).To(BeFalse())
|
||||||
|
Expect(e.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
|
||||||
|
Expect(e.HasUsecases(FLAG_CHAT)).To(BeFalse())
|
||||||
|
Expect(e.HasUsecases(FLAG_EMBEDDINGS)).To(BeTrue())
|
||||||
|
|
||||||
|
f := BackendConfig{
|
||||||
|
Name: "f",
|
||||||
|
Backend: "piper",
|
||||||
|
}
|
||||||
|
Expect(f.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(f.HasUsecases(FLAG_TTS)).To(BeTrue())
|
||||||
|
Expect(f.HasUsecases(FLAG_CHAT)).To(BeFalse())
|
||||||
|
|
||||||
|
g := BackendConfig{
|
||||||
|
Name: "g",
|
||||||
|
Backend: "whisper",
|
||||||
|
}
|
||||||
|
Expect(g.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(g.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
|
||||||
|
Expect(g.HasUsecases(FLAG_TTS)).To(BeFalse())
|
||||||
|
|
||||||
|
h := BackendConfig{
|
||||||
|
Name: "h",
|
||||||
|
Backend: "transformers-musicgen",
|
||||||
|
}
|
||||||
|
Expect(h.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(h.HasUsecases(FLAG_TRANSCRIPT)).To(BeFalse())
|
||||||
|
Expect(h.HasUsecases(FLAG_TTS)).To(BeTrue())
|
||||||
|
Expect(h.HasUsecases(FLAG_SOUND_GENERATION)).To(BeTrue())
|
||||||
|
|
||||||
|
knownUsecases := FLAG_CHAT | FLAG_COMPLETION
|
||||||
|
i := BackendConfig{
|
||||||
|
Name: "i",
|
||||||
|
Backend: "whisper",
|
||||||
|
// Earlier test checks parsing, this just needs to set final values
|
||||||
|
KnownUsecases: &knownUsecases,
|
||||||
|
}
|
||||||
|
Expect(i.HasUsecases(FLAG_ANY)).To(BeTrue())
|
||||||
|
Expect(i.HasUsecases(FLAG_TRANSCRIPT)).To(BeTrue())
|
||||||
|
Expect(i.HasUsecases(FLAG_TTS)).To(BeFalse())
|
||||||
|
Expect(i.HasUsecases(FLAG_COMPLETION)).To(BeTrue())
|
||||||
|
Expect(i.HasUsecases(FLAG_CHAT)).To(BeTrue())
|
||||||
|
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -19,14 +19,16 @@ func ModelFromContext(ctx *fiber.Ctx, cl *config.BackendConfigLoader, loader *mo
|
|||||||
if ctx.Params("model") != "" {
|
if ctx.Params("model") != "" {
|
||||||
modelInput = ctx.Params("model")
|
modelInput = ctx.Params("model")
|
||||||
}
|
}
|
||||||
|
if ctx.Query("model") != "" {
|
||||||
|
modelInput = ctx.Query("model")
|
||||||
|
}
|
||||||
// Set model from bearer token, if available
|
// Set model from bearer token, if available
|
||||||
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bearer ")
|
bearer := strings.TrimLeft(ctx.Get("authorization"), "Bear ") // Reduced duplicate characters of Bearer
|
||||||
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
|
||||||
|
|
||||||
// If no model was specified, take the first available
|
// If no model was specified, take the first available
|
||||||
if modelInput == "" && !bearerExists && firstModel {
|
if modelInput == "" && !bearerExists && firstModel {
|
||||||
models, _ := services.ListModels(cl, loader, "", true)
|
models, _ := services.ListModels(cl, loader, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
if len(models) > 0 {
|
if len(models) > 0 {
|
||||||
modelInput = models[0]
|
modelInput = models[0]
|
||||||
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
log.Debug().Msgf("No model specified, using: %s", modelInput)
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ func SoundGenerationEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoad
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Support uploading files?
|
// TODO: Support uploading files?
|
||||||
filePath, _, err := backend.SoundGeneration(cfg.Backend, modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
filePath, _, err := backend.SoundGeneration(modelFile, input.Text, input.Duration, input.Temperature, input.DoSample, nil, nil, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,13 +45,13 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
config.LoadOptionContextSize(appConfig.ContextSize),
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
config.LoadOptionF16(appConfig.F16),
|
config.LoadOptionF16(appConfig.F16),
|
||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
modelFile = input.Model
|
modelFile = input.Model
|
||||||
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
} else {
|
} else {
|
||||||
modelFile = cfg.Model
|
modelFile = cfg.Model
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Request for model: %s", modelFile)
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
if input.Backend != "" {
|
if input.Backend != "" {
|
||||||
@@ -64,7 +64,7 @@ func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
Documents: req.Documents,
|
Documents: req.Documents,
|
||||||
}
|
}
|
||||||
|
|
||||||
results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
|
results, err := backend.Rerank(modelFile, request, ml, appConfig, *cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
60
core/http/endpoints/localai/get_token_metrics.go
Normal file
60
core/http/endpoints/localai/get_token_metrics.go
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
package localai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TokenMetricsEndpoint is an endpoint to get TokensProcessed Per Second for Active SlotID
|
||||||
|
//
|
||||||
|
// @Summary Get TokenMetrics for Active Slot.
|
||||||
|
// @Accept json
|
||||||
|
// @Produce audio/x-wav
|
||||||
|
// @Success 200 {string} binary "generated audio/wav file"
|
||||||
|
// @Router /v1/tokenMetrics [get]
|
||||||
|
// @Router /tokenMetrics [get]
|
||||||
|
func TokenMetricsEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
|
input := new(schema.TokenMetricsRequest)
|
||||||
|
|
||||||
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Err(err)
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Token Metrics for model: %s", modelFile)
|
||||||
|
|
||||||
|
response, err := backend.TokenMetrics(modelFile, ml, appConfig, *cfg)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return c.JSON(response)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -17,12 +17,14 @@ func SystemInformations(ml *model.ModelLoader, appConfig *config.ApplicationConf
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
loadedModels := ml.ListModels()
|
||||||
for b := range appConfig.ExternalGRPCBackends {
|
for b := range appConfig.ExternalGRPCBackends {
|
||||||
availableBackends = append(availableBackends, b)
|
availableBackends = append(availableBackends, b)
|
||||||
}
|
}
|
||||||
return c.JSON(
|
return c.JSON(
|
||||||
schema.SystemInformationResponse{
|
schema.SystemInformationResponse{
|
||||||
Backends: availableBackends,
|
Backends: availableBackends,
|
||||||
|
Models: loadedModels,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
58
core/http/endpoints/localai/tokenize.go
Normal file
58
core/http/endpoints/localai/tokenize.go
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
package localai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/mudler/LocalAI/core/backend"
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
"github.com/rs/zerolog/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TokenizeEndpoint exposes a REST API to tokenize the content
|
||||||
|
// @Summary Tokenize the input.
|
||||||
|
// @Success 200 {object} schema.TokenizeResponse "Response"
|
||||||
|
// @Router /v1/tokenize [post]
|
||||||
|
func TokenizeEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
||||||
|
return func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
|
input := new(schema.TokenizeRequest)
|
||||||
|
|
||||||
|
// Get input data from the request body
|
||||||
|
if err := c.BodyParser(input); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
modelFile, err := fiberContext.ModelFromContext(c, cl, ml, input.Model, false)
|
||||||
|
if err != nil {
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
|
||||||
|
config.LoadOptionDebug(appConfig.Debug),
|
||||||
|
config.LoadOptionThreads(appConfig.Threads),
|
||||||
|
config.LoadOptionContextSize(appConfig.ContextSize),
|
||||||
|
config.LoadOptionF16(appConfig.F16),
|
||||||
|
)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Err(err)
|
||||||
|
modelFile = input.Model
|
||||||
|
log.Warn().Msgf("Model not found in context: %s", input.Model)
|
||||||
|
} else {
|
||||||
|
modelFile = cfg.Model
|
||||||
|
}
|
||||||
|
log.Debug().Msgf("Request for model: %s", modelFile)
|
||||||
|
|
||||||
|
tokenResponse, err := backend.ModelTokenize(input.Content, ml, *cfg, appConfig)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
c.JSON(tokenResponse)
|
||||||
|
return nil
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -13,7 +13,7 @@ import (
|
|||||||
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
||||||
cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
|
cl *config.BackendConfigLoader, ml *model.ModelLoader, modelStatus func() (map[string]string, map[string]string)) func(*fiber.Ctx) error {
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
models, _ := services.ListModels(cl, ml, "", true)
|
models, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
backendConfigs := cl.GetAllBackendConfigs()
|
backendConfigs := cl.GetAllBackendConfigs()
|
||||||
|
|
||||||
galleryConfigs := map[string]*gallery.Config{}
|
galleryConfigs := map[string]*gallery.Config{}
|
||||||
@@ -32,18 +32,10 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
|
|||||||
// Get model statuses to display in the UI the operation in progress
|
// Get model statuses to display in the UI the operation in progress
|
||||||
processingModels, taskTypes := modelStatus()
|
processingModels, taskTypes := modelStatus()
|
||||||
|
|
||||||
modelsWithoutConfig := []string{}
|
|
||||||
|
|
||||||
for _, m := range models {
|
|
||||||
if _, ok := modelsWithBackendConfig[m]; !ok {
|
|
||||||
modelsWithoutConfig = append(modelsWithoutConfig, m)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
"Title": "LocalAI API - " + internal.PrintableVersion(),
|
||||||
"Version": internal.PrintableVersion(),
|
"Version": internal.PrintableVersion(),
|
||||||
"Models": modelsWithoutConfig,
|
"Models": models,
|
||||||
"ModelsConfig": backendConfigs,
|
"ModelsConfig": backendConfigs,
|
||||||
"GalleryConfig": galleryConfigs,
|
"GalleryConfig": galleryConfigs,
|
||||||
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
"IsP2PEnabled": p2p.IsP2PEnabled(),
|
||||||
|
|||||||
@@ -225,7 +225,7 @@ func filterAssistantsAfterID(assistants []Assistant, id string) []Assistant {
|
|||||||
|
|
||||||
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
|
func modelExists(cl *config.BackendConfigLoader, ml *model.ModelLoader, modelName string) (found bool) {
|
||||||
found = false
|
found = false
|
||||||
models, err := services.ListModels(cl, ml, "", true)
|
models, err := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -161,6 +161,12 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
textContentToReturn = ""
|
textContentToReturn = ""
|
||||||
id = uuid.New().String()
|
id = uuid.New().String()
|
||||||
created = int(time.Now().Unix())
|
created = int(time.Now().Unix())
|
||||||
|
// Set CorrelationID
|
||||||
|
correlationID := c.Get("X-Correlation-ID")
|
||||||
|
if len(strings.TrimSpace(correlationID)) == 0 {
|
||||||
|
correlationID = id
|
||||||
|
}
|
||||||
|
c.Set("X-Correlation-ID", correlationID)
|
||||||
|
|
||||||
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
modelFile, input, err := readRequest(c, cl, ml, startupOptions, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -444,6 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
|
|||||||
c.Set("Cache-Control", "no-cache")
|
c.Set("Cache-Control", "no-cache")
|
||||||
c.Set("Connection", "keep-alive")
|
c.Set("Connection", "keep-alive")
|
||||||
c.Set("Transfer-Encoding", "chunked")
|
c.Set("Transfer-Encoding", "chunked")
|
||||||
|
c.Set("X-Correlation-ID", id)
|
||||||
|
|
||||||
responses := make(chan schema.OpenAIResponse)
|
responses := make(chan schema.OpenAIResponse)
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a
|
|||||||
}
|
}
|
||||||
|
|
||||||
return func(c *fiber.Ctx) error {
|
return func(c *fiber.Ctx) error {
|
||||||
|
// Add Correlation
|
||||||
|
c.Set("X-Correlation-ID", id)
|
||||||
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
modelFile, input, err := readRequest(c, cl, ml, appConfig, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed reading parameters from request:%w", err)
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
||||||
|
|||||||
@@ -18,32 +18,32 @@ func ListModelsEndpoint(bcl *config.BackendConfigLoader, ml *model.ModelLoader)
|
|||||||
filter := c.Query("filter")
|
filter := c.Query("filter")
|
||||||
|
|
||||||
// By default, exclude any loose files that are already referenced by a configuration file.
|
// By default, exclude any loose files that are already referenced by a configuration file.
|
||||||
excludeConfigured := c.QueryBool("excludeConfigured", true)
|
var policy services.LooseFilePolicy
|
||||||
|
if c.QueryBool("excludeConfigured", true) {
|
||||||
|
policy = services.SKIP_IF_CONFIGURED
|
||||||
|
} else {
|
||||||
|
policy = services.ALWAYS_INCLUDE // This replicates current behavior. TODO: give more options to the user?
|
||||||
|
}
|
||||||
|
|
||||||
dataModels, err := modelList(bcl, ml, filter, excludeConfigured)
|
filterFn, err := config.BuildNameFilterFn(filter)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
modelNames, err := services.ListModels(bcl, ml, filterFn, policy)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map from a slice of names to a slice of OpenAIModel response objects
|
||||||
|
dataModels := []schema.OpenAIModel{}
|
||||||
|
for _, m := range modelNames {
|
||||||
|
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(schema.ModelsDataResponse{
|
return c.JSON(schema.ModelsDataResponse{
|
||||||
Object: "list",
|
Object: "list",
|
||||||
Data: dataModels,
|
Data: dataModels,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func modelList(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]schema.OpenAIModel, error) {
|
|
||||||
|
|
||||||
models, err := services.ListModels(bcl, ml, filter, excludeConfigured)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
dataModels := []schema.OpenAIModel{}
|
|
||||||
|
|
||||||
// Then iterate through the loose files:
|
|
||||||
for _, m := range models {
|
|
||||||
dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
|
|
||||||
}
|
|
||||||
|
|
||||||
return dataModels, nil
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -6,15 +6,22 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/gofiber/fiber/v2"
|
"github.com/gofiber/fiber/v2"
|
||||||
|
"github.com/google/uuid"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
fiberContext "github.com/mudler/LocalAI/core/http/ctx"
|
||||||
"github.com/mudler/LocalAI/core/schema"
|
"github.com/mudler/LocalAI/core/schema"
|
||||||
"github.com/mudler/LocalAI/pkg/functions"
|
"github.com/mudler/LocalAI/pkg/functions"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
"github.com/mudler/LocalAI/pkg/templates"
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type correlationIDKeyType string
|
||||||
|
|
||||||
|
// CorrelationIDKey to track request across process boundary
|
||||||
|
const CorrelationIDKey correlationIDKeyType = "correlationID"
|
||||||
|
|
||||||
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLoader, o *config.ApplicationConfig, firstModel bool) (string, *schema.OpenAIRequest, error) {
|
||||||
input := new(schema.OpenAIRequest)
|
input := new(schema.OpenAIRequest)
|
||||||
|
|
||||||
@@ -24,9 +31,14 @@ func readRequest(c *fiber.Ctx, cl *config.BackendConfigLoader, ml *model.ModelLo
|
|||||||
}
|
}
|
||||||
|
|
||||||
received, _ := json.Marshal(input)
|
received, _ := json.Marshal(input)
|
||||||
|
// Extract or generate the correlation ID
|
||||||
|
correlationID := c.Get("X-Correlation-ID", uuid.New().String())
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(o.Context)
|
ctx, cancel := context.WithCancel(o.Context)
|
||||||
input.Context = ctx
|
// Add the correlation ID to the new context
|
||||||
|
ctxWithCorrelationID := context.WithValue(ctx, CorrelationIDKey, correlationID)
|
||||||
|
|
||||||
|
input.Context = ctxWithCorrelationID
|
||||||
input.Cancel = cancel
|
input.Cancel = cancel
|
||||||
|
|
||||||
log.Debug().Msgf("Request received: %s", string(received))
|
log.Debug().Msgf("Request received: %s", string(received))
|
||||||
@@ -157,8 +169,13 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
continue CONTENT
|
continue CONTENT
|
||||||
}
|
}
|
||||||
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
input.Messages[i].StringVideos = append(input.Messages[i].StringVideos, base64) // TODO: make sure that we only return base64 stuff
|
||||||
|
|
||||||
|
t := "[vid-{{.ID}}]{{.Text}}"
|
||||||
|
if config.TemplateConfig.Video != "" {
|
||||||
|
t = config.TemplateConfig.Video
|
||||||
|
}
|
||||||
// set a placeholder for each image
|
// set a placeholder for each image
|
||||||
input.Messages[i].StringContent = fmt.Sprintf("[vid-%d]", vidIndex) + input.Messages[i].StringContent
|
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, vidIndex, input.Messages[i].StringContent)
|
||||||
vidIndex++
|
vidIndex++
|
||||||
case "audio_url", "audio":
|
case "audio_url", "audio":
|
||||||
// Decode content as base64 either if it's an URL or base64 text
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
@@ -169,7 +186,11 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
}
|
}
|
||||||
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
input.Messages[i].StringAudios = append(input.Messages[i].StringAudios, base64) // TODO: make sure that we only return base64 stuff
|
||||||
// set a placeholder for each image
|
// set a placeholder for each image
|
||||||
input.Messages[i].StringContent = fmt.Sprintf("[audio-%d]", audioIndex) + input.Messages[i].StringContent
|
t := "[audio-{{.ID}}]{{.Text}}"
|
||||||
|
if config.TemplateConfig.Audio != "" {
|
||||||
|
t = config.TemplateConfig.Audio
|
||||||
|
}
|
||||||
|
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, audioIndex, input.Messages[i].StringContent)
|
||||||
audioIndex++
|
audioIndex++
|
||||||
case "image_url", "image":
|
case "image_url", "image":
|
||||||
// Decode content as base64 either if it's an URL or base64 text
|
// Decode content as base64 either if it's an URL or base64 text
|
||||||
@@ -178,9 +199,14 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque
|
|||||||
log.Error().Msgf("Failed encoding image: %s", err)
|
log.Error().Msgf("Failed encoding image: %s", err)
|
||||||
continue CONTENT
|
continue CONTENT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
t := "[img-{{.ID}}]{{.Text}}"
|
||||||
|
if config.TemplateConfig.Image != "" {
|
||||||
|
t = config.TemplateConfig.Image
|
||||||
|
}
|
||||||
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
input.Messages[i].StringImages = append(input.Messages[i].StringImages, base64) // TODO: make sure that we only return base64 stuff
|
||||||
// set a placeholder for each image
|
// set a placeholder for each image
|
||||||
input.Messages[i].StringContent = fmt.Sprintf("[img-%d]", imgIndex) + input.Messages[i].StringContent
|
input.Messages[i].StringContent, _ = templates.TemplateMultiModal(t, imgIndex, input.Messages[i].StringContent)
|
||||||
imgIndex++
|
imgIndex++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,4 +63,7 @@ func RegisterLocalAIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
app.Get("/system", localai.SystemInformations(ml, appConfig))
|
app.Get("/system", localai.SystemInformations(ml, appConfig))
|
||||||
|
|
||||||
|
// misc
|
||||||
|
app.Post("/v1/tokenize", localai.TokenizeEndpoint(cl, ml, appConfig))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -303,7 +303,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
// Show the Chat page
|
// Show the Chat page
|
||||||
app.Get("/chat/:model", func(c *fiber.Ctx) error {
|
app.Get("/chat/:model", func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
|
|
||||||
summary := fiber.Map{
|
summary := fiber.Map{
|
||||||
"Title": "LocalAI - Chat with " + c.Params("model"),
|
"Title": "LocalAI - Chat with " + c.Params("model"),
|
||||||
@@ -318,7 +318,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
})
|
})
|
||||||
|
|
||||||
app.Get("/talk/", func(c *fiber.Ctx) error {
|
app.Get("/talk/", func(c *fiber.Ctx) error {
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
|
|
||||||
if len(backendConfigs) == 0 {
|
if len(backendConfigs) == 0 {
|
||||||
// If no model is available redirect to the index which suggests how to install models
|
// If no model is available redirect to the index which suggests how to install models
|
||||||
@@ -339,7 +339,7 @@ func RegisterUIRoutes(app *fiber.App,
|
|||||||
|
|
||||||
app.Get("/chat/", func(c *fiber.Ctx) error {
|
app.Get("/chat/", func(c *fiber.Ctx) error {
|
||||||
|
|
||||||
backendConfigs, _ := services.ListModels(cl, ml, "", true)
|
backendConfigs, _ := services.ListModels(cl, ml, config.NoFilterFn, services.SKIP_IF_CONFIGURED)
|
||||||
|
|
||||||
if len(backendConfigs) == 0 {
|
if len(backendConfigs) == 0 {
|
||||||
// If no model is available redirect to the index which suggests how to install models
|
// If no model is available redirect to the index which suggests how to install models
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"net"
|
"net"
|
||||||
|
|
||||||
"github.com/mudler/edgevpn/pkg/node"
|
"github.com/mudler/edgevpn/pkg/node"
|
||||||
@@ -41,7 +42,7 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
log.Error().Err(err).Msg("Error listening")
|
log.Error().Err(err).Msg("Error listening")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// ll.Info("Binding local port on", srcaddr)
|
|
||||||
go func() {
|
go func() {
|
||||||
<-ctx.Done()
|
<-ctx.Done()
|
||||||
l.Close()
|
l.Close()
|
||||||
@@ -82,6 +83,7 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
|
|
||||||
if workerID == "" {
|
if workerID == "" {
|
||||||
log.Error().Msg("No available nodes yet")
|
log.Error().Msg("No available nodes yet")
|
||||||
|
fs.sendHTMLResponse(conn, 503, "Sorry, waiting for nodes to connect")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,6 +91,7 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
nodeData, exists := GetNode(fs.service, workerID)
|
nodeData, exists := GetNode(fs.service, workerID)
|
||||||
if !exists {
|
if !exists {
|
||||||
log.Error().Msgf("Node %s not found", workerID)
|
log.Error().Msgf("Node %s not found", workerID)
|
||||||
|
fs.sendHTMLResponse(conn, 404, "Node not found")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,3 +103,42 @@ func (fs *FederatedServer) proxy(ctx context.Context, node *node.Node) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sendHTMLResponse sends a basic HTML response with a status code and a message.
|
||||||
|
// This is extracted to make the HTML content maintainable.
|
||||||
|
func (fs *FederatedServer) sendHTMLResponse(conn net.Conn, statusCode int, message string) {
|
||||||
|
defer conn.Close()
|
||||||
|
|
||||||
|
// Define the HTML content separately for easier maintenance.
|
||||||
|
htmlContent := fmt.Sprintf("<html><body><h1>%s</h1></body></html>\r\n", message)
|
||||||
|
|
||||||
|
// Create the HTTP response with dynamic status code and content.
|
||||||
|
response := fmt.Sprintf(
|
||||||
|
"HTTP/1.1 %d %s\r\n"+
|
||||||
|
"Content-Type: text/html\r\n"+
|
||||||
|
"Connection: close\r\n"+
|
||||||
|
"\r\n"+
|
||||||
|
"%s",
|
||||||
|
statusCode, getHTTPStatusText(statusCode), htmlContent,
|
||||||
|
)
|
||||||
|
|
||||||
|
// Write the response to the client connection.
|
||||||
|
_, writeErr := io.WriteString(conn, response)
|
||||||
|
if writeErr != nil {
|
||||||
|
log.Error().Err(writeErr).Msg("Error writing response to client")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getHTTPStatusText returns a textual representation of HTTP status codes.
|
||||||
|
func getHTTPStatusText(statusCode int) string {
|
||||||
|
switch statusCode {
|
||||||
|
case 503:
|
||||||
|
return "Service Unavailable"
|
||||||
|
case 404:
|
||||||
|
return "Not Found"
|
||||||
|
case 200:
|
||||||
|
return "OK"
|
||||||
|
default:
|
||||||
|
return "Unknown Status"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package schema
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/mudler/LocalAI/core/p2p"
|
"github.com/mudler/LocalAI/core/p2p"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
gopsutil "github.com/shirou/gopsutil/v3/process"
|
gopsutil "github.com/shirou/gopsutil/v3/process"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -9,6 +10,10 @@ type BackendMonitorRequest struct {
|
|||||||
Model string `json:"model" yaml:"model"`
|
Model string `json:"model" yaml:"model"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type TokenMetricsRequest struct {
|
||||||
|
Model string `json:"model" yaml:"model"`
|
||||||
|
}
|
||||||
|
|
||||||
type BackendMonitorResponse struct {
|
type BackendMonitorResponse struct {
|
||||||
MemoryInfo *gopsutil.MemoryInfoStat
|
MemoryInfo *gopsutil.MemoryInfoStat
|
||||||
MemoryPercent float32
|
MemoryPercent float32
|
||||||
@@ -73,4 +78,5 @@ type P2PNodesResponse struct {
|
|||||||
|
|
||||||
type SystemInformationResponse struct {
|
type SystemInformationResponse struct {
|
||||||
Backends []string `json:"backends"`
|
Backends []string `json:"backends"`
|
||||||
|
Models []model.Model `json:"loaded_models"`
|
||||||
}
|
}
|
||||||
|
|||||||
10
core/schema/tokenize.go
Normal file
10
core/schema/tokenize.go
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
package schema
|
||||||
|
|
||||||
|
type TokenizeRequest struct {
|
||||||
|
Content string `json:"content"`
|
||||||
|
Model string `json:"model"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type TokenizeResponse struct {
|
||||||
|
Tokens []int32 `json:"tokens"`
|
||||||
|
}
|
||||||
@@ -1,57 +1,49 @@
|
|||||||
package services
|
package services
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
|
||||||
|
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ListModels(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter string, excludeConfigured bool) ([]string, error) {
|
type LooseFilePolicy int
|
||||||
|
|
||||||
|
const (
|
||||||
|
SKIP_IF_CONFIGURED LooseFilePolicy = iota
|
||||||
|
SKIP_ALWAYS
|
||||||
|
ALWAYS_INCLUDE
|
||||||
|
LOOSE_ONLY
|
||||||
|
)
|
||||||
|
|
||||||
|
func ListModels(bcl *config.BackendConfigLoader, ml *model.ModelLoader, filter config.BackendConfigFilterFn, looseFilePolicy LooseFilePolicy) ([]string, error) {
|
||||||
|
|
||||||
|
var skipMap map[string]interface{} = map[string]interface{}{}
|
||||||
|
|
||||||
|
dataModels := []string{}
|
||||||
|
|
||||||
|
// Start with known configurations
|
||||||
|
if looseFilePolicy != LOOSE_ONLY {
|
||||||
|
for _, c := range bcl.GetBackendConfigsByFilter(filter) {
|
||||||
|
if looseFilePolicy == SKIP_IF_CONFIGURED {
|
||||||
|
skipMap[c.Model] = nil
|
||||||
|
}
|
||||||
|
dataModels = append(dataModels, c.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then iterate through the loose files if requested.
|
||||||
|
if looseFilePolicy != SKIP_ALWAYS {
|
||||||
|
|
||||||
models, err := ml.ListFilesInModelPath()
|
models, err := ml.ListFilesInModelPath()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var mm map[string]interface{} = map[string]interface{}{}
|
|
||||||
|
|
||||||
dataModels := []string{}
|
|
||||||
|
|
||||||
var filterFn func(name string) bool
|
|
||||||
|
|
||||||
// If filter is not specified, do not filter the list by model name
|
|
||||||
if filter == "" {
|
|
||||||
filterFn = func(_ string) bool { return true }
|
|
||||||
} else {
|
|
||||||
// If filter _IS_ specified, we compile it to a regex which is used to create the filterFn
|
|
||||||
rxp, err := regexp.Compile(filter)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
filterFn = func(name string) bool {
|
|
||||||
return rxp.MatchString(name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start with the known configurations
|
|
||||||
for _, c := range bcl.GetAllBackendConfigs() {
|
|
||||||
if excludeConfigured {
|
|
||||||
mm[c.Model] = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if filterFn(c.Name) {
|
|
||||||
dataModels = append(dataModels, c.Name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Then iterate through the loose files:
|
|
||||||
for _, m := range models {
|
for _, m := range models {
|
||||||
// And only adds them if they shouldn't be skipped.
|
// And only adds them if they shouldn't be skipped.
|
||||||
if _, exists := mm[m]; !exists && filterFn(m) {
|
if _, exists := skipMap[m]; !exists && filter(m, nil) {
|
||||||
dataModels = append(dataModels, m)
|
dataModels = append(dataModels, m)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return dataModels, nil
|
return dataModels, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -160,13 +160,7 @@ func Startup(opts ...config.AppOption) (*config.BackendConfigLoader, *model.Mode
|
|||||||
|
|
||||||
log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
|
log.Debug().Msgf("Auto loading model %s into memory from file: %s", m, cfg.Model)
|
||||||
|
|
||||||
grpcOpts := backend.GRPCModelOpts(*cfg)
|
o := backend.ModelOptions(*cfg, options, []model.Option{})
|
||||||
o := []model.Option{
|
|
||||||
model.WithModel(cfg.Model),
|
|
||||||
model.WithAssetDir(options.AssetsDestination),
|
|
||||||
model.WithThreads(uint32(options.Threads)),
|
|
||||||
model.WithLoadGRPCLoadModelOpts(grpcOpts),
|
|
||||||
}
|
|
||||||
|
|
||||||
var backendErr error
|
var backendErr error
|
||||||
if cfg.Backend != "" {
|
if cfg.Backend != "" {
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"version": "v2.21.0"
|
"version": "v2.21.1"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
llama_index==0.11.12
|
llama_index==0.11.14
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
weaviate_client==4.8.1
|
weaviate_client==4.8.1
|
||||||
transformers
|
transformers
|
||||||
|
|||||||
@@ -1,2 +1,2 @@
|
|||||||
langchain==0.3.0
|
langchain==0.3.1
|
||||||
openai==1.47.1
|
openai==1.50.2
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
langchain==0.3.0
|
langchain==0.3.1
|
||||||
openai==1.47.1
|
openai==1.50.2
|
||||||
chromadb==0.5.7
|
chromadb==0.5.11
|
||||||
llama-index==0.11.12
|
llama-index==0.11.14
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
aiohttp==3.10.3
|
aiohttp==3.10.8
|
||||||
aiosignal==1.3.1
|
aiosignal==1.3.1
|
||||||
async-timeout==4.0.3
|
async-timeout==4.0.3
|
||||||
attrs==24.2.0
|
attrs==24.2.0
|
||||||
@@ -8,10 +8,10 @@ colorama==0.4.6
|
|||||||
dataclasses-json==0.6.7
|
dataclasses-json==0.6.7
|
||||||
debugpy==1.8.2
|
debugpy==1.8.2
|
||||||
frozenlist==1.4.1
|
frozenlist==1.4.1
|
||||||
greenlet==3.1.0
|
greenlet==3.1.1
|
||||||
idna==3.10
|
idna==3.10
|
||||||
langchain==0.3.0
|
langchain==0.3.1
|
||||||
langchain-community==0.2.16
|
langchain-community==0.3.1
|
||||||
marshmallow==3.22.0
|
marshmallow==3.22.0
|
||||||
marshmallow-enum==1.5.1
|
marshmallow-enum==1.5.1
|
||||||
multidict==6.0.5
|
multidict==6.0.5
|
||||||
@@ -30,4 +30,4 @@ tqdm==4.66.5
|
|||||||
typing-inspect==0.9.0
|
typing-inspect==0.9.0
|
||||||
typing_extensions==4.12.2
|
typing_extensions==4.12.2
|
||||||
urllib3==2.2.3
|
urllib3==2.2.3
|
||||||
yarl==1.11.1
|
yarl==1.13.1
|
||||||
|
|||||||
@@ -1,6 +1,90 @@
|
|||||||
---
|
---
|
||||||
## Qwen2.5
|
- name: "salamandra-7b-instruct"
|
||||||
|
icon: https://huggingface.co/BSC-LT/salamandra-7b-instruct/resolve/main/images/salamandra_header.png
|
||||||
|
# Uses chatml
|
||||||
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
|
license: apache-2.0
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/BSC-LT/salamandra-7b-instruct
|
||||||
|
- https://huggingface.co/cstr/salamandra-7b-instruct-GGUF
|
||||||
|
tags:
|
||||||
|
- llm
|
||||||
|
- gguf
|
||||||
|
- gpu
|
||||||
|
- cpu
|
||||||
|
- salamandra
|
||||||
|
description: |
|
||||||
|
Transformer-based decoder-only language model that has been pre-trained on 7.8 trillion tokens of highly curated data. The pre-training corpus contains text in 35 European languages and code.
|
||||||
|
Salamandra comes in three different sizes — 2B, 7B and 40B parameters — with their respective base and instruction-tuned variants. This model card corresponds to the 7B instructed version.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: salamandra-7b-instruct.Q4_K_M-f32.gguf
|
||||||
|
files:
|
||||||
|
- filename: salamandra-7b-instruct.Q4_K_M-f32.gguf
|
||||||
|
sha256: bac8e8c1d1d9d53cbdb148b8ff9ad378ddb392429207099e85b5aae3a43bff3d
|
||||||
|
uri: huggingface://cstr/salamandra-7b-instruct-GGUF/salamandra-7b-instruct.Q4_K_M-f32.gguf
|
||||||
|
## llama3.2
|
||||||
|
- &llama32
|
||||||
|
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png
|
||||||
|
license: llama3.2
|
||||||
|
description: |
|
||||||
|
The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.
|
||||||
|
|
||||||
|
Model Developer: Meta
|
||||||
|
|
||||||
|
Model Architecture: Llama 3.2 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.
|
||||||
|
tags:
|
||||||
|
- llm
|
||||||
|
- gguf
|
||||||
|
- gpu
|
||||||
|
- cpu
|
||||||
|
- llama3.2
|
||||||
|
name: "llama-3.2-1b-instruct:q4_k_m"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
|
sha256: 1d0e9419ec4e12aef73ccf4ffd122703e94c48344a96bc7c5f0f2772c2152ce3
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf
|
||||||
|
- !!merge <<: *llama32
|
||||||
|
name: "llama-3.2-3b-instruct:q4_k_m"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-3b-instruct-q4_k_m.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-3b-instruct-q4_k_m.gguf
|
||||||
|
sha256: c55a83bfb6396799337853ca69918a0b9bbb2917621078c34570bc17d20fd7a1
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-3B-Instruct-Q4_K_M-GGUF/llama-3.2-3b-instruct-q4_k_m.gguf
|
||||||
|
- !!merge <<: *llama32
|
||||||
|
name: "llama-3.2-3b-instruct:q8_0"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-3b-instruct-q8_0.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-3b-instruct-q8_0.gguf
|
||||||
|
sha256: 51725f77f997a5080c3d8dd66e073da22ddf48ab5264f21f05ded9b202c3680e
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF/llama-3.2-3b-instruct-q8_0.gguf
|
||||||
|
- !!merge <<: *llama32
|
||||||
|
name: "llama-3.2-1b-instruct:q8_0"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: llama-3.2-1b-instruct-q8_0.gguf
|
||||||
|
files:
|
||||||
|
- filename: llama-3.2-1b-instruct-q8_0.gguf
|
||||||
|
sha256: ba345c83bf5cc679c653b853c46517eea5a34f03ed2205449db77184d9ae62a9
|
||||||
|
uri: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF/llama-3.2-1b-instruct-q8_0.gguf
|
||||||
- &qwen25
|
- &qwen25
|
||||||
|
## Qwen2.5
|
||||||
name: "qwen2.5-14b-instruct"
|
name: "qwen2.5-14b-instruct"
|
||||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
license: apache-2.0
|
license: apache-2.0
|
||||||
@@ -135,8 +219,8 @@
|
|||||||
model: Qwen2.5-32B.Q4_K_M.gguf
|
model: Qwen2.5-32B.Q4_K_M.gguf
|
||||||
files:
|
files:
|
||||||
- filename: Qwen2.5-32B.Q4_K_M.gguf
|
- filename: Qwen2.5-32B.Q4_K_M.gguf
|
||||||
sha256: 02703e27c8b964db445444581a6937ad7538f0c32a100b26b49fa0e8ff527155
|
|
||||||
uri: huggingface://mradermacher/Qwen2.5-32B-GGUF/Qwen2.5-32B.Q4_K_M.gguf
|
uri: huggingface://mradermacher/Qwen2.5-32B-GGUF/Qwen2.5-32B.Q4_K_M.gguf
|
||||||
|
sha256: fa42a4067e3630929202b6bb1ef5cebc43c1898494aedfd567b7d53c7a9d84a6
|
||||||
- !!merge <<: *qwen25
|
- !!merge <<: *qwen25
|
||||||
name: "qwen2.5-32b-instruct"
|
name: "qwen2.5-32b-instruct"
|
||||||
urls:
|
urls:
|
||||||
@@ -161,8 +245,82 @@
|
|||||||
- filename: Qwen2.5-72B-Instruct-Q4_K_M.gguf
|
- filename: Qwen2.5-72B-Instruct-Q4_K_M.gguf
|
||||||
sha256: e4c8fad16946be8cf0bbf67eb8f4e18fc7415a5a6d2854b4cda453edb4082545
|
sha256: e4c8fad16946be8cf0bbf67eb8f4e18fc7415a5a6d2854b4cda453edb4082545
|
||||||
uri: huggingface://bartowski/Qwen2.5-72B-Instruct-GGUF/Qwen2.5-72B-Instruct-Q4_K_M.gguf
|
uri: huggingface://bartowski/Qwen2.5-72B-Instruct-GGUF/Qwen2.5-72B-Instruct-Q4_K_M.gguf
|
||||||
## SmolLM
|
- !!merge <<: *qwen25
|
||||||
|
name: "bigqwen2.5-52b-instruct"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/98GiKtmH1AtHHbIbOUH4Y.jpeg
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/mlabonne/BigQwen2.5-52B-Instruct
|
||||||
|
- https://huggingface.co/bartowski/BigQwen2.5-52B-Instruct-GGUF
|
||||||
|
description: |
|
||||||
|
BigQwen2.5-52B-Instruct is a Qwen/Qwen2-32B-Instruct self-merge made with MergeKit.
|
||||||
|
It applies the mlabonne/Meta-Llama-3-120B-Instruct recipe.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: BigQwen2.5-52B-Instruct-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: BigQwen2.5-52B-Instruct-Q4_K_M.gguf
|
||||||
|
sha256: 9c939f08e366b51b07096eb2ecb5cc2a82894ac7baf639e446237ad39889c896
|
||||||
|
uri: huggingface://bartowski/BigQwen2.5-52B-Instruct-GGUF/BigQwen2.5-52B-Instruct-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "replete-llm-v2.5-qwen-14b"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/ihnWXDEgV-ZKN_B036U1J.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Replete-AI/Replete-LLM-V2.5-Qwen-14b
|
||||||
|
- https://huggingface.co/bartowski/Replete-LLM-V2.5-Qwen-14b-GGUF
|
||||||
|
description: |
|
||||||
|
Replete-LLM-V2.5-Qwen-14b is a continues finetuned version of Qwen2.5-14B. I noticed recently that the Qwen team did not learn from my methods of continuous finetuning, the great benefits, and no downsides of it. So I took it upon myself to merge the instruct model with the base model myself using the Ties merge method
|
||||||
|
|
||||||
|
This version of the model shows higher performance than the original instruct and base models.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||||
|
sha256: 17d0792ff5e3062aecb965629f66e679ceb407e4542e8045993dcfe9e7e14d9d
|
||||||
|
uri: huggingface://bartowski/Replete-LLM-V2.5-Qwen-14b-GGUF/Replete-LLM-V2.5-Qwen-14b-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "replete-llm-v2.5-qwen-7b"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/ihnWXDEgV-ZKN_B036U1J.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/Replete-AI/Replete-LLM-V2.5-Qwen-7b
|
||||||
|
- https://huggingface.co/bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF
|
||||||
|
description: |
|
||||||
|
Replete-LLM-V2.5-Qwen-7b is a continues finetuned version of Qwen2.5-14B. I noticed recently that the Qwen team did not learn from my methods of continuous finetuning, the great benefits, and no downsides of it. So I took it upon myself to merge the instruct model with the base model myself using the Ties merge method
|
||||||
|
|
||||||
|
This version of the model shows higher performance than the original instruct and base models.
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||||
|
sha256: 054d54972259c0398b4e0af3f408f608e1166837b1d7535d08fc440d1daf8639
|
||||||
|
uri: huggingface://bartowski/Replete-LLM-V2.5-Qwen-7b-GGUF/Replete-LLM-V2.5-Qwen-7b-Q4_K_M.gguf
|
||||||
|
- !!merge <<: *qwen25
|
||||||
|
name: "calme-2.2-qwen2.5-72b-i1"
|
||||||
|
icon: https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2.5-72b/resolve/main/calme-2.webp
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/MaziyarPanahi/calme-2.2-qwen2.5-72b
|
||||||
|
- https://huggingface.co/mradermacher/calme-2.2-qwen2.5-72b-i1-GGUF
|
||||||
|
description: |
|
||||||
|
This model is a fine-tuned version of the powerful Qwen/Qwen2.5-72B-Instruct, pushing the boundaries of natural language understanding and generation even further. My goal was to create a versatile and robust model that excels across a wide range of benchmarks and real-world applications.
|
||||||
|
Use Cases
|
||||||
|
|
||||||
|
This model is suitable for a wide range of applications, including but not limited to:
|
||||||
|
|
||||||
|
Advanced question-answering systems
|
||||||
|
Intelligent chatbots and virtual assistants
|
||||||
|
Content generation and summarization
|
||||||
|
Code generation and analysis
|
||||||
|
Complex problem-solving and decision support
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf
|
||||||
|
sha256: 5fdfa599724d7c78502c477ced1d294e92781b91d3265bd0748fbf15a6fefde6
|
||||||
|
uri: huggingface://mradermacher/calme-2.2-qwen2.5-72b-i1-GGUF/calme-2.2-qwen2.5-72b.i1-Q4_K_M.gguf
|
||||||
- &smollm
|
- &smollm
|
||||||
|
## SmolLM
|
||||||
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
name: "smollm-1.7b-instruct"
|
name: "smollm-1.7b-instruct"
|
||||||
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
|
icon: https://huggingface.co/datasets/HuggingFaceTB/images/resolve/main/banner_smol.png
|
||||||
@@ -1555,6 +1713,27 @@
|
|||||||
- filename: MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
- filename: MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
||||||
sha256: 1989123481ca1936c8a2cbe278ff5d1d2b0ae63dbdc838bb36a6d7547b8087b3
|
sha256: 1989123481ca1936c8a2cbe278ff5d1d2b0ae63dbdc838bb36a6d7547b8087b3
|
||||||
uri: huggingface://Lewdiculous/MN-12B-Lyra-v4-GGUF-IQ-Imatrix/MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
uri: huggingface://Lewdiculous/MN-12B-Lyra-v4-GGUF-IQ-Imatrix/MN-12B-Lyra-v4-Q4_K_M-imat.gguf
|
||||||
|
- !!merge <<: *mistral03
|
||||||
|
name: "magnusintellectus-12b-v1-i1"
|
||||||
|
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
|
||||||
|
icon: https://cdn-uploads.huggingface.co/production/uploads/66b564058d9afb7a9d5607d5/hUVJI1Qa4tCMrZWMgYkoD.png
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/GalrionSoftworks/MagnusIntellectus-12B-v1
|
||||||
|
- https://huggingface.co/mradermacher/MagnusIntellectus-12B-v1-i1-GGUF
|
||||||
|
description: |
|
||||||
|
How pleasant, the rocks appear to have made a decent conglomerate. A-.
|
||||||
|
|
||||||
|
MagnusIntellectus is a merge of the following models using LazyMergekit:
|
||||||
|
|
||||||
|
UsernameJustAnother/Nemo-12B-Marlin-v5
|
||||||
|
anthracite-org/magnum-12b-v2
|
||||||
|
overrides:
|
||||||
|
parameters:
|
||||||
|
model: MagnusIntellectus-12B-v1.i1-Q4_K_M.gguf
|
||||||
|
files:
|
||||||
|
- filename: MagnusIntellectus-12B-v1.i1-Q4_K_M.gguf
|
||||||
|
sha256: c97107983b4edc5b6f2a592d227ca2dd4196e2af3d3bc0fe6b7a8954a1fb5870
|
||||||
|
uri: huggingface://mradermacher/MagnusIntellectus-12B-v1-i1-GGUF/MagnusIntellectus-12B-v1.i1-Q4_K_M.gguf
|
||||||
- &mudler
|
- &mudler
|
||||||
### START mudler's LocalAI specific-models
|
### START mudler's LocalAI specific-models
|
||||||
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
|
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
|
||||||
|
|||||||
@@ -51,4 +51,6 @@ type Backend interface {
|
|||||||
StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
|
StoresFind(ctx context.Context, in *pb.StoresFindOptions, opts ...grpc.CallOption) (*pb.StoresFindResult, error)
|
||||||
|
|
||||||
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.CallOption) (*pb.RerankResult, error)
|
||||||
|
|
||||||
|
GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -374,3 +374,21 @@ func (c *Client) Rerank(ctx context.Context, in *pb.RerankRequest, opts ...grpc.
|
|||||||
client := pb.NewBackendClient(conn)
|
client := pb.NewBackendClient(conn)
|
||||||
return client.Rerank(ctx, in, opts...)
|
return client.Rerank(ctx, in, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Client) GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error) {
|
||||||
|
if !c.parallel {
|
||||||
|
c.opMutex.Lock()
|
||||||
|
defer c.opMutex.Unlock()
|
||||||
|
}
|
||||||
|
c.setBusy(true)
|
||||||
|
defer c.setBusy(false)
|
||||||
|
c.wdMark()
|
||||||
|
defer c.wdUnMark()
|
||||||
|
conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer conn.Close()
|
||||||
|
client := pb.NewBackendClient(conn)
|
||||||
|
return client.GetMetrics(ctx, in, opts...)
|
||||||
|
}
|
||||||
|
|||||||
@@ -87,6 +87,10 @@ func (e *embedBackend) Rerank(ctx context.Context, in *pb.RerankRequest, opts ..
|
|||||||
return e.s.Rerank(ctx, in)
|
return e.s.Rerank(ctx, in)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *embedBackend) GetTokenMetrics(ctx context.Context, in *pb.MetricsRequest, opts ...grpc.CallOption) (*pb.MetricsResponse, error) {
|
||||||
|
return e.s.GetMetrics(ctx, in)
|
||||||
|
}
|
||||||
|
|
||||||
type embedBackendServerStream struct {
|
type embedBackendServerStream struct {
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
fn func(s []byte)
|
fn func(s []byte)
|
||||||
|
|||||||
@@ -144,6 +144,8 @@ func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictS
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
err := s.llm.PredictStream(in, resultChan)
|
err := s.llm.PredictStream(in, resultChan)
|
||||||
|
// close the channel, so if resultChan is not closed by the LLM (maybe because does not implement PredictStream), the client will not hang
|
||||||
|
close(resultChan)
|
||||||
<-done
|
<-done
|
||||||
|
|
||||||
return err
|
return err
|
||||||
|
|||||||
@@ -268,10 +268,10 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
|
|||||||
|
|
||||||
// starts the grpcModelProcess for the backend, and returns a grpc client
|
// starts the grpcModelProcess for the backend, and returns a grpc client
|
||||||
// It also loads the model
|
// It also loads the model
|
||||||
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (*Model, error) {
|
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string, string) (*Model, error) {
|
||||||
return func(modelName, modelFile string) (*Model, error) {
|
return func(modelID, modelName, modelFile string) (*Model, error) {
|
||||||
|
|
||||||
log.Debug().Msgf("Loading Model %s with gRPC (file: %s) (backend: %s): %+v", modelName, modelFile, backend, *o)
|
log.Debug().Msgf("Loading Model %s with gRPC (file: %s) (backend: %s): %+v", modelID, modelFile, backend, *o)
|
||||||
|
|
||||||
var client *Model
|
var client *Model
|
||||||
|
|
||||||
@@ -304,18 +304,19 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
return nil, fmt.Errorf("failed allocating free ports: %s", err.Error())
|
||||||
}
|
}
|
||||||
// Make sure the process is executable
|
// Make sure the process is executable
|
||||||
if err := ml.startProcess(uri, o.model, serverAddress); err != nil {
|
process, err := ml.startProcess(uri, modelID, serverAddress)
|
||||||
|
if err != nil {
|
||||||
log.Error().Err(err).Str("path", uri).Msg("failed to launch ")
|
log.Error().Err(err).Str("path", uri).Msg("failed to launch ")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("GRPC Service Started")
|
log.Debug().Msgf("GRPC Service Started")
|
||||||
|
|
||||||
client = NewModel(serverAddress)
|
client = NewModel(modelID, serverAddress, process)
|
||||||
} else {
|
} else {
|
||||||
log.Debug().Msg("external backend is uri")
|
log.Debug().Msg("external backend is a uri")
|
||||||
// address
|
// address
|
||||||
client = NewModel(uri)
|
client = NewModel(modelID, uri, nil)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
grpcProcess := backendPath(o.assetDir, backend)
|
grpcProcess := backendPath(o.assetDir, backend)
|
||||||
@@ -346,13 +347,14 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
|
args, grpcProcess = library.LoadLDSO(o.assetDir, args, grpcProcess)
|
||||||
|
|
||||||
// Make sure the process is executable in any circumstance
|
// Make sure the process is executable in any circumstance
|
||||||
if err := ml.startProcess(grpcProcess, o.model, serverAddress, args...); err != nil {
|
process, err := ml.startProcess(grpcProcess, modelID, serverAddress, args...)
|
||||||
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("GRPC Service Started")
|
log.Debug().Msgf("GRPC Service Started")
|
||||||
|
|
||||||
client = NewModel(serverAddress)
|
client = NewModel(modelID, serverAddress, process)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Wait for the service to start up")
|
log.Debug().Msgf("Wait for the service to start up")
|
||||||
@@ -374,6 +376,9 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
|
|
||||||
if !ready {
|
if !ready {
|
||||||
log.Debug().Msgf("GRPC Service NOT ready")
|
log.Debug().Msgf("GRPC Service NOT ready")
|
||||||
|
if process := client.Process(); process != nil {
|
||||||
|
process.Stop()
|
||||||
|
}
|
||||||
return nil, fmt.Errorf("grpc service not ready")
|
return nil, fmt.Errorf("grpc service not ready")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -385,9 +390,15 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
|
|||||||
|
|
||||||
res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
|
res, err := client.GRPC(o.parallelRequests, ml.wd).LoadModel(o.context, &options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if process := client.Process(); process != nil {
|
||||||
|
process.Stop()
|
||||||
|
}
|
||||||
return nil, fmt.Errorf("could not load model: %w", err)
|
return nil, fmt.Errorf("could not load model: %w", err)
|
||||||
}
|
}
|
||||||
if !res.Success {
|
if !res.Success {
|
||||||
|
if process := client.Process(); process != nil {
|
||||||
|
process.Stop()
|
||||||
|
}
|
||||||
return nil, fmt.Errorf("could not load model (no success): %s", res.Message)
|
return nil, fmt.Errorf("could not load model (no success): %s", res.Message)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -402,11 +413,7 @@ func (ml *ModelLoader) ListAvailableBackends(assetdir string) ([]string, error)
|
|||||||
func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {
|
func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err error) {
|
||||||
o := NewOptions(opts...)
|
o := NewOptions(opts...)
|
||||||
|
|
||||||
if o.model != "" {
|
log.Info().Msgf("Loading model '%s' with backend %s", o.modelID, o.backendString)
|
||||||
log.Info().Msgf("Loading model '%s' with backend %s", o.model, o.backendString)
|
|
||||||
} else {
|
|
||||||
log.Info().Msgf("Loading model with backend %s", o.backendString)
|
|
||||||
}
|
|
||||||
|
|
||||||
backend := strings.ToLower(o.backendString)
|
backend := strings.ToLower(o.backendString)
|
||||||
if realBackend, exists := Aliases[backend]; exists {
|
if realBackend, exists := Aliases[backend]; exists {
|
||||||
@@ -415,11 +422,10 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
|
|||||||
}
|
}
|
||||||
|
|
||||||
if o.singleActiveBackend {
|
if o.singleActiveBackend {
|
||||||
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
|
log.Debug().Msgf("Stopping all backends except '%s'", o.modelID)
|
||||||
err := ml.StopGRPC(allExcept(o.model))
|
err := ml.StopGRPC(allExcept(o.modelID))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel")
|
log.Error().Err(err).Str("keptModel", o.modelID).Msg("error while shutting down all backends except for the keptModel")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -433,7 +439,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
|
|||||||
backendToConsume = backend
|
backendToConsume = backend
|
||||||
}
|
}
|
||||||
|
|
||||||
model, err := ml.LoadModel(o.model, ml.grpcModel(backendToConsume, o))
|
model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, o))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -446,18 +452,18 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
|||||||
|
|
||||||
// Return earlier if we have a model already loaded
|
// Return earlier if we have a model already loaded
|
||||||
// (avoid looping through all the backends)
|
// (avoid looping through all the backends)
|
||||||
if m := ml.CheckIsLoaded(o.model); m != nil {
|
if m := ml.CheckIsLoaded(o.modelID); m != nil {
|
||||||
log.Debug().Msgf("Model '%s' already loaded", o.model)
|
log.Debug().Msgf("Model '%s' already loaded", o.modelID)
|
||||||
|
|
||||||
return m.GRPC(o.parallelRequests, ml.wd), nil
|
return m.GRPC(o.parallelRequests, ml.wd), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we can have only one backend active, kill all the others (except external backends)
|
// If we can have only one backend active, kill all the others (except external backends)
|
||||||
if o.singleActiveBackend {
|
if o.singleActiveBackend {
|
||||||
log.Debug().Msgf("Stopping all backends except '%s'", o.model)
|
log.Debug().Msgf("Stopping all backends except '%s'", o.modelID)
|
||||||
err := ml.StopGRPC(allExcept(o.model))
|
err := ml.StopGRPC(allExcept(o.modelID))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Err(err).Str("keptModel", o.model).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
|
log.Error().Err(err).Str("keptModel", o.modelID).Msg("error while shutting down all backends except for the keptModel - greedyloader continuing")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -476,23 +482,13 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
|
|||||||
|
|
||||||
log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
|
log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
|
||||||
|
|
||||||
if o.model != "" {
|
log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.modelID, autoLoadBackends)
|
||||||
log.Info().Msgf("Trying to load the model '%s' with the backend '%s'", o.model, autoLoadBackends)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, key := range autoLoadBackends {
|
for _, key := range autoLoadBackends {
|
||||||
log.Info().Msgf("[%s] Attempting to load", key)
|
log.Info().Msgf("[%s] Attempting to load", key)
|
||||||
options := []Option{
|
options := append(opts, []Option{
|
||||||
WithBackendString(key),
|
WithBackendString(key),
|
||||||
WithModel(o.model),
|
}...)
|
||||||
WithLoadGRPCLoadModelOpts(o.gRPCOptions),
|
|
||||||
WithThreads(o.threads),
|
|
||||||
WithAssetDir(o.assetDir),
|
|
||||||
}
|
|
||||||
|
|
||||||
for k, v := range o.externalBackends {
|
|
||||||
options = append(options, WithExternalBackend(k, v))
|
|
||||||
}
|
|
||||||
|
|
||||||
model, modelerr := ml.BackendLoader(options...)
|
model, modelerr := ml.BackendLoader(options...)
|
||||||
if modelerr == nil && model != nil {
|
if modelerr == nil && model != nil {
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ import (
|
|||||||
|
|
||||||
"github.com/mudler/LocalAI/pkg/utils"
|
"github.com/mudler/LocalAI/pkg/utils"
|
||||||
|
|
||||||
process "github.com/mudler/go-processmanager"
|
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -24,7 +23,6 @@ type ModelLoader struct {
|
|||||||
ModelPath string
|
ModelPath string
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
models map[string]*Model
|
models map[string]*Model
|
||||||
grpcProcesses map[string]*process.Process
|
|
||||||
templates *templates.TemplateCache
|
templates *templates.TemplateCache
|
||||||
wd *WatchDog
|
wd *WatchDog
|
||||||
}
|
}
|
||||||
@@ -34,7 +32,6 @@ func NewModelLoader(modelPath string) *ModelLoader {
|
|||||||
ModelPath: modelPath,
|
ModelPath: modelPath,
|
||||||
models: make(map[string]*Model),
|
models: make(map[string]*Model),
|
||||||
templates: templates.NewTemplateCache(modelPath),
|
templates: templates.NewTemplateCache(modelPath),
|
||||||
grpcProcesses: make(map[string]*process.Process),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nml
|
return nml
|
||||||
@@ -105,21 +102,21 @@ FILE:
|
|||||||
return models, nil
|
return models, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) ListModels() []*Model {
|
func (ml *ModelLoader) ListModels() []Model {
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
defer ml.mu.Unlock()
|
defer ml.mu.Unlock()
|
||||||
|
|
||||||
models := []*Model{}
|
models := []Model{}
|
||||||
for _, model := range ml.models {
|
for _, model := range ml.models {
|
||||||
models = append(models, model)
|
models = append(models, *model)
|
||||||
}
|
}
|
||||||
|
|
||||||
return models
|
return models
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (*Model, error)) (*Model, error) {
|
func (ml *ModelLoader) LoadModel(modelID, modelName string, loader func(string, string, string) (*Model, error)) (*Model, error) {
|
||||||
// Check if we already have a loaded model
|
// Check if we already have a loaded model
|
||||||
if model := ml.CheckIsLoaded(modelName); model != nil {
|
if model := ml.CheckIsLoaded(modelID); model != nil {
|
||||||
return model, nil
|
return model, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -127,18 +124,18 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
|
|||||||
modelFile := filepath.Join(ml.ModelPath, modelName)
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
||||||
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
||||||
|
|
||||||
model, err := loader(modelName, modelFile)
|
ml.mu.Lock()
|
||||||
|
defer ml.mu.Unlock()
|
||||||
|
model, err := loader(modelID, modelName, modelFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to load model with internal loader: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if model == nil {
|
if model == nil {
|
||||||
return nil, fmt.Errorf("loader didn't return a model")
|
return nil, fmt.Errorf("loader didn't return a model")
|
||||||
}
|
}
|
||||||
|
|
||||||
ml.mu.Lock()
|
ml.models[modelID] = model
|
||||||
defer ml.mu.Unlock()
|
|
||||||
ml.models[modelName] = model
|
|
||||||
|
|
||||||
return model, nil
|
return model, nil
|
||||||
}
|
}
|
||||||
@@ -146,14 +143,13 @@ func (ml *ModelLoader) LoadModel(modelName string, loader func(string, string) (
|
|||||||
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
func (ml *ModelLoader) ShutdownModel(modelName string) error {
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
defer ml.mu.Unlock()
|
defer ml.mu.Unlock()
|
||||||
|
model, ok := ml.models[modelName]
|
||||||
_, ok := ml.models[modelName]
|
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("model %s not found", modelName)
|
return fmt.Errorf("model %s not found", modelName)
|
||||||
}
|
}
|
||||||
|
|
||||||
retries := 1
|
retries := 1
|
||||||
for ml.models[modelName].GRPC(false, ml.wd).IsBusy() {
|
for model.GRPC(false, ml.wd).IsBusy() {
|
||||||
log.Debug().Msgf("%s busy. Waiting.", modelName)
|
log.Debug().Msgf("%s busy. Waiting.", modelName)
|
||||||
dur := time.Duration(retries*2) * time.Second
|
dur := time.Duration(retries*2) * time.Second
|
||||||
if dur > retryTimeout {
|
if dur > retryTimeout {
|
||||||
@@ -185,8 +181,8 @@ func (ml *ModelLoader) CheckIsLoaded(s string) *Model {
|
|||||||
if !alive {
|
if !alive {
|
||||||
log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
|
log.Warn().Msgf("GRPC Model not responding: %s", err.Error())
|
||||||
log.Warn().Msgf("Deleting the process in order to recreate it")
|
log.Warn().Msgf("Deleting the process in order to recreate it")
|
||||||
process, exists := ml.grpcProcesses[s]
|
process := m.Process()
|
||||||
if !exists {
|
if process == nil {
|
||||||
log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
|
log.Error().Msgf("Process not found for '%s' and the model is not responding anymore !", s)
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,24 +63,24 @@ var _ = Describe("ModelLoader", func() {
|
|||||||
|
|
||||||
Context("LoadModel", func() {
|
Context("LoadModel", func() {
|
||||||
It("should load a model and keep it in memory", func() {
|
It("should load a model and keep it in memory", func() {
|
||||||
mockModel = model.NewModel("test.model")
|
mockModel = model.NewModel("foo", "test.model", nil)
|
||||||
|
|
||||||
mockLoader := func(modelName, modelFile string) (*model.Model, error) {
|
mockLoader := func(modelID, modelName, modelFile string) (*model.Model, error) {
|
||||||
return mockModel, nil
|
return mockModel, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
model, err := modelLoader.LoadModel("test.model", mockLoader)
|
model, err := modelLoader.LoadModel("foo", "test.model", mockLoader)
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(model).To(Equal(mockModel))
|
Expect(model).To(Equal(mockModel))
|
||||||
Expect(modelLoader.CheckIsLoaded("test.model")).To(Equal(mockModel))
|
Expect(modelLoader.CheckIsLoaded("foo")).To(Equal(mockModel))
|
||||||
})
|
})
|
||||||
|
|
||||||
It("should return an error if loading the model fails", func() {
|
It("should return an error if loading the model fails", func() {
|
||||||
mockLoader := func(modelName, modelFile string) (*model.Model, error) {
|
mockLoader := func(modelID, modelName, modelFile string) (*model.Model, error) {
|
||||||
return nil, errors.New("failed to load model")
|
return nil, errors.New("failed to load model")
|
||||||
}
|
}
|
||||||
|
|
||||||
model, err := modelLoader.LoadModel("test.model", mockLoader)
|
model, err := modelLoader.LoadModel("foo", "test.model", mockLoader)
|
||||||
Expect(err).To(HaveOccurred())
|
Expect(err).To(HaveOccurred())
|
||||||
Expect(model).To(BeNil())
|
Expect(model).To(BeNil())
|
||||||
})
|
})
|
||||||
@@ -88,18 +88,16 @@ var _ = Describe("ModelLoader", func() {
|
|||||||
|
|
||||||
Context("ShutdownModel", func() {
|
Context("ShutdownModel", func() {
|
||||||
It("should shutdown a loaded model", func() {
|
It("should shutdown a loaded model", func() {
|
||||||
mockModel = model.NewModel("test.model")
|
mockLoader := func(modelID, modelName, modelFile string) (*model.Model, error) {
|
||||||
|
return model.NewModel("foo", "test.model", nil), nil
|
||||||
mockLoader := func(modelName, modelFile string) (*model.Model, error) {
|
|
||||||
return mockModel, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err := modelLoader.LoadModel("test.model", mockLoader)
|
_, err := modelLoader.LoadModel("foo", "test.model", mockLoader)
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
|
|
||||||
err = modelLoader.ShutdownModel("test.model")
|
err = modelLoader.ShutdownModel("foo")
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(modelLoader.CheckIsLoaded("test.model")).To(BeNil())
|
Expect(modelLoader.CheckIsLoaded("foo")).To(BeNil())
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,18 +1,32 @@
|
|||||||
package model
|
package model
|
||||||
|
|
||||||
import grpc "github.com/mudler/LocalAI/pkg/grpc"
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
grpc "github.com/mudler/LocalAI/pkg/grpc"
|
||||||
|
process "github.com/mudler/go-processmanager"
|
||||||
|
)
|
||||||
|
|
||||||
type Model struct {
|
type Model struct {
|
||||||
|
ID string `json:"id"`
|
||||||
address string
|
address string
|
||||||
client grpc.Backend
|
client grpc.Backend
|
||||||
|
process *process.Process
|
||||||
|
sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewModel(address string) *Model {
|
func NewModel(ID, address string, process *process.Process) *Model {
|
||||||
return &Model{
|
return &Model{
|
||||||
|
ID: ID,
|
||||||
address: address,
|
address: address,
|
||||||
|
process: process,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Model) Process() *process.Process {
|
||||||
|
return m.process
|
||||||
|
}
|
||||||
|
|
||||||
func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
|
func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
|
||||||
if m.client != nil {
|
if m.client != nil {
|
||||||
return m.client
|
return m.client
|
||||||
@@ -23,6 +37,8 @@ func (m *Model) GRPC(parallel bool, wd *WatchDog) grpc.Backend {
|
|||||||
enableWD = true
|
enableWD = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m.Lock()
|
||||||
|
defer m.Unlock()
|
||||||
m.client = grpc.NewClient(m.address, parallel, wd, enableWD)
|
m.client = grpc.NewClient(m.address, parallel, wd, enableWD)
|
||||||
return m.client
|
return m.client
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import (
|
|||||||
type Options struct {
|
type Options struct {
|
||||||
backendString string
|
backendString string
|
||||||
model string
|
model string
|
||||||
threads uint32
|
modelID string
|
||||||
assetDir string
|
assetDir string
|
||||||
context context.Context
|
context context.Context
|
||||||
|
|
||||||
@@ -68,12 +68,6 @@ func WithLoadGRPCLoadModelOpts(opts *pb.ModelOptions) Option {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WithThreads(threads uint32) Option {
|
|
||||||
return func(o *Options) {
|
|
||||||
o.threads = threads
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithAssetDir(assetDir string) Option {
|
func WithAssetDir(assetDir string) Option {
|
||||||
return func(o *Options) {
|
return func(o *Options) {
|
||||||
o.assetDir = assetDir
|
o.assetDir = assetDir
|
||||||
@@ -92,6 +86,12 @@ func WithSingleActiveBackend() Option {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WithModelID(id string) Option {
|
||||||
|
return func(o *Options) {
|
||||||
|
o.modelID = id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func NewOptions(opts ...Option) *Options {
|
func NewOptions(opts ...Option) *Options {
|
||||||
o := &Options{
|
o := &Options{
|
||||||
gRPCOptions: &pb.ModelOptions{},
|
gRPCOptions: &pb.ModelOptions{},
|
||||||
|
|||||||
@@ -16,20 +16,36 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func (ml *ModelLoader) deleteProcess(s string) error {
|
func (ml *ModelLoader) deleteProcess(s string) error {
|
||||||
if _, exists := ml.grpcProcesses[s]; exists {
|
defer delete(ml.models, s)
|
||||||
if err := ml.grpcProcesses[s].Stop(); err != nil {
|
|
||||||
log.Error().Err(err).Msgf("(deleteProcess) error while deleting grpc process %s", s)
|
log.Debug().Msgf("Deleting process %s", s)
|
||||||
}
|
|
||||||
}
|
m, exists := ml.models[s]
|
||||||
delete(ml.grpcProcesses, s)
|
if !exists {
|
||||||
delete(ml.models, s)
|
log.Error().Msgf("Model does not exist %s", s)
|
||||||
|
// Nothing to do
|
||||||
return nil
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
process := m.Process()
|
||||||
|
if process == nil {
|
||||||
|
log.Error().Msgf("No process for %s", s)
|
||||||
|
// Nothing to do as there is no process
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
err := process.Stop()
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msgf("(deleteProcess) error while deleting process %s", s)
|
||||||
|
}
|
||||||
|
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
|
func (ml *ModelLoader) StopGRPC(filter GRPCProcessFilter) error {
|
||||||
var err error = nil
|
var err error = nil
|
||||||
for k, p := range ml.grpcProcesses {
|
for k, m := range ml.models {
|
||||||
if filter(k, p) {
|
if filter(k, m.Process()) {
|
||||||
e := ml.ShutdownModel(k)
|
e := ml.ShutdownModel(k)
|
||||||
err = errors.Join(err, e)
|
err = errors.Join(err, e)
|
||||||
}
|
}
|
||||||
@@ -44,17 +60,20 @@ func (ml *ModelLoader) StopAllGRPC() error {
|
|||||||
func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
|
func (ml *ModelLoader) GetGRPCPID(id string) (int, error) {
|
||||||
ml.mu.Lock()
|
ml.mu.Lock()
|
||||||
defer ml.mu.Unlock()
|
defer ml.mu.Unlock()
|
||||||
p, exists := ml.grpcProcesses[id]
|
p, exists := ml.models[id]
|
||||||
if !exists {
|
if !exists {
|
||||||
return -1, fmt.Errorf("no grpc backend found for %s", id)
|
return -1, fmt.Errorf("no grpc backend found for %s", id)
|
||||||
}
|
}
|
||||||
return strconv.Atoi(p.PID)
|
if p.Process() == nil {
|
||||||
|
return -1, fmt.Errorf("no grpc backend found for %s", id)
|
||||||
|
}
|
||||||
|
return strconv.Atoi(p.Process().PID)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) error {
|
func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string, args ...string) (*process.Process, error) {
|
||||||
// Make sure the process is executable
|
// Make sure the process is executable
|
||||||
if err := os.Chmod(grpcProcess, 0700); err != nil {
|
if err := os.Chmod(grpcProcess, 0700); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("Loading GRPC Process: %s", grpcProcess)
|
log.Debug().Msgf("Loading GRPC Process: %s", grpcProcess)
|
||||||
@@ -63,7 +82,7 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
|||||||
|
|
||||||
workDir, err := filepath.Abs(filepath.Dir(grpcProcess))
|
workDir, err := filepath.Abs(filepath.Dir(grpcProcess))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
grpcControlProcess := process.New(
|
grpcControlProcess := process.New(
|
||||||
@@ -79,10 +98,8 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
|||||||
ml.wd.AddAddressModelMap(serverAddress, id)
|
ml.wd.AddAddressModelMap(serverAddress, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
ml.grpcProcesses[id] = grpcControlProcess
|
|
||||||
|
|
||||||
if err := grpcControlProcess.Run(); err != nil {
|
if err := grpcControlProcess.Run(); err != nil {
|
||||||
return err
|
return grpcControlProcess, err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().Msgf("GRPC Service state dir: %s", grpcControlProcess.StateDir())
|
log.Debug().Msgf("GRPC Service state dir: %s", grpcControlProcess.StateDir())
|
||||||
@@ -116,5 +133,5 @@ func (ml *ModelLoader) startProcess(grpcProcess, id string, serverAddress string
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
return nil
|
return grpcControlProcess, nil
|
||||||
}
|
}
|
||||||
|
|||||||
24
pkg/templates/multimodal.go
Normal file
24
pkg/templates/multimodal.go
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
package templates
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"text/template"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TemplateMultiModal(templateString string, templateID int, text string) (string, error) {
|
||||||
|
// compile the template
|
||||||
|
tmpl, err := template.New("template").Parse(templateString)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
result := bytes.NewBuffer(nil)
|
||||||
|
// execute the template
|
||||||
|
err = tmpl.Execute(result, struct {
|
||||||
|
ID int
|
||||||
|
Text string
|
||||||
|
}{
|
||||||
|
ID: templateID,
|
||||||
|
Text: text,
|
||||||
|
})
|
||||||
|
return result.String(), err
|
||||||
|
}
|
||||||
19
pkg/templates/multimodal_test.go
Normal file
19
pkg/templates/multimodal_test.go
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
package templates_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
. "github.com/mudler/LocalAI/pkg/templates" // Update with your module path
|
||||||
|
|
||||||
|
// Update with your module path
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("EvaluateTemplate", func() {
|
||||||
|
Context("templating simple strings for multimodal chat", func() {
|
||||||
|
It("should template messages correctly", func() {
|
||||||
|
result, err := TemplateMultiModal("[img-{{.ID}}]{{.Text}}", 1, "bar")
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
Expect(result).To(Equal("[img-1]bar"))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -972,6 +972,14 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"model.Model": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"openai.Assistant": {
|
"openai.Assistant": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -1682,6 +1690,12 @@ const docTemplate = `{
|
|||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"loaded_models": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/model.Model"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -965,6 +965,14 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"model.Model": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"openai.Assistant": {
|
"openai.Assistant": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -1675,6 +1683,12 @@
|
|||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"loaded_models": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/model.Model"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -168,6 +168,11 @@ definitions:
|
|||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
type: object
|
type: object
|
||||||
|
model.Model:
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
openai.Assistant:
|
openai.Assistant:
|
||||||
properties:
|
properties:
|
||||||
created:
|
created:
|
||||||
@@ -652,6 +657,10 @@ definitions:
|
|||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
|
loaded_models:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/model.Model'
|
||||||
|
type: array
|
||||||
type: object
|
type: object
|
||||||
schema.TTSRequest:
|
schema.TTSRequest:
|
||||||
description: TTS request body
|
description: TTS request body
|
||||||
|
|||||||
@@ -260,11 +260,9 @@ var _ = Describe("E2E test", func() {
|
|||||||
resp, err := http.Post(rerankerEndpoint, "application/json", bytes.NewReader(serialized))
|
resp, err := http.Post(rerankerEndpoint, "application/json", bytes.NewReader(serialized))
|
||||||
Expect(err).To(BeNil())
|
Expect(err).To(BeNil())
|
||||||
Expect(resp).ToNot(BeNil())
|
Expect(resp).ToNot(BeNil())
|
||||||
Expect(resp.StatusCode).To(Equal(200))
|
|
||||||
|
|
||||||
body, err := io.ReadAll(resp.Body)
|
body, err := io.ReadAll(resp.Body)
|
||||||
Expect(err).To(BeNil())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(body).ToNot(BeNil())
|
Expect(resp.StatusCode).To(Equal(200), fmt.Sprintf("body: %s, response: %+v", body, resp))
|
||||||
|
|
||||||
deserializedResponse := schema.JINARerankResponse{}
|
deserializedResponse := schema.JINARerankResponse{}
|
||||||
err = json.Unmarshal(body, &deserializedResponse)
|
err = json.Unmarshal(body, &deserializedResponse)
|
||||||
|
|||||||
Reference in New Issue
Block a user