Compare commits


1 commit

Author:  Ettore Di Giacinto
SHA1:    3e8a54f4b6
Message: chore(docs): improve
         Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Date:    2025-11-17 19:34:25 +01:00
173 changed files with 5946 additions and 9062 deletions

View File

@@ -1090,7 +1090,7 @@ jobs:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -1176,7 +1176,7 @@ jobs:
go-version: ['1.21.x']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}

View File

@@ -97,7 +97,7 @@ jobs:
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'

View File

@@ -50,7 +50,7 @@ jobs:
go-version: ['${{ inputs.go-version }}']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true

View File

@@ -17,7 +17,7 @@ jobs:
has-backends-darwin: ${{ steps.set-matrix.outputs.has-backends-darwin }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Setup Bun
uses: oven-sh/setup-bun@v2

View File

@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -25,7 +25,7 @@ jobs:
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -47,7 +47,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go

View File

@@ -31,7 +31,7 @@ jobs:
file: "backend/go/piper/Makefile"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Bump dependencies 🔧
id: bump
run: |

View File

@@ -12,7 +12,7 @@ jobs:
- repository: "mudler/LocalAI"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Bump dependencies 🔧
run: |
bash .github/bump_docs.sh ${{ matrix.repository }}

View File

@@ -15,7 +15,7 @@ jobs:
&& sudo add-apt-repository -y ppa:git-core/ppa \
&& sudo apt-get update \
&& sudo apt-get install -y git
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- name: Install dependencies
run: |
sudo apt-get update

View File

@@ -20,7 +20,7 @@ jobs:
skip-commit-verification: true
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Approve a PR if not already approved
run: |

View File

@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- uses: actions/setup-go@v5

View File

@@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -73,7 +73,7 @@ jobs:
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Cache GRPC
uses: docker/build-push-action@v6

View File

@@ -16,7 +16,7 @@ jobs:
matrix:
include:
- base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
runs-on: 'arc-runner-set'
runs-on: 'ubuntu-latest'
platforms: 'linux/amd64'
runs-on: ${{matrix.runs-on}}
steps:
@@ -43,7 +43,7 @@ jobs:
uses: docker/setup-buildx-action@master
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Cache Intel images
uses: docker/build-push-action@v6

View File

@@ -94,7 +94,7 @@ jobs:
&& sudo apt-get update \
&& sudo apt-get install -y git
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Release space from worker
if: inputs.runs-on == 'ubuntu-latest'

View File

@@ -14,7 +14,7 @@ jobs:
if: ${{ github.actor == 'localai-bot' && !contains(github.event.pull_request.title, 'chore(model gallery):') }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
uses: actions/checkout@v5
- name: Approve a PR if not already approved
run: |

View File

@@ -15,7 +15,7 @@ jobs:
MODEL_NAME: gemma-3-12b-it-qat
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes
@@ -95,7 +95,7 @@ jobs:
MODEL_NAME: gemma-3-12b-it-qat
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 0 # needed to checkout all branches for this Action to work
ref: ${{ github.event.pull_request.head.sha }} # Checkout the PR head to get the actual changes

View File

@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -28,7 +28,7 @@ jobs:
runs-on: macos-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go
@@ -46,7 +46,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Go

View File

@@ -14,7 +14,7 @@ jobs:
GO111MODULE: on
steps:
- name: Checkout Source
uses: actions/checkout@v6
uses: actions/checkout@v5
if: ${{ github.actor != 'dependabot[bot]' }}
- name: Run Gosec Security Scanner
if: ${{ github.actor != 'dependabot[bot]' }}

View File

@@ -19,7 +19,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -40,7 +40,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -61,7 +61,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -83,7 +83,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -104,7 +104,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -124,7 +124,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -186,7 +186,7 @@ jobs:
# sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
# df -h
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -211,7 +211,7 @@ jobs:
# runs-on: ubuntu-latest
# steps:
# - name: Clone
# uses: actions/checkout@v6
# uses: actions/checkout@v5
# with:
# submodules: true
# - name: Dependencies
@@ -232,7 +232,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies

View File

@@ -70,7 +70,7 @@ jobs:
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}
@@ -166,7 +166,7 @@ jobs:
sudo rm -rfv build || true
df -h
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Dependencies
@@ -196,7 +196,7 @@ jobs:
go-version: ['1.25.x']
steps:
- name: Clone
uses: actions/checkout@v6
uses: actions/checkout@v5
with:
submodules: true
- name: Setup Go ${{ matrix.go-version }}

View File

@@ -9,7 +9,7 @@ jobs:
fail-fast: false
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
- uses: actions/setup-go@v5
with:
go-version: 'stable'

.gitmodules (vendored)
View File

@@ -1,3 +1,6 @@
[submodule "docs/themes/hugo-theme-relearn"]
path = docs/themes/hugo-theme-relearn
url = https://github.com/McShelby/hugo-theme-relearn.git
[submodule "docs/themes/lotusdocs"]
path = docs/themes/lotusdocs
url = https://github.com/colinwilson/lotusdocs

View File

@@ -332,6 +332,6 @@ RUN mkdir -p /models /backends
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1
VOLUME /models /backends /configuration
VOLUME /models /backends
EXPOSE 8080
ENTRYPOINT [ "/entrypoint.sh" ]

View File

@@ -108,7 +108,7 @@ Run the installer script:
curl https://localai.io/install.sh | sh
```
For more installation options, see [Installer Options](https://localai.io/installation/).
For more installation options, see [Installer Options](https://localai.io/docs/advanced/installer/).
### macOS Download:
@@ -206,7 +206,6 @@ For more information, see [💻 Getting started](https://localai.io/basics/getti
## 📰 Latest project news
- November 2025: Major improvements to the UX. Among these: [Import models via URL](https://github.com/mudler/LocalAI/pull/7245) and [Multiple chats and history](https://github.com/mudler/LocalAI/pull/7325)
- October 2025: 🔌 [Model Context Protocol (MCP)](https://localai.io/docs/features/mcp/) support added for agentic capabilities with external tools
- September 2025: New Launcher application for MacOS and Linux, extended support to many backends for Mac and Nvidia L4T devices. Models: Added MLX-Audio, WAN 2.2. WebUI improvements and Python-based backends now ships portable python environments.
- August 2025: MLX, MLX-VLM, Diffusers and llama.cpp are now supported on Mac M1/M2/M3+ chips ( with `development` suffix in the gallery ): https://github.com/mudler/LocalAI/pull/6049 https://github.com/mudler/LocalAI/pull/6119 https://github.com/mudler/LocalAI/pull/6121 https://github.com/mudler/LocalAI/pull/6060

View File

@@ -57,7 +57,7 @@ add_library(hw_grpc_proto
${hw_proto_srcs}
${hw_proto_hdrs} )
add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h)
add_executable(${TARGET} grpc-server.cpp utils.hpp json.hpp httplib.h)
target_include_directories(${TARGET} PRIVATE ../llava)
target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})

View File

@@ -1,5 +1,5 @@
LLAMA_VERSION?=583cb83416467e8abf9b37349dcf1f6a0083745a
LLAMA_VERSION?=80deff3648b93727422461c41c7279ef1dac7452
LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
CMAKE_ARGS?=

View File

@@ -8,10 +8,6 @@
//
#include "server.cpp"
#include "server-task.cpp"
#include "server-queue.cpp"
#include "server-common.cpp"
// LocalAI
#include "backend.pb.h"
@@ -141,43 +137,15 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
}
// Extract tools and tool_choice from proto and add to data JSON
SRV_INF("[TOOLS DEBUG] parse_options: Checking for tools in proto, tools().empty()=%d, tools().size()=%zu\n",
predict->tools().empty() ? 1 : 0, predict->tools().size());
if (!predict->tools().empty()) {
SRV_INF("[TOOLS DEBUG] parse_options: Tools string from proto (first 500 chars): %s\n",
predict->tools().substr(0, std::min<size_t>(500, predict->tools().size())).c_str());
try {
// Parse tools JSON string and add to data
json tools_json = json::parse(predict->tools());
data["tools"] = tools_json;
SRV_INF("Extracted tools from proto: %s\n", predict->tools().c_str());
// Debug: Log tools count and names
if (tools_json.is_array()) {
SRV_INF("[TOOLS DEBUG] parse_options: Successfully parsed %zu tools from Go layer\n", tools_json.size());
for (size_t i = 0; i < tools_json.size(); i++) {
if (tools_json[i].contains("function") && tools_json[i]["function"].contains("name")) {
SRV_INF("[TOOLS DEBUG] parse_options: Tool %zu: %s\n", i, tools_json[i]["function"]["name"].get<std::string>().c_str());
} else if (tools_json[i].contains("name")) {
SRV_INF("[TOOLS DEBUG] parse_options: Tool %zu: %s\n", i, tools_json[i]["name"].get<std::string>().c_str());
}
}
} else {
SRV_WRN("[TOOLS DEBUG] parse_options: Parsed tools JSON is not an array: %s\n", tools_json.dump().c_str());
}
} catch (const json::parse_error& e) {
SRV_WRN("Failed to parse tools JSON from proto: %s\n", e.what());
SRV_WRN("[TOOLS DEBUG] parse_options: Tools string that failed to parse: %s\n", predict->tools().c_str());
}
} else {
SRV_INF("%s", "[TOOLS DEBUG] parse_options: No tools received from Go layer (predict->tools() is empty)\n");
}
// Debug: Verify tools are in data after extraction
if (data.contains("tools")) {
SRV_INF("[TOOLS DEBUG] parse_options: Tools successfully added to data, count: %zu\n",
data["tools"].is_array() ? data["tools"].size() : 0);
} else {
SRV_INF("%s", "[TOOLS DEBUG] parse_options: WARNING - Tools NOT in data after extraction!\n");
}
if (!predict->toolchoice().empty()) {
try {
@@ -187,11 +155,9 @@ json parse_options(bool streaming, const backend::PredictOptions* predict, const
// Store it as-is (string or object) so we can convert object to "required" later when adding to body_json
if (tool_choice_json.is_string()) {
data["tool_choice"] = tool_choice_json.get<std::string>();
SRV_DBG("[TOOLS DEBUG] Received tool_choice from Go layer: %s\n", tool_choice_json.get<std::string>().c_str());
} else {
// Store object as-is so we can detect it later and convert to "required"
data["tool_choice"] = tool_choice_json;
SRV_DBG("[TOOLS DEBUG] Received tool_choice object from Go layer: %s\n", tool_choice_json.dump().c_str());
}
SRV_INF("Extracted tool_choice from proto: %s\n", predict->toolchoice().c_str());
} catch (const json::parse_error& e) {
@@ -700,10 +666,6 @@ public:
json content_val;
try {
content_val = json::parse(msg.content());
// Handle null values - convert to empty string to avoid template errors
if (content_val.is_null()) {
content_val = "";
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
content_val = msg.content();
@@ -745,12 +707,7 @@ public:
msg_json["content"] = content_array;
} else {
// Use content as-is (already array or not last user message)
// Ensure null values are converted to empty string
if (content_val.is_null()) {
msg_json["content"] = "";
} else {
msg_json["content"] = content_val;
}
msg_json["content"] = content_val;
}
} else if (is_last_user_msg && has_images_or_audio) {
// If no content but this is the last user message with images/audio, create content array
@@ -777,57 +734,6 @@ public:
}
}
msg_json["content"] = content_array;
} else if (msg.role() == "tool") {
// Tool role messages must have content field set, even if empty
// Jinja templates expect content to be a string, not null or object
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
if (msg.content().empty()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
// Content exists, parse and ensure it's a string
json content_val;
try {
content_val = json::parse(msg.content());
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
i, content_val.is_null() ? "null" :
content_val.is_object() ? "object" :
content_val.is_string() ? "string" :
content_val.is_array() ? "array" : "other");
// Handle null values - Jinja templates expect content to be a string, not null
if (content_val.is_null()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
} else if (content_val.is_object()) {
// If content is an object (e.g., from tool call failures/errors), convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
} else if (content_val.is_string()) {
msg_json["content"] = content_val.get<std::string>();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
} else {
// For arrays or other types, convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
i, content_val.is_array() ? "array" : "other type");
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
msg_json["content"] = msg.content();
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
}
}
} else {
// Ensure all messages have content set (fallback for any unhandled cases)
// Jinja templates expect content to be present, default to empty string if not set
if (!msg_json.contains("content")) {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
i, msg.role().c_str());
msg_json["content"] = "";
}
}
// Add optional fields for OpenAI-compatible message format
@@ -845,96 +751,14 @@ public:
try {
json tool_calls = json::parse(msg.tool_calls());
msg_json["tool_calls"] = tool_calls;
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
// IMPORTANT: If message has tool_calls but content is empty or not set,
// set content to space " " instead of empty string "", because llama.cpp's
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
// which causes template errors when accessing message.content[:tool_start_length]
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
msg_json["content"] = " ";
}
// Log each tool call with name and arguments
if (tool_calls.is_array()) {
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
const auto& tc = tool_calls[tc_idx];
std::string tool_name = "unknown";
std::string tool_args = "{}";
if (tc.contains("function")) {
const auto& func = tc["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("arguments")) {
tool_args = func["arguments"].is_string() ?
func["arguments"].get<std::string>() :
func["arguments"].dump();
}
} else if (tc.contains("name")) {
tool_name = tc["name"].get<std::string>();
if (tc.contains("arguments")) {
tool_args = tc["arguments"].is_string() ?
tc["arguments"].get<std::string>() :
tc["arguments"].dump();
}
}
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
i, tc_idx, tool_name.c_str(), tool_args.c_str());
}
}
} catch (const json::parse_error& e) {
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
}
}
// Debug: Log final content state before adding to array
if (msg_json.contains("content")) {
if (msg_json["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
i, msg_json["content"].is_string() ? "string" :
msg_json["content"].is_array() ? "array" :
msg_json["content"].is_object() ? "object" : "other",
msg_json["content"].is_null() ? 0 : 1);
}
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
}
messages_json.push_back(msg_json);
}
// Final safety check: Ensure no message has null content (Jinja templates require strings)
SRV_INF("[CONTENT DEBUG] PredictStream: Running final safety check on %zu messages\n", messages_json.size());
for (size_t idx = 0; idx < messages_json.size(); idx++) {
auto& msg = messages_json[idx];
if (msg.contains("content") && msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check found message %zu with NULL content, converting to empty string\n", idx);
msg["content"] = "";
} else if (!msg.contains("content")) {
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check found message %zu without content field, adding empty string\n", idx);
msg["content"] = "";
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: Safety check message %zu: content OK, type=%s\n",
idx, msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
}
// Debug: Count tool messages
int tool_msg_count = 0;
for (const auto& msg : messages_json) {
if (msg.contains("role") && msg["role"] == "tool") {
tool_msg_count++;
}
}
SRV_DBG("[TOOLS DEBUG] PredictStream: Built %d tool messages out of %zu total messages\n", tool_msg_count, messages_json.size());
// Debug: Print full conversation (messages)
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full messages array:\n%s\n", messages_json.dump(2).c_str());
body_json["messages"] = messages_json;
body_json["stream"] = true; // PredictStream is always streaming
@@ -945,16 +769,6 @@ public:
data["grammar"].is_string() &&
!data["grammar"].get<std::string>().empty();
SRV_INF("[TOOLS DEBUG] PredictStream: has_grammar_from_go=%d, data.contains(\"tools\")=%d, data.contains(\"grammar\")=%d\n",
has_grammar_from_go ? 1 : 0,
data.contains("tools") ? 1 : 0,
data.contains("grammar") ? 1 : 0);
if (data.contains("grammar")) {
SRV_INF("[TOOLS DEBUG] PredictStream: grammar type=%s, empty=%d\n",
data["grammar"].is_string() ? "string" : "other",
data["grammar"].is_string() && data["grammar"].get<std::string>().empty() ? 1 : 0);
}
// Copy other relevant fields from data that oaicompat_chat_params_parse expects
// Tools and tool_choice are only passed when NoGrammar is true (grammar not provided)
// When grammar is provided from Go layer, we use it instead of template-generated grammar
@@ -964,36 +778,8 @@ public:
body_json["tools"] = data["tools"];
std::string tools_str = data["tools"].dump();
SRV_INF("Using tools from data (NoGrammar=true): %s\n", tools_str.c_str());
// Debug: Log tools count and details before template processing
if (data["tools"].is_array()) {
SRV_INF("[TOOLS DEBUG] PredictStream: Passing %zu tools to oaicompat_chat_params_parse\n", data["tools"].size());
for (size_t t_idx = 0; t_idx < data["tools"].size(); t_idx++) {
const auto& tool = data["tools"][t_idx];
std::string tool_name = "unknown";
std::string tool_desc = "";
if (tool.contains("function")) {
const auto& func = tool["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("description")) {
tool_desc = func["description"].is_string() ?
func["description"].get<std::string>() : "";
}
} else if (tool.contains("name")) {
tool_name = tool["name"].get<std::string>();
if (tool.contains("description")) {
tool_desc = tool["description"].is_string() ?
tool["description"].get<std::string>() : "";
}
}
SRV_INF("[TOOLS DEBUG] PredictStream: Tool %zu: name=%s, description=%s\n",
t_idx, tool_name.c_str(), tool_desc.substr(0, 100).c_str());
}
}
} else {
SRV_WRN("%s", "No tools found in data - tool calls will not work without tools field\n");
SRV_DBG("[TOOLS DEBUG] PredictStream: No tools in data, tool_choice=%s\n", data.contains("tool_choice") ? data["tool_choice"].dump().c_str() : "not set");
}
if (data.contains("tool_choice")) {
// tool_choice can be a string or object, but oaicompat_chat_params_parse expects a string
@@ -1035,17 +821,6 @@ public:
if (data.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = data["chat_template_kwargs"];
}
// Pass parallel_tool_calls if present (used by oaicompat_chat_params_parse)
if (data.contains("parallel_tool_calls")) {
body_json["parallel_tool_calls"] = data["parallel_tool_calls"];
}
// Pass add_generation_prompt if present (used by oaicompat_chat_params_parse)
if (data.contains("add_generation_prompt")) {
body_json["add_generation_prompt"] = data["add_generation_prompt"];
}
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
SRV_DBG("[CONVERSATION DEBUG] PredictStream: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
// Use the same approach as server.cpp: call oaicompat_chat_params_parse
// This handles all template application, grammar merging, etc. automatically
@@ -1056,56 +831,8 @@ public:
// Update allow_image and allow_audio based on current mctx state
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
// Debug: Log tools before template processing
if (body_json.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] PredictStream: Before oaicompat_chat_params_parse - tools count: %zu\n",
body_json["tools"].is_array() ? body_json["tools"].size() : 0);
}
// Debug: Verify messages content before template processing
// Also ensure ALL messages have content set to string (not null) - templates expect strings
if (body_json.contains("messages") && body_json["messages"].is_array()) {
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
auto& msg = body_json["messages"][idx];
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
if (msg.contains("content")) {
if (msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
msg["content"] = ""; // Fix null content
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
// If content is object or other non-string type, convert to string for templates
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
if (msg["content"].is_object()) {
msg["content"] = msg["content"].dump();
} else {
msg["content"] = "";
}
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
idx, role_str.c_str(),
msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
} else {
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
msg["content"] = ""; // Add missing content
}
}
}
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Debug: Log tools after template processing
if (parsed_data.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] PredictStream: After oaicompat_chat_params_parse - tools count: %zu\n",
parsed_data["tools"].is_array() ? parsed_data["tools"].size() : 0);
} else {
SRV_DBG("%s", "[TOOLS DEBUG] PredictStream: After oaicompat_chat_params_parse - no tools in parsed_data\n");
}
// Extract the prompt from parsed data
prompt_str = parsed_data.at("prompt").get<std::string>();
@@ -1116,9 +843,8 @@ public:
preserved_grammar = data["grammar"];
}
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, parse_tool_calls, etc.)
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, etc.)
// This ensures all template-generated fields are included
// parse_tool_calls is set by oaicompat_chat_params_parse when tools are present
for (const auto& item : parsed_data.items()) {
if (item.key() != "prompt") { // Don't overwrite prompt_str, we already extracted it
// If grammar was provided from Go layer, preserve it instead of template-generated grammar
@@ -1129,11 +855,6 @@ public:
}
}
}
// Debug: Log parse_tool_calls if present (set by oaicompat_chat_params_parse when tools are present)
if (data.contains("parse_tool_calls")) {
SRV_DBG("[TOOLS DEBUG] PredictStream: parse_tool_calls=%s\n", data["parse_tool_calls"].get<bool>() ? "true" : "false");
}
} else {
// Use prompt directly from data
if (data.contains("prompt") && data["prompt"].is_string()) {
@@ -1388,19 +1109,11 @@ public:
}
}
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
for (int i = 0; i < request->messages_size(); i++) {
const auto& msg = request->messages(i);
json msg_json;
msg_json["role"] = msg.role();
SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
if (!msg.content().empty()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
}
bool is_last_user_msg = (i == last_user_msg_idx);
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
@@ -1411,11 +1124,6 @@ public:
json content_val;
try {
content_val = json::parse(msg.content());
// Handle null values - convert to empty string to avoid template errors
if (content_val.is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
content_val = "";
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
content_val = msg.content();
@@ -1423,7 +1131,6 @@ public:
// If content is an object (e.g., from tool call failures), convert to string
if (content_val.is_object()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
content_val = content_val.dump();
}
@@ -1458,17 +1165,7 @@ public:
msg_json["content"] = content_array;
} else {
// Use content as-is (already array or not last user message)
// Ensure null values are converted to empty string
if (content_val.is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
msg_json["content"] = "";
} else {
msg_json["content"] = content_val;
SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
i, content_val.is_string() ? "string" :
content_val.is_array() ? "array" :
content_val.is_object() ? "object" : "other");
}
msg_json["content"] = content_val;
}
} else if (is_last_user_msg && has_images_or_audio) {
// If no content but this is the last user message with images/audio, create content array
@@ -1495,65 +1192,9 @@ public:
}
}
msg_json["content"] = content_array;
SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
} else if (!msg.tool_calls().empty()) {
// Tool call messages may have null content, but templates expect string
// IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
// which causes template errors when accessing message.content[:tool_start_length]
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
msg_json["content"] = " ";
} else if (msg.role() == "tool") {
// Tool role messages must have content field set, even if empty
// Jinja templates expect content to be a string, not null or object
SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
if (msg.content().empty()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
} else {
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
// Content exists, parse and ensure it's a string
json content_val;
try {
content_val = json::parse(msg.content());
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
i, content_val.is_null() ? "null" :
content_val.is_object() ? "object" :
content_val.is_string() ? "string" :
content_val.is_array() ? "array" : "other");
// Handle null values - Jinja templates expect content to be a string, not null
if (content_val.is_null()) {
msg_json["content"] = "";
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
} else if (content_val.is_object()) {
// If content is an object (e.g., from tool call failures/errors), convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
} else if (content_val.is_string()) {
msg_json["content"] = content_val.get<std::string>();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
} else {
// For arrays or other types, convert to string
msg_json["content"] = content_val.dump();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
i, content_val.is_array() ? "array" : "other type");
}
} catch (const json::parse_error&) {
// Not JSON, treat as plain string
msg_json["content"] = msg.content();
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
}
}
} else {
// Ensure all messages have content set (fallback for any unhandled cases)
// Jinja templates expect content to be present, default to empty string if not set
if (!msg_json.contains("content")) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
i, msg.role().c_str());
msg_json["content"] = "";
}
// Tool call messages may have null content
msg_json["content"] = json();
}
// Add optional fields for OpenAI-compatible message format
@@ -1571,98 +1212,14 @@ public:
try {
json tool_calls = json::parse(msg.tool_calls());
msg_json["tool_calls"] = tool_calls;
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
// IMPORTANT: If message has tool_calls but content is empty or not set,
// set content to space " " instead of empty string "", because llama.cpp's
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
// which causes template errors when accessing message.content[:tool_start_length]
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
msg_json["content"] = " ";
}
// Log each tool call with name and arguments
if (tool_calls.is_array()) {
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
const auto& tc = tool_calls[tc_idx];
std::string tool_name = "unknown";
std::string tool_args = "{}";
if (tc.contains("function")) {
const auto& func = tc["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("arguments")) {
tool_args = func["arguments"].is_string() ?
func["arguments"].get<std::string>() :
func["arguments"].dump();
}
} else if (tc.contains("name")) {
tool_name = tc["name"].get<std::string>();
if (tc.contains("arguments")) {
tool_args = tc["arguments"].is_string() ?
tc["arguments"].get<std::string>() :
tc["arguments"].dump();
}
}
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
i, tc_idx, tool_name.c_str(), tool_args.c_str());
}
}
} catch (const json::parse_error& e) {
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
}
}
// Debug: Log final content state before adding to array
if (msg_json.contains("content")) {
if (msg_json["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
} else {
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
i, msg_json["content"].is_string() ? "string" :
msg_json["content"].is_array() ? "array" :
msg_json["content"].is_object() ? "object" : "other",
msg_json["content"].is_null() ? 0 : 1);
}
} else {
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
}
messages_json.push_back(msg_json);
}
// Final safety check: Ensure no message has null content (Jinja templates require strings)
SRV_INF("[CONTENT DEBUG] Predict: Running final safety check on %zu messages\n", messages_json.size());
for (size_t idx = 0; idx < messages_json.size(); idx++) {
auto& msg = messages_json[idx];
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
if (msg.contains("content") && msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: Safety check found message %zu (role=%s) with NULL content, converting to empty string\n", idx, role_str.c_str());
msg["content"] = "";
} else if (!msg.contains("content")) {
SRV_INF("[CONTENT DEBUG] Predict: Safety check found message %zu (role=%s) without content field, adding empty string\n", idx, role_str.c_str());
msg["content"] = "";
} else {
SRV_INF("[CONTENT DEBUG] Predict: Safety check message %zu (role=%s): content OK, type=%s\n",
idx, role_str.c_str(),
msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
}
// Debug: Count tool messages
int tool_msg_count = 0;
for (const auto& msg : messages_json) {
if (msg.contains("role") && msg["role"] == "tool") {
tool_msg_count++;
}
}
SRV_DBG("[TOOLS DEBUG] Predict: Built %d tool messages out of %zu total messages\n", tool_msg_count, messages_json.size());
// Debug: Print full conversation (messages)
SRV_DBG("[CONVERSATION DEBUG] Predict: Full messages array:\n%s\n", messages_json.dump(2).c_str());
body_json["messages"] = messages_json;
body_json["stream"] = false;
@@ -1673,16 +1230,6 @@ public:
data["grammar"].is_string() &&
!data["grammar"].get<std::string>().empty();
SRV_INF("[TOOLS DEBUG] Predict: has_grammar_from_go=%d, data.contains(\"tools\")=%d, data.contains(\"grammar\")=%d\n",
has_grammar_from_go ? 1 : 0,
data.contains("tools") ? 1 : 0,
data.contains("grammar") ? 1 : 0);
if (data.contains("grammar")) {
SRV_INF("[TOOLS DEBUG] Predict: grammar type=%s, empty=%d\n",
data["grammar"].is_string() ? "string" : "other",
data["grammar"].is_string() && data["grammar"].get<std::string>().empty() ? 1 : 0);
}
// Copy other relevant fields from data that oaicompat_chat_params_parse expects
// Tools and tool_choice are only passed when NoGrammar is true (grammar not provided)
// When grammar is provided from Go layer, we use it instead of template-generated grammar
@@ -1692,36 +1239,8 @@ public:
body_json["tools"] = data["tools"];
std::string tools_str = data["tools"].dump();
SRV_INF("Using tools from data (NoGrammar=true): %s\n", tools_str.c_str());
// Debug: Log tools count and details before template processing
if (data["tools"].is_array()) {
SRV_INF("[TOOLS DEBUG] Predict: Passing %zu tools to oaicompat_chat_params_parse\n", data["tools"].size());
for (size_t t_idx = 0; t_idx < data["tools"].size(); t_idx++) {
const auto& tool = data["tools"][t_idx];
std::string tool_name = "unknown";
std::string tool_desc = "";
if (tool.contains("function")) {
const auto& func = tool["function"];
if (func.contains("name")) {
tool_name = func["name"].get<std::string>();
}
if (func.contains("description")) {
tool_desc = func["description"].is_string() ?
func["description"].get<std::string>() : "";
}
} else if (tool.contains("name")) {
tool_name = tool["name"].get<std::string>();
if (tool.contains("description")) {
tool_desc = tool["description"].is_string() ?
tool["description"].get<std::string>() : "";
}
}
SRV_INF("[TOOLS DEBUG] Predict: Tool %zu: name=%s, description=%s\n",
t_idx, tool_name.c_str(), tool_desc.substr(0, 100).c_str());
}
}
} else {
SRV_WRN("%s", "No tools found in data - tool calls will not work without tools field\n");
SRV_DBG("[TOOLS DEBUG] Predict: No tools in data, tool_choice=%s\n", data.contains("tool_choice") ? data["tool_choice"].dump().c_str() : "not set");
}
if (data.contains("tool_choice")) {
// tool_choice can be a string or object, but oaicompat_chat_params_parse expects a string
@@ -1763,17 +1282,6 @@ public:
if (data.contains("chat_template_kwargs")) {
body_json["chat_template_kwargs"] = data["chat_template_kwargs"];
}
// Pass parallel_tool_calls if present (used by oaicompat_chat_params_parse)
if (data.contains("parallel_tool_calls")) {
body_json["parallel_tool_calls"] = data["parallel_tool_calls"];
}
// Pass add_generation_prompt if present (used by oaicompat_chat_params_parse)
if (data.contains("add_generation_prompt")) {
body_json["add_generation_prompt"] = data["add_generation_prompt"];
}
// Debug: Print full body_json before template processing (includes messages, tools, tool_choice, etc.)
SRV_DBG("[CONVERSATION DEBUG] Predict: Full body_json before oaicompat_chat_params_parse:\n%s\n", body_json.dump(2).c_str());
// Use the same approach as server.cpp: call oaicompat_chat_params_parse
// This handles all template application, grammar merging, etc. automatically
@@ -1784,56 +1292,8 @@ public:
// Update allow_image and allow_audio based on current mctx state
parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
// Debug: Log tools before template processing
if (body_json.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] Predict: Before oaicompat_chat_params_parse - tools count: %zu\n",
body_json["tools"].is_array() ? body_json["tools"].size() : 0);
}
// Debug: Verify messages content before template processing
// Also ensure ALL messages have content set to string (not null) - templates expect strings
if (body_json.contains("messages") && body_json["messages"].is_array()) {
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
auto& msg = body_json["messages"][idx];
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
if (msg.contains("content")) {
if (msg["content"].is_null()) {
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
msg["content"] = ""; // Fix null content
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
// If content is object or other non-string type, convert to string for templates
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
if (msg["content"].is_object()) {
msg["content"] = msg["content"].dump();
} else {
msg["content"] = "";
}
} else {
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
idx, role_str.c_str(),
msg["content"].is_string() ? "string" :
msg["content"].is_array() ? "array" :
msg["content"].is_object() ? "object" : "other");
}
} else {
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
msg["content"] = ""; // Add missing content
}
}
}
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Debug: Log tools after template processing
if (parsed_data.contains("tools")) {
SRV_DBG("[TOOLS DEBUG] Predict: After oaicompat_chat_params_parse - tools count: %zu\n",
parsed_data["tools"].is_array() ? parsed_data["tools"].size() : 0);
} else {
SRV_DBG("%s", "[TOOLS DEBUG] Predict: After oaicompat_chat_params_parse - no tools in parsed_data\n");
}
// Extract the prompt from parsed data
prompt_str = parsed_data.at("prompt").get<std::string>();
@@ -1844,9 +1304,8 @@ public:
preserved_grammar = data["grammar"];
}
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, parse_tool_calls, etc.)
// Merge all fields from parsed_data into data (grammar, grammar_triggers, preserved_tokens, etc.)
// This ensures all template-generated fields are included
// parse_tool_calls is set by oaicompat_chat_params_parse when tools are present
for (const auto& item : parsed_data.items()) {
if (item.key() != "prompt") { // Don't overwrite prompt_str, we already extracted it
// If grammar was provided from Go layer, preserve it instead of template-generated grammar
@@ -1857,11 +1316,6 @@ public:
}
}
}
// Debug: Log parse_tool_calls if present (set by oaicompat_chat_params_parse when tools are present)
if (data.contains("parse_tool_calls")) {
SRV_DBG("[TOOLS DEBUG] Predict: parse_tool_calls=%s\n", data["parse_tool_calls"].get<bool>() ? "true" : "false");
}
} else {
// Use prompt directly from data
if (data.contains("prompt") && data["prompt"].is_string()) {
@@ -2138,7 +1592,7 @@ public:
tasks.reserve(documents.size());
for (size_t i = 0; i < documents.size(); i++) {
auto tmp = format_prompt_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
auto tmp = format_rerank(ctx_server.model, ctx_server.vocab, ctx_server.mctx, request->query(), documents[i]);
server_task task = server_task(SERVER_TASK_TYPE_RERANK);
task.id = ctx_server.queue_tasks.get_new_id();
task.index = i;
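
The grpc-server.cpp hunks above differ mainly in how much debug logging and null-content normalization surrounds parse_options and the Predict/PredictStream message building, plus the format_prompt_rerank/format_rerank rename. In both variants the server reads tools from the proto as a JSON string (json::parse(predict->tools())) and, for OpenAI-style tools, looks up tools_json[i]["function"]["name"]. As a standalone, hedged sketch of that payload shape only — the struct names and the get_weather tool are illustrative, not LocalAI's actual Go types:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative shapes only: the real proto field and Go plumbing are not part
// of this diff. The C++ parse_options above expects a JSON array where each
// entry carries a "function" object with at least a "name".
type toolFunction struct {
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	Parameters  json.RawMessage `json:"parameters,omitempty"`
}

type tool struct {
	Type     string       `json:"type"`
	Function toolFunction `json:"function"`
}

func main() {
	tools := []tool{
		{
			Type: "function",
			Function: toolFunction{
				Name:        "get_weather", // hypothetical tool name
				Description: "Return the current weather for a city",
				Parameters:  json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}}}`),
			},
		},
	}

	// This string is the kind of value the C++ side parses with
	// json::parse(predict->tools()) before adding it to data["tools"].
	payload, err := json.Marshal(tools)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(payload))
}
```
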

View File

@@ -9,13 +9,10 @@ done
set -e
for file in $(ls llama.cpp/tools/server/); do
cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
done
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
set +e

View File

@@ -8,7 +8,7 @@ JOBS?=$(shell nproc --ignore=1)
# whisper.cpp version
WHISPER_REPO?=https://github.com/ggml-org/whisper.cpp
WHISPER_CPP_VERSION?=19ceec8eac980403b714d603e5ca31653cd42a3f
WHISPER_CPP_VERSION?=d9b7613b34a343848af572cc14467fc5e82fc788
SO_TARGET?=libgowhisper.so
CMAKE_ARGS+=-DBUILD_SHARED_LIBS=OFF

View File

@@ -6,4 +6,4 @@ transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -7,4 +7,4 @@ transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -6,4 +6,4 @@ transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -8,4 +8,4 @@ bitsandbytes
outetts
bitsandbytes
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -10,4 +10,4 @@ intel-extension-for-transformers
bitsandbytes
outetts
sentence-transformers==5.1.0
protobuf==6.33.1
protobuf==6.32.0

View File

@@ -1,5 +1,5 @@
grpcio==1.76.0
protobuf==6.33.1
protobuf==6.32.0
certifi
setuptools
scipy==1.15.1

View File

@@ -3,13 +3,6 @@ set -e
EXTRA_PIP_INSTALL_FLAGS="--no-build-isolation"
# Avoid to overcommit the CPU during build
# https://github.com/vllm-project/vllm/issues/20079
# https://docs.vllm.ai/en/v0.8.3/serving/env_vars.html
# https://docs.redhat.com/it/documentation/red_hat_ai_inference_server/3.0/html/vllm_server_arguments/environment_variables-server-arguments
export NVCC_THREADS=2
export MAX_JOBS=1
backend_dir=$(dirname $0)
if [ -d $backend_dir/common ]; then

View File

@@ -1 +1 @@
https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
flash-attn

View File

@@ -1,9 +1,6 @@
package application
import (
"context"
"sync"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/templates"
@@ -14,14 +11,8 @@ type Application struct {
backendLoader *config.ModelConfigLoader
modelLoader *model.ModelLoader
applicationConfig *config.ApplicationConfig
startupConfig *config.ApplicationConfig // Stores original config from env vars (before file loading)
templatesEvaluator *templates.Evaluator
galleryService *services.GalleryService
watchdogMutex sync.Mutex
watchdogStop chan bool
p2pMutex sync.Mutex
p2pCtx context.Context
p2pCancel context.CancelFunc
}
func newApplication(appConfig *config.ApplicationConfig) *Application {
@@ -53,11 +44,6 @@ func (a *Application) GalleryService() *services.GalleryService {
return a.galleryService
}
// StartupConfig returns the original startup configuration (from env vars, before file loading)
func (a *Application) StartupConfig() *config.ApplicationConfig {
return a.startupConfig
}
func (a *Application) start() error {
galleryService := services.NewGalleryService(a.ApplicationConfig(), a.ModelLoader())
err := galleryService.Start(a.ApplicationConfig().Context, a.ModelConfigLoader(), a.ApplicationConfig().SystemState)

View File

@@ -1,343 +1,180 @@
package application
import (
"encoding/json"
"fmt"
"os"
"path"
"path/filepath"
"time"
"dario.cat/mergo"
"github.com/fsnotify/fsnotify"
"github.com/mudler/LocalAI/core/config"
"github.com/rs/zerolog/log"
)
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
type configFileHandler struct {
handlers map[string]fileHandler
watcher *fsnotify.Watcher
appConfig *config.ApplicationConfig
}
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
// then we can export it to other packages
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
c := configFileHandler{
handlers: make(map[string]fileHandler),
appConfig: appConfig,
}
err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
}
err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
}
err = c.Register("runtime_settings.json", readRuntimeSettingsJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "runtime_settings.json").Msg("unable to register config file handler")
}
return c
}
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
_, ok := c.handlers[filename]
if ok {
return fmt.Errorf("handler already registered for file %s", filename)
}
c.handlers[filename] = handler
if runNow {
c.callHandler(filename, handler)
}
return nil
}
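
Register above stores one handler per filename and, when runNow is true, immediately feeds it the current file content through callHandler below. A compact, standalone sketch of that register-then-run pattern with stand-in types — the my_settings.json file and the appConfig stub are hypothetical, not LocalAI's config.ApplicationConfig:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// Stand-ins for the types in the diff; the handler here takes only the raw
// file content plus a small config stub.
type appConfig struct{ APIKeys []string }
type fileHandler func(content []byte, cfg *appConfig) error

type registry struct {
	dir      string
	cfg      *appConfig
	handlers map[string]fileHandler
}

// register mirrors Register above: refuse duplicates, optionally run now.
func (r *registry) register(name string, h fileHandler, runNow bool) error {
	if _, ok := r.handlers[name]; ok {
		return fmt.Errorf("handler already registered for file %s", name)
	}
	r.handlers[name] = h
	if runNow {
		content, err := os.ReadFile(filepath.Join(r.dir, filepath.Clean(name)))
		if err != nil && !os.IsNotExist(err) {
			return err
		}
		return h(content, r.cfg)
	}
	return nil
}

func main() {
	r := &registry{dir: ".", cfg: &appConfig{}, handlers: map[string]fileHandler{}}
	// Hypothetical extra dynamic-config file, alongside api_keys.json etc.
	if err := r.register("my_settings.json", func(content []byte, cfg *appConfig) error {
		fmt.Printf("my_settings.json: %d bytes\n", len(content))
		return nil
	}, true); err != nil {
		fmt.Println("register failed:", err)
	}
}
```
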
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
fileContent, err := os.ReadFile(rootedFilePath)
if err != nil && !os.IsNotExist(err) {
log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
}
if err = handler(fileContent, c.appConfig); err != nil {
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
}
}
func (c *configFileHandler) Watch() error {
configWatcher, err := fsnotify.NewWatcher()
c.watcher = configWatcher
if err != nil {
return err
}
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
go func() {
for {
<-ticker.C
for file, handler := range c.handlers {
log.Debug().Str("file", file).Msg("polling config file")
c.callHandler(file, handler)
}
}
}()
}
// Start listening for events.
go func() {
for {
select {
case event, ok := <-c.watcher.Events:
if !ok {
return
}
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
handler, ok := c.handlers[path.Base(event.Name)]
if !ok {
continue
}
c.callHandler(filepath.Base(event.Name), handler)
}
case err, ok := <-c.watcher.Errors:
log.Error().Err(err).Msg("config watcher error received")
if !ok {
return
}
}
}
}()
// Add a path.
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
if err != nil {
return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err)
}
return nil
}
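
Watch above is a standard fsnotify loop: create a watcher, add DynamicConfigsDir, dispatch Write/Create/Remove events to the handler registered under the file's basename, and optionally fall back to polling on a ticker. A minimal standalone sketch of the same pattern, detached from LocalAI's types — the directory and the single handler below are placeholders, and the handler signature is simplified to take only the file content:

```go
package main

import (
	"log"
	"os"
	"path/filepath"

	"github.com/fsnotify/fsnotify"
)

func main() {
	dir := "./dynamic-config" // placeholder for DynamicConfigsDir

	// Handlers keyed by file basename, mirroring configFileHandler.handlers.
	handlers := map[string]func(content []byte){
		"api_keys.json": func(content []byte) {
			log.Printf("api_keys.json changed, %d bytes", len(content))
		},
	}

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		log.Fatal(err)
	}
	defer watcher.Close()

	go func() {
		for {
			select {
			case event, ok := <-watcher.Events:
				if !ok {
					return
				}
				if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
					handler, ok := handlers[filepath.Base(event.Name)]
					if !ok {
						continue
					}
					// Missing files are tolerated, as in callHandler above.
					content, err := os.ReadFile(event.Name)
					if err != nil && !os.IsNotExist(err) {
						log.Printf("read error: %v", err)
						continue
					}
					handler(content)
				}
			case err, ok := <-watcher.Errors:
				if !ok {
					return
				}
				log.Printf("watcher error: %v", err)
			}
		}
	}()

	if err := watcher.Add(dir); err != nil {
		log.Fatal(err)
	}
	select {} // block forever; a real service would tie this to its lifecycle
}
```
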
// TODO: When we institute graceful shutdown, this should be called
func (c *configFileHandler) Stop() error {
return c.watcher.Close()
}
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing api keys runtime update")
log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return err
}
log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile")
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
} else {
log.Trace().Msg("no API keys discovered from dynamic config file")
appConfig.ApiKeys = startupAppConfig.ApiKeys
}
log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
return nil
}
return handler
}
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing external_backends.json")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileBackends map[string]string
err := json.Unmarshal(fileContent, &fileBackends)
if err != nil {
return err
}
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
if err != nil {
return err
}
} else {
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
}
log.Debug().Msg("external backends loaded from external_backends.json")
return nil
}
return handler
}
type runtimeSettings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"`
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
Threads *int `json:"threads,omitempty"`
ContextSize *int `json:"context_size,omitempty"`
F16 *bool `json:"f16,omitempty"`
Debug *bool `json:"debug,omitempty"`
CORS *bool `json:"cors,omitempty"`
CSRF *bool `json:"csrf,omitempty"`
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
P2PToken *string `json:"p2p_token,omitempty"`
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
Federated *bool `json:"federated,omitempty"`
Galleries *[]config.Gallery `json:"galleries,omitempty"`
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
ApiKeys *[]string `json:"api_keys,omitempty"`
}
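
The runtimeSettings struct above is effectively the schema of runtime_settings.json: every field is a pointer with omitempty, so keys absent from the file leave the corresponding setting alone. A hedged example of a file that overrides only a few settings, produced here by marshalling a partial mirror of that struct — the field subset and values are illustrative, not defaults:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Partial mirror of the runtimeSettings struct above; pointer fields plus
// omitempty mean only the keys you actually set end up in the file.
type runtimeSettingsExample struct {
	WatchdogEnabled     *bool   `json:"watchdog_enabled,omitempty"`
	WatchdogIdleEnabled *bool   `json:"watchdog_idle_enabled,omitempty"`
	WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
	Threads             *int    `json:"threads,omitempty"`
	ContextSize         *int    `json:"context_size,omitempty"`
	Debug               *bool   `json:"debug,omitempty"`
}

func ptr[T any](v T) *T { return &v }

func main() {
	s := runtimeSettingsExample{
		WatchdogEnabled:     ptr(true),
		WatchdogIdleEnabled: ptr(true),
		WatchdogIdleTimeout: ptr("15m"), // parsed with time.ParseDuration in the handler above
		Threads:             ptr(8),
		ContextSize:         ptr(4096),
		Debug:               ptr(false),
	}
	out, err := json.MarshalIndent(s, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // example runtime_settings.json content
}
```
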
func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing runtime_settings.json")
// Determine if settings came from env vars by comparing with startup config
// startupAppConfig contains the original values set from env vars at startup.
// If current values match startup values, they came from env vars (or defaults).
// We apply file settings only if current values match startup values (meaning not from env vars).
envWatchdogIdle := appConfig.WatchDogIdle == startupAppConfig.WatchDogIdle
envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy
envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout
envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout
envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend
envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests
envThreads := appConfig.Threads == startupAppConfig.Threads
envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize
envF16 := appConfig.F16 == startupAppConfig.F16
envDebug := appConfig.Debug == startupAppConfig.Debug
envCORS := appConfig.CORS == startupAppConfig.CORS
envCSRF := appConfig.CSRF == startupAppConfig.CSRF
envCORSAllowOrigins := appConfig.CORSAllowOrigins == startupAppConfig.CORSAllowOrigins
envP2PToken := appConfig.P2PToken == startupAppConfig.P2PToken
envP2PNetworkID := appConfig.P2PNetworkID == startupAppConfig.P2PNetworkID
envFederated := appConfig.Federated == startupAppConfig.Federated
envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries
envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries
if len(fileContent) > 0 {
var settings runtimeSettings
err := json.Unmarshal(fileContent, &settings)
if err != nil {
return err
}
// Apply each file setting only when the current value no longer matches the startup value
if settings.WatchdogIdleEnabled != nil && !envWatchdogIdle {
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
if appConfig.WatchDogIdle {
appConfig.WatchDog = true
}
}
if settings.WatchdogBusyEnabled != nil && !envWatchdogBusy {
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
if appConfig.WatchDogBusy {
appConfig.WatchDog = true
}
}
if settings.WatchdogIdleTimeout != nil && !envWatchdogIdleTimeout {
dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err == nil {
appConfig.WatchDogIdleTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
}
}
if settings.WatchdogBusyTimeout != nil && !envWatchdogBusyTimeout {
dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err == nil {
appConfig.WatchDogBusyTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
}
}
if settings.SingleBackend != nil && !envSingleBackend {
appConfig.SingleBackend = *settings.SingleBackend
}
if settings.ParallelBackendRequests != nil && !envParallelRequests {
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
}
if settings.Threads != nil && !envThreads {
appConfig.Threads = *settings.Threads
}
if settings.ContextSize != nil && !envContextSize {
appConfig.ContextSize = *settings.ContextSize
}
if settings.F16 != nil && !envF16 {
appConfig.F16 = *settings.F16
}
if settings.Debug != nil && !envDebug {
appConfig.Debug = *settings.Debug
}
if settings.CORS != nil && !envCORS {
appConfig.CORS = *settings.CORS
}
if settings.CSRF != nil && !envCSRF {
appConfig.CSRF = *settings.CSRF
}
if settings.CORSAllowOrigins != nil && !envCORSAllowOrigins {
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
}
if settings.P2PToken != nil && !envP2PToken {
appConfig.P2PToken = *settings.P2PToken
}
if settings.P2PNetworkID != nil && !envP2PNetworkID {
appConfig.P2PNetworkID = *settings.P2PNetworkID
}
if settings.Federated != nil && !envFederated {
appConfig.Federated = *settings.Federated
}
if settings.Galleries != nil {
appConfig.Galleries = *settings.Galleries
}
if settings.BackendGalleries != nil {
appConfig.BackendGalleries = *settings.BackendGalleries
}
if settings.AutoloadGalleries != nil && !envAutoloadGalleries {
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
}
if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries {
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
}
if settings.ApiKeys != nil {
// Keys provided at startup (env vars) are always kept. If runtime_settings.json
// specifies api_keys (even an empty list), it replaces all runtime-managed keys,
// so the result is startup keys plus whatever the file currently lists.
envKeys := startupAppConfig.ApiKeys
runtimeKeys := *settings.ApiKeys
// Replace all runtime keys with what's in runtime_settings.json
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
}
// If watchdog is enabled via file but not via env, ensure WatchDog flag is set
if !envWatchdogIdle && !envWatchdogBusy {
if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
appConfig.WatchDog = true
}
}
}
log.Debug().Msg("runtime settings loaded from runtime_settings.json")
return nil
}
return handler
}
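For reference, a minimal sketch of the kind of payload the handler above accepts. The field names come from the runtimeSettings JSON tags; the concrete values are illustrative assumptions, not LocalAI defaults.
// Illustrative only: example runtime_settings.json contents and how they map onto
// the runtimeSettings struct above. Values are made up for the example.
var exampleRuntimeSettings = []byte(`{
  "watchdog_enabled": true,
  "watchdog_idle_enabled": true,
  "watchdog_idle_timeout": "15m",
  "parallel_backend_requests": true,
  "api_keys": ["example-runtime-key"]
}`)

func exampleParseRuntimeSettings() (runtimeSettings, error) {
	var settings runtimeSettings
	// The handler above parses timeouts such as "15m" with time.ParseDuration afterwards.
	err := json.Unmarshal(exampleRuntimeSettings, &settings)
	return settings, err
}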
package application
import (
"encoding/json"
"fmt"
"os"
"path"
"path/filepath"
"time"
"dario.cat/mergo"
"github.com/fsnotify/fsnotify"
"github.com/mudler/LocalAI/core/config"
"github.com/rs/zerolog/log"
)
type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error
type configFileHandler struct {
handlers map[string]fileHandler
watcher *fsnotify.Watcher
appConfig *config.ApplicationConfig
}
// TODO: This should be a singleton eventually so other parts of the code can register config file handlers,
// then we can export it to other packages
func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler {
c := configFileHandler{
handlers: make(map[string]fileHandler),
appConfig: appConfig,
}
err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler")
}
err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true)
if err != nil {
log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler")
}
return c
}
func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error {
_, ok := c.handlers[filename]
if ok {
return fmt.Errorf("handler already registered for file %s", filename)
}
c.handlers[filename] = handler
if runNow {
c.callHandler(filename, handler)
}
return nil
}
func (c *configFileHandler) callHandler(filename string, handler fileHandler) {
rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename))
log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update")
fileContent, err := os.ReadFile(rootedFilePath)
if err != nil && !os.IsNotExist(err) {
log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file")
}
if err = handler(fileContent, c.appConfig); err != nil {
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options")
}
}
func (c *configFileHandler) Watch() error {
configWatcher, err := fsnotify.NewWatcher()
c.watcher = configWatcher
if err != nil {
return err
}
if c.appConfig.DynamicConfigsDirPollInterval > 0 {
log.Debug().Msg("Poll interval set, falling back to polling for configuration changes")
ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval)
go func() {
for {
<-ticker.C
for file, handler := range c.handlers {
log.Debug().Str("file", file).Msg("polling config file")
c.callHandler(file, handler)
}
}
}()
}
// Start listening for events.
go func() {
for {
select {
case event, ok := <-c.watcher.Events:
if !ok {
return
}
if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) {
handler, ok := c.handlers[path.Base(event.Name)]
if !ok {
continue
}
c.callHandler(filepath.Base(event.Name), handler)
}
case err, ok := <-c.watcher.Errors:
log.Error().Err(err).Msg("config watcher error received")
if !ok {
return
}
}
}
}()
// Add a path.
err = c.watcher.Add(c.appConfig.DynamicConfigsDir)
if err != nil {
return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err)
}
return nil
}
// TODO: When we institute graceful shutdown, this should be called
func (c *configFileHandler) Stop() error {
return c.watcher.Close()
}
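A minimal in-package sketch of how an additional dynamic config file could be wired into the watcher via Register; the file name my_feature.json and the handler body are hypothetical, not files LocalAI ships.
// Hypothetical example: registering an extra dynamic config file handler.
func registerMyFeatureHandler(c *configFileHandler) {
	handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
		if len(fileContent) == 0 {
			log.Debug().Msg("my_feature.json missing or empty, keeping current settings")
			return nil
		}
		var enabled bool
		if err := json.Unmarshal(fileContent, &enabled); err != nil {
			return err
		}
		log.Debug().Bool("enabled", enabled).Msg("my_feature.json loaded")
		return nil
	}
	// runNow=true runs the handler immediately against the current file contents,
	// after which Watch() keeps it updated on fsnotify events (or on each poll tick).
	if err := c.Register("my_feature.json", handler, true); err != nil {
		log.Error().Err(err).Str("file", "my_feature.json").Msg("unable to register config file handler")
	}
}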
func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing api keys runtime update")
log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileKeys []string
err := json.Unmarshal(fileContent, &fileKeys)
if err != nil {
return err
}
log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile")
appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...)
} else {
log.Trace().Msg("no API keys discovered from dynamic config file")
appConfig.ApiKeys = startupAppConfig.ApiKeys
}
log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing")
return nil
}
return handler
}
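To illustrate the merge semantics above: keys supplied at startup (e.g. via LOCALAI_API_KEY) always survive, and api_keys.json only contributes additional keys. The key strings below are placeholders.
// Illustrative only: startup keys are kept, file keys are appended.
func exampleMergeApiKeys() []string {
	startupKeys := []string{"env-key"}                    // e.g. from LOCALAI_API_KEY at startup
	fileContent := []byte(`["file-key-1", "file-key-2"]`) // hypothetical api_keys.json contents

	var fileKeys []string
	if err := json.Unmarshal(fileContent, &fileKeys); err != nil {
		return startupKeys // the real handler returns the error instead
	}
	return append(startupKeys, fileKeys...) // [env-key file-key-1 file-key-2]
}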
func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler {
handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error {
log.Debug().Msg("processing external_backends.json")
if len(fileContent) > 0 {
// Parse JSON content from the file
var fileBackends map[string]string
err := json.Unmarshal(fileContent, &fileBackends)
if err != nil {
return err
}
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends)
if err != nil {
return err
}
} else {
appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends
}
log.Debug().Msg("external backends loaded from external_backends.json")
return nil
}
return handler
}
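A small sketch of the merge behavior above: with mergo's default (non-override) merge, a backend name already registered at startup is expected to keep its startup address, while new names from external_backends.json are added. Backend names and addresses below are made up.
// Illustrative only: expected outcome of mergo.Merge without mergo.WithOverride.
func exampleMergeExternalBackends() (map[string]string, error) {
	backends := map[string]string{"piper": "127.0.0.1:9000"} // supplied at startup (flags/env)
	fromFile := map[string]string{
		"piper":   "127.0.0.1:9999", // same key: expected to be ignored without WithOverride
		"whisper": "127.0.0.1:9001", // new key: added
	}
	err := mergo.Merge(&backends, fromFile)
	return backends, err // map[piper:127.0.0.1:9000 whisper:127.0.0.1:9001]
}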

View File

@@ -1,240 +0,0 @@
package application
import (
"context"
"fmt"
"net"
"slices"
"time"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
zlog "github.com/rs/zerolog/log"
)
func (a *Application) StopP2P() error {
if a.p2pCancel != nil {
a.p2pCancel()
a.p2pCancel = nil
a.p2pCtx = nil
// Wait a bit for shutdown to complete
time.Sleep(200 * time.Millisecond)
}
return nil
}
func (a *Application) StartP2P() error {
// we need a p2p token
if a.applicationConfig.P2PToken == "" {
return fmt.Errorf("P2P token is not set")
}
networkID := a.applicationConfig.P2PNetworkID
ctx, cancel := context.WithCancel(a.ApplicationConfig().Context)
a.p2pCtx = ctx
a.p2pCancel = cancel
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance
// running at the configured API address
if a.applicationConfig.Federated {
_, port, err := net.SplitHostPort(a.applicationConfig.APIAddress)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
// start node sync in the background
if err := a.p2pSync(ctx, node); err != nil {
return err
}
}
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(a.applicationConfig.P2PToken)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
if a.applicationConfig.TunnelCallback != nil {
a.applicationConfig.TunnelCallback(tunnelAddresses)
}
}, true); err != nil {
return err
}
return nil
}
// RestartP2P restarts the P2P stack with the current ApplicationConfig settings:
// it stops any running P2P node, then starts a new one in the background via StartP2P.
func (a *Application) RestartP2P() error {
a.p2pMutex.Lock()
defer a.p2pMutex.Unlock()
// Stop existing P2P if running
if a.p2pCancel != nil {
a.p2pCancel()
a.p2pCancel = nil
a.p2pCtx = nil
// Wait a bit for shutdown to complete
time.Sleep(200 * time.Millisecond)
}
appConfig := a.ApplicationConfig()
// Start P2P if token is set
if appConfig.P2PToken == "" {
return fmt.Errorf("P2P token is not set")
}
// Create new context for P2P
ctx, cancel := context.WithCancel(appConfig.Context)
a.p2pCtx = ctx
a.p2pCancel = cancel
// Get API address from config
address := appConfig.APIAddress
if address == "" {
address = "127.0.0.1:8080" // default
}
// Start P2P stack in a goroutine
go func() {
if err := a.StartP2P(); err != nil {
log.Error().Err(err).Msg("Failed to start P2P stack")
cancel() // Cancel context on error
}
}()
log.Info().Msg("P2P stack restarted with new settings")
return nil
}
func syncState(ctx context.Context, n *node.Node, app *Application) error {
zlog.Debug().Msg("[p2p-sync] Syncing state")
whatWeHave := []string{}
for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() {
whatWeHave = append(whatWeHave, model.Name)
}
ledger, _ := n.Ledger()
currentData := ledger.CurrentData()
zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData)
data, exists := ledger.GetKey("shared_state", "models")
if !exists {
ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave)
zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave)
}
models := []string{}
if err := data.Unmarshal(&models); err != nil {
zlog.Warn().Err(err).Msg("error unmarshalling models")
return nil
}
zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models)
// Sync with our state
whatIsNotThere := []string{}
for _, model := range whatWeHave {
if !slices.Contains(models, model) {
whatIsNotThere = append(whatIsNotThere, model)
}
}
if len(whatIsNotThere) > 0 {
zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...))
ledger.AnnounceUpdate(
ctx,
1*time.Minute,
"shared_state",
"models",
append(models, whatIsNotThere...),
)
}
// Check if we have a model that is not in our state, otherwise install it
for _, model := range models {
if slices.Contains(whatWeHave, model) {
zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model)
continue
}
// we install model
zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model)
uuid, err := uuid.NewUUID()
if err != nil {
zlog.Error().Err(err).Msg("error generating UUID")
continue
}
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uuid.String(),
GalleryElementName: model,
Galleries: app.ApplicationConfig().Galleries,
BackendGalleries: app.ApplicationConfig().BackendGalleries,
}
}
return nil
}
func (a *Application) p2pSync(ctx context.Context, n *node.Node) error {
go func() {
for {
select {
case <-ctx.Done():
return
case <-time.After(1 * time.Minute):
if err := syncState(ctx, n, a); err != nil {
zlog.Error().Err(err).Msg("error syncing state")
}
}
}
}()
return nil
}

View File

@@ -1,11 +1,8 @@
package application
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
@@ -21,12 +18,7 @@ import (
func New(opts ...config.AppOption) (*Application, error) {
options := config.NewApplicationConfig(opts...)
// Store a copy of the startup config (from env vars, before file loading)
// This is used to determine if settings came from env vars vs file
startupConfigCopy := *options
application := newApplication(options)
application.startupConfig = &startupConfigCopy
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
@@ -118,13 +110,6 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
// Load runtime settings from file if DynamicConfigsDir is set
// This applies file settings with env var precedence (env vars take priority)
// Note: startupConfigCopy was already created above, so it has the original env var values
if options.DynamicConfigsDir != "" {
loadRuntimeSettingsFromFile(options)
}
// turn off any process that was started by GRPC if the context is canceled
go func() {
<-options.Context.Done()
@@ -135,8 +120,21 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}()
// Initialize watchdog with current settings (after loading from file)
initializeWatchdog(application, options)
if options.WatchDog {
wd := model.NewWatchDog(
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
application.ModelLoader().SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
wd.Shutdown()
}()
}
if options.LoadToMemory != nil && !options.SingleBackend {
for _, m := range options.LoadToMemory {
@@ -188,131 +186,3 @@ func startWatcher(options *config.ApplicationConfig) {
log.Error().Err(err).Msg("failed creating watcher")
}
}
// loadRuntimeSettingsFromFile loads settings from runtime_settings.json at startup,
// giving precedence to values already applied from env vars via AppOptions.
// By the time this runs, NewApplicationConfig has already applied the AppOptions, so we
// cannot tell directly whether a value came from an env var. As a heuristic, any value
// that is still at its zero default is treated as "not set from env" and may be set from
// the file; non-default values are preserved. Runtime changes after startup are handled
// by the file watcher handler, which compares against startupAppConfig instead.
func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
settingsFile := filepath.Join(options.DynamicConfigsDir, "runtime_settings.json")
fileContent, err := os.ReadFile(settingsFile)
if err != nil {
if os.IsNotExist(err) {
log.Debug().Msg("runtime_settings.json not found, using defaults")
return
}
log.Warn().Err(err).Msg("failed to read runtime_settings.json")
return
}
var settings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"`
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
}
if err := json.Unmarshal(fileContent, &settings); err != nil {
log.Warn().Err(err).Msg("failed to parse runtime_settings.json")
return
}
// At this point, options already holds the values applied from env vars (via AppOptions in run.go).
// Since we cannot tell whether a value was explicitly set from an env var, use the defaults as a
// proxy: bools default to false and durations to 0. A value still at its default is assumed not to
// come from an env var, so the file may set it; a non-default value is preserved.
// Note: env vars that explicitly set false/0 are indistinguishable from the defaults;
// that is an accepted limitation of this heuristic.
if settings.WatchdogIdleEnabled != nil {
// Only apply if current value is default (false), suggesting it wasn't set from env var
if !options.WatchDogIdle {
options.WatchDogIdle = *settings.WatchdogIdleEnabled
if options.WatchDogIdle {
options.WatchDog = true
}
}
}
if settings.WatchdogBusyEnabled != nil {
if !options.WatchDogBusy {
options.WatchDogBusy = *settings.WatchdogBusyEnabled
if options.WatchDogBusy {
options.WatchDog = true
}
}
}
if settings.WatchdogIdleTimeout != nil {
// Only apply if current value is default (0), suggesting it wasn't set from env var
if options.WatchDogIdleTimeout == 0 {
dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err == nil {
options.WatchDogIdleTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json")
}
}
}
if settings.WatchdogBusyTimeout != nil {
if options.WatchDogBusyTimeout == 0 {
dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err == nil {
options.WatchDogBusyTimeout = dur
} else {
log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json")
}
}
}
if settings.SingleBackend != nil {
if !options.SingleBackend {
options.SingleBackend = *settings.SingleBackend
}
}
if settings.ParallelBackendRequests != nil {
if !options.ParallelBackendRequests {
options.ParallelBackendRequests = *settings.ParallelBackendRequests
}
}
if !options.WatchDogIdle && !options.WatchDogBusy {
if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled {
options.WatchDog = true
}
}
log.Debug().Msg("Runtime settings loaded from runtime_settings.json")
}
// initializeWatchdog initializes the watchdog with current ApplicationConfig settings
func initializeWatchdog(application *Application, options *config.ApplicationConfig) {
if options.WatchDog {
wd := model.NewWatchDog(
application.ModelLoader(),
options.WatchDogBusyTimeout,
options.WatchDogIdleTimeout,
options.WatchDogBusy,
options.WatchDogIdle)
application.ModelLoader().SetWatchDog(wd)
go wd.Run()
go func() {
<-options.Context.Done()
log.Debug().Msgf("Context canceled, shutting down")
wd.Shutdown()
}()
}
}

View File

@@ -1,88 +0,0 @@
package application
import (
"time"
"github.com/mudler/LocalAI/pkg/model"
"github.com/rs/zerolog/log"
)
func (a *Application) StopWatchdog() error {
if a.watchdogStop != nil {
close(a.watchdogStop)
a.watchdogStop = nil
}
return nil
}
// startWatchdog starts the watchdog with current ApplicationConfig settings
// This is an internal method that assumes the caller holds the watchdogMutex
func (a *Application) startWatchdog() error {
appConfig := a.ApplicationConfig()
// Create new watchdog if enabled
if appConfig.WatchDog {
wd := model.NewWatchDog(
a.modelLoader,
appConfig.WatchDogBusyTimeout,
appConfig.WatchDogIdleTimeout,
appConfig.WatchDogBusy,
appConfig.WatchDogIdle)
a.modelLoader.SetWatchDog(wd)
// Create new stop channel
a.watchdogStop = make(chan bool, 1)
// Start watchdog goroutine
go wd.Run()
// Setup shutdown handler
go func() {
select {
case <-a.watchdogStop:
log.Debug().Msg("Watchdog stop signal received")
wd.Shutdown()
case <-appConfig.Context.Done():
log.Debug().Msg("Context canceled, shutting down watchdog")
wd.Shutdown()
}
}()
log.Info().Msg("Watchdog started with new settings")
} else {
log.Info().Msg("Watchdog disabled")
}
return nil
}
// StartWatchdog starts the watchdog with current ApplicationConfig settings
func (a *Application) StartWatchdog() error {
a.watchdogMutex.Lock()
defer a.watchdogMutex.Unlock()
return a.startWatchdog()
}
// RestartWatchdog restarts the watchdog with current ApplicationConfig settings
func (a *Application) RestartWatchdog() error {
a.watchdogMutex.Lock()
defer a.watchdogMutex.Unlock()
// Shutdown existing watchdog if running
if a.watchdogStop != nil {
close(a.watchdogStop)
a.watchdogStop = nil
}
// Also shut down the watchdog instance currently attached to the model loader
currentWD := a.modelLoader.GetWatchDog()
if currentWD != nil {
currentWD.Shutdown()
// Wait a bit for shutdown to complete
time.Sleep(100 * time.Millisecond)
}
// Start watchdog with new settings
return a.startWatchdog()
}
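A hedged sketch of how a runtime settings update could use the methods above: mutate the shared ApplicationConfig, then call RestartWatchdog so a fresh watchdog picks up the new values. It assumes ApplicationConfig() exposes the live *config.ApplicationConfig; the helper itself is illustrative, not part of LocalAI.
// Illustrative only: apply new watchdog settings at runtime.
func exampleApplyWatchdogSettings(app *Application, idle, busy time.Duration) error {
	cfg := app.ApplicationConfig() // assumed to return the live *config.ApplicationConfig
	cfg.WatchDog = true
	cfg.WatchDogIdle = true
	cfg.WatchDogBusy = true
	cfg.WatchDogIdleTimeout = idle
	cfg.WatchDogBusyTimeout = busy
	// Tears down any running watchdog and starts a fresh one with the settings above.
	return app.RestartWatchdog()
}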

View File

@@ -40,7 +40,3 @@ func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negat
return fn, nil
}
// ImageGenerationFunc is a test-friendly indirection to call image generation logic.
// Tests can override this variable to provide a stub implementation.
var ImageGenerationFunc = ImageGeneration

87
core/cli/api/p2p.go Normal file
View File

@@ -0,0 +1,87 @@
package cli_api
import (
"context"
"fmt"
"net"
"os"
"strings"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/edgevpn/pkg/node"
"github.com/rs/zerolog/log"
)
func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool, app *application.Application) error {
var n *node.Node
// Here we are avoiding creating multiple nodes:
// - if the federated mode is enabled, we create a federated node and expose a service
// - exposing a service creates a node with specific options, and we don't want to create another node
// If the federated mode is enabled, we expose a service to the local instance
// running at the provided address
if federated {
_, port, err := net.SplitHostPort(address)
if err != nil {
return err
}
// Here a new node is created and started
// and a service is exposed by the node
node, err := p2p.ExposeService(ctx, "localhost", port, token, p2p.NetworkID(networkID, p2p.FederatedID))
if err != nil {
return err
}
if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil {
return err
}
n = node
// start node sync in the background
if err := p2p.Sync(ctx, node, app); err != nil {
return err
}
}
// If the p2p mode is enabled, we start the service discovery
if token != "" {
// If a node wasn't created previously, create it
if n == nil {
node, err := p2p.NewNode(token)
if err != nil {
return err
}
err = node.Start(ctx)
if err != nil {
return fmt.Errorf("starting new node: %w", err)
}
n = node
}
// Attach a ServiceDiscoverer to the p2p node
log.Info().Msg("Starting P2P server discovery...")
if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) {
var tunnelAddresses []string
for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) {
if v.IsOnline() {
tunnelAddresses = append(tunnelAddresses, v.TunnelAddress)
} else {
log.Info().Msgf("Node %s is offline", v.ID)
}
}
tunnelEnvVar := strings.Join(tunnelAddresses, ",")
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}, true); err != nil {
return err
}
}
return nil
}
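Design note: unlike Application.StartP2P earlier in this diff, this helper writes the discovered tunnel addresses straight into LLAMACPP_GRPC_SERVERS instead of going through a TunnelCallback. A minimal sketch of the call, mirroring the run.go hunk further below:
// Sketch of the call site in core/cli/run.go (see the hunk further below).
if token != "" {
	if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated, app); err != nil {
		return err
	}
}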

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/mudler/LocalAI/core/application"
cli_api "github.com/mudler/LocalAI/core/cli/api"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http"
@@ -51,7 +52,6 @@ type RunCMD struct {
UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"`
APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"`
DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"`
DisableRuntimeSettings bool `env:"LOCALAI_DISABLE_RUNTIME_SETTINGS,DISABLE_RUNTIME_SETTINGS" default:"false" help:"Disables the runtime settings. When set to true, the server will not load the runtime settings from the runtime_settings.json file" group:"api"`
DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"`
OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"`
UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliency against timing attacks." group:"hardening"`
@@ -98,7 +98,6 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
}
opts := []config.AppOption{
config.WithContext(context.Background()),
config.WithConfigFile(r.ModelsConfigFile),
config.WithJSONStringPreload(r.PreloadModels),
config.WithYAMLConfigPreload(r.PreloadModelsConfig),
@@ -129,22 +128,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
config.WithLoadToMemory(r.LoadToMemory),
config.WithMachineTag(r.MachineTag),
config.WithAPIAddress(r.Address),
config.WithTunnelCallback(func(tunnels []string) {
tunnelEnvVar := strings.Join(tunnels, ",")
// TODO: this is very specific to llama.cpp, we should have a more generic way to set the environment variable
os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar)
log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar)
}),
}
if r.DisableMetricsEndpoint {
opts = append(opts, config.DisableMetricsEndpoint)
}
if r.DisableRuntimeSettings {
opts = append(opts, config.DisableRuntimeSettings)
}
token := ""
if r.Peer2Peer || r.Peer2PeerToken != "" {
log.Info().Msg("P2P mode enabled")
@@ -163,9 +152,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
opts = append(opts, config.WithP2PToken(token))
}
if r.Federated {
opts = append(opts, config.EnableFederated)
}
backgroundCtx := context.Background()
idleWatchDog := r.EnableWatchdogIdle
busyWatchDog := r.EnableWatchdogBusy
@@ -235,10 +222,8 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
return err
}
if token != "" {
if err := app.StartP2P(); err != nil {
return err
}
if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated, app); err != nil {
return err
}
signals.RegisterGracefulTerminationHandler(func() {

View File

@@ -33,7 +33,6 @@ type ApplicationConfig struct {
ApiKeys []string
P2PToken string
P2PNetworkID string
Federated bool
DisableWebUI bool
EnforcePredownloadScans bool
@@ -66,10 +65,6 @@ type ApplicationConfig struct {
MachineTag string
APIAddress string
TunnelCallback func(tunnels []string)
DisableRuntimeSettings bool
}
type AppOption func(*ApplicationConfig)
@@ -78,6 +73,7 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
opt := &ApplicationConfig{
Context: context.Background(),
UploadLimitMB: 15,
ContextSize: 512,
Debug: true,
}
for _, oo := range o {
@@ -156,10 +152,6 @@ var DisableWebUI = func(o *ApplicationConfig) {
o.DisableWebUI = true
}
var DisableRuntimeSettings = func(o *ApplicationConfig) {
o.DisableRuntimeSettings = true
}
func SetWatchDogBusyTimeout(t time.Duration) AppOption {
return func(o *ApplicationConfig) {
o.WatchDogBusyTimeout = t
@@ -188,10 +180,6 @@ var EnableBackendGalleriesAutoload = func(o *ApplicationConfig) {
o.AutoloadBackendGalleries = true
}
var EnableFederated = func(o *ApplicationConfig) {
o.Federated = true
}
func WithExternalBackend(name string, uri string) AppOption {
return func(o *ApplicationConfig) {
if o.ExternalGRPCBackends == nil {
@@ -285,12 +273,6 @@ func WithContextSize(ctxSize int) AppOption {
}
}
func WithTunnelCallback(callback func(tunnels []string)) AppOption {
return func(o *ApplicationConfig) {
o.TunnelCallback = callback
}
}
func WithF16(f16 bool) AppOption {
return func(o *ApplicationConfig) {
o.F16 = f16

View File

@@ -1,7 +1,6 @@
package config
import (
"fmt"
"os"
"regexp"
"slices"
@@ -476,7 +475,7 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.syncKnownUsecasesFromString()
}
func (c *ModelConfig) Validate() (bool, error) {
func (c *ModelConfig) Validate() bool {
downloadedFileNames := []string{}
for _, f := range c.DownloadFiles {
downloadedFileNames = append(downloadedFileNames, f.Filename)
@@ -490,20 +489,17 @@ func (c *ModelConfig) Validate() (bool, error) {
}
if strings.HasPrefix(n, string(os.PathSeparator)) ||
strings.Contains(n, "..") {
return false, fmt.Errorf("invalid file path: %s", n)
return false
}
}
if c.Backend != "" {
// a regex that checks the backend name contains no special characters, except '-' and '_'
re := regexp.MustCompile(`^[a-zA-Z0-9-_]+$`)
if !re.MatchString(c.Backend) {
return false, fmt.Errorf("invalid backend name: %s", c.Backend)
}
return true, nil
return re.MatchString(c.Backend)
}
return true, nil
return true
}
func (c *ModelConfig) HasTemplate() bool {
@@ -538,8 +534,7 @@ const (
func GetAllModelConfigUsecases() map[string]ModelConfigUsecases {
return map[string]ModelConfigUsecases{
// Note: FLAG_ANY is intentionally excluded from this map
// because it's 0 and would always match in HasUsecases checks
"FLAG_ANY": FLAG_ANY,
"FLAG_CHAT": FLAG_CHAT,
"FLAG_COMPLETION": FLAG_COMPLETION,
"FLAG_EDIT": FLAG_EDIT,
@@ -641,7 +636,7 @@ func (c *ModelConfig) GuessUsecases(u ModelConfigUsecases) bool {
}
}
if (u & FLAG_TTS) == FLAG_TTS {
ttsBackends := []string{"bark-cpp", "piper", "transformers-musicgen", "kokoro"}
ttsBackends := []string{"bark-cpp", "piper", "transformers-musicgen"}
if !slices.Contains(ttsBackends, c.Backend) {
return false
}
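The FLAG_ANY note above comes down to bitmask arithmetic: a zero-valued flag satisfies u & flag == flag for every u, so putting it in the usecase map would make it match every HasUsecases check. A tiny self-contained sketch (the flag values are illustrative, not LocalAI's real constants):
package main

import "fmt"

type usecase int

// Illustrative flag values only.
const (
	flagAny  usecase = 0
	flagChat usecase = 1 << iota // 2
	flagTTS                      // 4
)

func has(u, flag usecase) bool { return u&flag == flag }

func main() {
	u := flagTTS
	fmt.Println(has(u, flagChat)) // false
	fmt.Println(has(u, flagTTS))  // true
	fmt.Println(has(u, flagAny))  // true for any u, because flagAny is 0
}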

View File

@@ -169,7 +169,7 @@ func (bcl *ModelConfigLoader) LoadMultipleModelConfigsSingleFile(file string, op
}
for _, cc := range c {
if valid, _ := cc.Validate(); valid {
if cc.Validate() {
bcl.configs[cc.Name] = *cc
}
}
@@ -184,7 +184,7 @@ func (bcl *ModelConfigLoader) ReadModelConfig(file string, opts ...ConfigLoaderO
return fmt.Errorf("ReadModelConfig cannot read config file %q: %w", file, err)
}
if valid, _ := c.Validate(); valid {
if c.Validate() {
bcl.configs[c.Name] = *c
} else {
return fmt.Errorf("config is not valid")
@@ -362,7 +362,7 @@ func (bcl *ModelConfigLoader) LoadModelConfigsFromPath(path string, opts ...Conf
log.Error().Err(err).Str("File Name", file.Name()).Msgf("LoadModelConfigsFromPath cannot read config file")
continue
}
if valid, _ := c.Validate(); valid {
if c.Validate() {
bcl.configs[c.Name] = *c
} else {
log.Error().Err(err).Str("Name", c.Name).Msgf("config is not valid")

View File

@@ -28,9 +28,7 @@ known_usecases:
config, err := readModelConfigFromFile(tmp.Name())
Expect(err).To(BeNil())
Expect(config).ToNot(BeNil())
valid, err := config.Validate()
Expect(err).To(HaveOccurred())
Expect(valid).To(BeFalse())
Expect(config.Validate()).To(BeFalse())
Expect(config.KnownUsecases).ToNot(BeNil())
})
It("Test Validate", func() {
@@ -48,9 +46,7 @@ parameters:
Expect(config).ToNot(BeNil())
// two configs in config.yaml
Expect(config.Name).To(Equal("bar-baz"))
valid, err := config.Validate()
Expect(err).To(BeNil())
Expect(valid).To(BeTrue())
Expect(config.Validate()).To(BeTrue())
// download https://raw.githubusercontent.com/mudler/LocalAI/v2.25.0/embedded/models/hermes-2-pro-mistral.yaml
httpClient := http.Client{}
@@ -67,9 +63,7 @@ parameters:
Expect(config).ToNot(BeNil())
// two configs in config.yaml
Expect(config.Name).To(Equal("hermes-2-pro-mistral"))
valid, err = config.Validate()
Expect(err).To(BeNil())
Expect(valid).To(BeTrue())
Expect(config.Validate()).To(BeTrue())
})
})
It("Properly handles backend usecase matching", func() {

View File

@@ -164,7 +164,7 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL
return fmt.Errorf("failed copying: %w", err)
}
} else {
log.Debug().Str("uri", config.URI).Str("backendPath", backendPath).Msg("Downloading backend")
uri := downloader.URI(config.URI)
if err := uri.DownloadFileWithContext(ctx, backendPath, "", 1, 1, downloadStatus); err != nil {
success := false
// Try to download from mirrors
@@ -177,27 +177,16 @@ func InstallBackend(ctx context.Context, systemState *system.SystemState, modelL
}
if err := downloader.URI(mirror).DownloadFileWithContext(ctx, backendPath, "", 1, 1, downloadStatus); err == nil {
success = true
log.Debug().Str("uri", config.URI).Str("backendPath", backendPath).Msg("Downloaded backend")
break
}
}
if !success {
log.Error().Str("uri", config.URI).Str("backendPath", backendPath).Err(err).Msg("Failed to download backend")
return fmt.Errorf("failed to download backend %q: %v", config.URI, err)
}
} else {
log.Debug().Str("uri", config.URI).Str("backendPath", backendPath).Msg("Downloaded backend")
}
}
// sanity check - check if runfile is present
runFile := filepath.Join(backendPath, runFile)
if _, err := os.Stat(runFile); os.IsNotExist(err) {
log.Error().Str("runFile", runFile).Msg("Run file not found")
return fmt.Errorf("not a valid backend: run file not found %q", runFile)
}
// Create metadata for the backend
metadata := &BackendMetadata{
Name: name,

View File

@@ -563,8 +563,8 @@ var _ = Describe("Gallery Backends", func() {
)
Expect(err).NotTo(HaveOccurred())
err = InstallBackend(context.TODO(), systemState, ml, &backend, nil)
Expect(newPath).To(BeADirectory())
Expect(err).To(HaveOccurred()) // Will fail due to invalid URI, but path should be created
Expect(newPath).To(BeADirectory())
})
It("should overwrite existing backend", func() {

View File

@@ -6,13 +6,11 @@ import (
"os"
"path/filepath"
"strings"
"time"
"github.com/lithammer/fuzzysearch/fuzzy"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/xsync"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
@@ -21,7 +19,7 @@ import (
func GetGalleryConfigFromURL[T any](url string, basePath string) (T, error) {
var config T
uri := downloader.URI(url)
err := uri.ReadWithCallback(basePath, func(url string, d []byte) error {
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &config)
})
if err != nil {
@@ -34,7 +32,7 @@ func GetGalleryConfigFromURL[T any](url string, basePath string) (T, error) {
func GetGalleryConfigFromURLWithContext[T any](ctx context.Context, url string, basePath string) (T, error) {
var config T
uri := downloader.URI(url)
err := uri.ReadWithAuthorizationAndCallback(ctx, basePath, "", func(url string, d []byte) error {
err := uri.DownloadWithAuthorizationAndCallback(ctx, basePath, "", func(url string, d []byte) error {
return yaml.Unmarshal(d, &config)
})
if err != nil {
@@ -143,7 +141,7 @@ func AvailableGalleryModels(galleries []config.Gallery, systemState *system.Syst
// Get models from galleries
for _, gallery := range galleries {
galleryModels, err := getGalleryElements(gallery, systemState.Model.ModelsPath, func(model *GalleryModel) bool {
galleryModels, err := getGalleryElements[*GalleryModel](gallery, systemState.Model.ModelsPath, func(model *GalleryModel) bool {
if _, err := os.Stat(filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", model.GetName()))); err == nil {
return true
}
@@ -184,7 +182,7 @@ func AvailableBackends(galleries []config.Gallery, systemState *system.SystemSta
func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) {
var refFile string
uri := downloader.URI(url)
err := uri.ReadWithCallback(basePath, func(url string, d []byte) error {
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
refFile = string(d)
if len(refFile) == 0 {
return fmt.Errorf("invalid reference file at url %s: %s", url, d)
@@ -196,17 +194,6 @@ func findGalleryURLFromReferenceURL(url string, basePath string) (string, error)
return refFile, err
}
type galleryCacheEntry struct {
yamlEntry []byte
lastUpdated time.Time
}
func (entry galleryCacheEntry) hasExpired() bool {
return entry.lastUpdated.Before(time.Now().Add(-1 * time.Hour))
}
var galleryCache = xsync.NewSyncedMap[string, galleryCacheEntry]()
func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath string, isInstalledCallback func(T) bool) ([]T, error) {
var models []T = []T{}
@@ -217,37 +204,16 @@ func getGalleryElements[T GalleryElement](gallery config.Gallery, basePath strin
return models, err
}
}
cacheKey := fmt.Sprintf("%s-%s", gallery.Name, gallery.URL)
if galleryCache.Exists(cacheKey) {
entry := galleryCache.Get(cacheKey)
// refresh if last updated is more than 1 hour ago
if !entry.hasExpired() {
err := yaml.Unmarshal(entry.yamlEntry, &models)
if err != nil {
return models, err
}
} else {
galleryCache.Delete(cacheKey)
}
}
uri := downloader.URI(gallery.URL)
if len(models) == 0 {
err := uri.ReadWithCallback(basePath, func(url string, d []byte) error {
galleryCache.Set(cacheKey, galleryCacheEntry{
yamlEntry: d,
lastUpdated: time.Now(),
})
return yaml.Unmarshal(d, &models)
})
if err != nil {
if yamlErr, ok := err.(*yaml.TypeError); ok {
log.Debug().Msgf("YAML errors: %s\n\nwreckage of models: %+v", strings.Join(yamlErr.Errors, "\n"), models)
}
return models, fmt.Errorf("failed to read gallery elements: %w", err)
err := uri.DownloadWithCallback(basePath, func(url string, d []byte) error {
return yaml.Unmarshal(d, &models)
})
if err != nil {
if yamlErr, ok := err.(*yaml.TypeError); ok {
log.Debug().Msgf("YAML errors: %s\n\nwreckage of models: %+v", strings.Join(yamlErr.Errors, "\n"), models)
}
return models, err
}
// Add gallery to models

View File

@@ -1,121 +0,0 @@
package importers
import (
"encoding/json"
"path/filepath"
"strings"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"gopkg.in/yaml.v3"
)
var _ Importer = &DiffuserImporter{}
type DiffuserImporter struct{}
func (i *DiffuserImporter) Match(details Details) bool {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return false
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return false
}
b, ok := preferencesMap["backend"].(string)
if ok && b == "diffusers" {
return true
}
if details.HuggingFace != nil {
for _, file := range details.HuggingFace.Files {
if strings.Contains(file.Path, "model_index.json") ||
strings.Contains(file.Path, "scheduler/scheduler_config.json") {
return true
}
}
}
return false
}
func (i *DiffuserImporter) Import(details Details) (gallery.ModelConfig, error) {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
preferencesMap := make(map[string]any)
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
name, ok := preferencesMap["name"].(string)
if !ok {
name = filepath.Base(details.URI)
}
description, ok := preferencesMap["description"].(string)
if !ok {
description = "Imported from " + details.URI
}
backend := "diffusers"
b, ok := preferencesMap["backend"].(string)
if ok {
backend = b
}
pipelineType, ok := preferencesMap["pipeline_type"].(string)
if !ok {
pipelineType = "StableDiffusionPipeline"
}
schedulerType, ok := preferencesMap["scheduler_type"].(string)
if !ok {
schedulerType = ""
}
enableParameters, ok := preferencesMap["enable_parameters"].(string)
if !ok {
enableParameters = "negative_prompt,num_inference_steps"
}
cuda := false
if cudaVal, ok := preferencesMap["cuda"].(bool); ok {
cuda = cudaVal
}
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"image"},
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: details.URI,
},
},
Diffusers: config.Diffusers{
PipelineType: pipelineType,
SchedulerType: schedulerType,
EnableParameters: enableParameters,
CUDA: cuda,
},
}
data, err := yaml.Marshal(modelConfig)
if err != nil {
return gallery.ModelConfig{}, err
}
return gallery.ModelConfig{
Name: name,
Description: description,
ConfigFile: string(data),
}, nil
}

View File

@@ -1,246 +0,0 @@
package importers_test
import (
"encoding/json"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/mudler/LocalAI/core/gallery/importers"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("DiffuserImporter", func() {
var importer *DiffuserImporter
BeforeEach(func() {
importer = &DiffuserImporter{}
})
Context("Match", func() {
It("should match when backend preference is diffusers", func() {
preferences := json.RawMessage(`{"backend": "diffusers"}`)
details := Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should match when HuggingFace details contain model_index.json", func() {
hfDetails := &hfapi.ModelDetails{
Files: []hfapi.ModelFile{
{Path: "model_index.json"},
},
}
details := Details{
URI: "https://huggingface.co/test/model",
HuggingFace: hfDetails,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should match when HuggingFace details contain scheduler config", func() {
hfDetails := &hfapi.ModelDetails{
Files: []hfapi.ModelFile{
{Path: "scheduler/scheduler_config.json"},
},
}
details := Details{
URI: "https://huggingface.co/test/model",
HuggingFace: hfDetails,
}
result := importer.Match(details)
Expect(result).To(BeTrue())
})
It("should not match when URI has no diffuser files and no backend preference", func() {
details := Details{
URI: "https://example.com/model.bin",
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should not match when backend preference is different", func() {
preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
details := Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
It("should return false when JSON preferences are invalid", func() {
preferences := json.RawMessage(`invalid json`)
details := Details{
URI: "https://example.com/model",
Preferences: preferences,
}
result := importer.Match(details)
Expect(result).To(BeFalse())
})
})
Context("Import", func() {
It("should import model config with default name and description", func() {
details := Details{
URI: "https://huggingface.co/test/my-diffuser-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("my-diffuser-model"))
Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/test/my-diffuser-model"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: diffusers"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("model: https://huggingface.co/test/my-diffuser-model"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusionPipeline"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("enable_parameters: negative_prompt,num_inference_steps"))
})
It("should import model config with custom name and description from preferences", func() {
preferences := json.RawMessage(`{"name": "custom-diffuser", "description": "Custom diffuser model"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("custom-diffuser"))
Expect(modelConfig.Description).To(Equal("Custom diffuser model"))
})
It("should use custom pipeline_type from preferences", func() {
preferences := json.RawMessage(`{"pipeline_type": "StableDiffusion3Pipeline"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusion3Pipeline"))
})
It("should use default pipeline_type when not specified", func() {
details := Details{
URI: "https://huggingface.co/test/my-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("pipeline_type: StableDiffusionPipeline"))
})
It("should use custom scheduler_type from preferences", func() {
preferences := json.RawMessage(`{"scheduler_type": "k_dpmpp_2m"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("scheduler_type: k_dpmpp_2m"))
})
It("should use cuda setting from preferences", func() {
preferences := json.RawMessage(`{"cuda": true}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("cuda: true"))
})
It("should use custom enable_parameters from preferences", func() {
preferences := json.RawMessage(`{"enable_parameters": "num_inference_steps,guidance_scale"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("enable_parameters: num_inference_steps,guidance_scale"))
})
It("should use custom backend from preferences", func() {
preferences := json.RawMessage(`{"backend": "diffusers"}`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: diffusers"))
})
It("should handle invalid JSON preferences", func() {
preferences := json.RawMessage(`invalid json`)
details := Details{
URI: "https://huggingface.co/test/my-model",
Preferences: preferences,
}
_, err := importer.Import(details)
Expect(err).To(HaveOccurred())
})
It("should extract filename correctly from URI with path", func() {
details := importers.Details{
URI: "https://huggingface.co/test/path/to/model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("model"))
})
It("should include known_usecases as image in config", func() {
details := Details{
URI: "https://huggingface.co/test/my-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("known_usecases:"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("- image"))
})
It("should include diffusers configuration in config", func() {
details := Details{
URI: "https://huggingface.co/test/my-model",
}
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.ConfigFile).To(ContainSubstring("diffusers:"))
})
})
})

View File

@@ -2,16 +2,11 @@ package importers
import (
"encoding/json"
"fmt"
"os"
"strings"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
@@ -20,7 +15,6 @@ var defaultImporters = []Importer{
&MLXImporter{},
&VLLMImporter{},
&TransformersImporter{},
&DiffuserImporter{},
}
type Details struct {
@@ -34,10 +28,6 @@ type Importer interface {
Import(details Details) (gallery.ModelConfig, error)
}
func hasYAMLExtension(uri string) bool {
return strings.HasSuffix(uri, ".yaml") || strings.HasSuffix(uri, ".yml")
}
func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) {
var err error
var modelConfig gallery.ModelConfig
@@ -52,61 +42,20 @@ func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.Model
if err != nil {
// maybe not a HF repository
// TODO: maybe we can check if the URI is a valid HF repository
log.Debug().Str("uri", uri).Str("hfrepoID", hfrepoID).Msg("Failed to get model details, maybe not a HF repository")
log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
} else {
log.Debug().Str("uri", uri).Msg("Got model details")
log.Debug().Any("details", hfDetails).Msg("Model details")
}
// handle local config files ("/my-model.yaml" or "file://my-model.yaml")
localURI := uri
if strings.HasPrefix(uri, downloader.LocalPrefix) {
localURI = strings.TrimPrefix(uri, downloader.LocalPrefix)
}
// if a file exists or it's a URL that ends with .yaml or .yml, read the config file directly
if _, e := os.Stat(localURI); hasYAMLExtension(localURI) && (e == nil || downloader.URI(localURI).LooksLikeURL()) {
var modelYAML []byte
if downloader.URI(localURI).LooksLikeURL() {
err := downloader.URI(localURI).ReadWithCallback(localURI, func(url string, i []byte) error {
modelYAML = i
return nil
})
if err != nil {
log.Error().Err(err).Str("filepath", localURI).Msg("error reading model definition")
return gallery.ModelConfig{}, err
}
} else {
modelYAML, err = os.ReadFile(localURI)
if err != nil {
log.Error().Err(err).Str("filepath", localURI).Msg("error reading model definition")
return gallery.ModelConfig{}, err
}
}
var modelConfig config.ModelConfig
if e := yaml.Unmarshal(modelYAML, &modelConfig); e != nil {
return gallery.ModelConfig{}, e
}
configFile, err := yaml.Marshal(modelConfig)
return gallery.ModelConfig{
Description: modelConfig.Description,
Name: modelConfig.Name,
ConfigFile: string(configFile),
}, err
}
details := Details{
HuggingFace: hfDetails,
URI: uri,
Preferences: preferences,
}
importerMatched := false
for _, importer := range defaultImporters {
if importer.Match(details) {
importerMatched = true
modelConfig, err = importer.Import(details)
if err != nil {
continue
@@ -114,8 +63,5 @@ func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.Model
break
}
}
if !importerMatched {
return gallery.ModelConfig{}, fmt.Errorf("no importer matched for %s", uri)
}
return modelConfig, nil
return modelConfig, err
}

View File

@@ -3,8 +3,6 @@ package importers_test
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
@@ -214,139 +212,4 @@ var _ = Describe("DiscoverModelConfig", func() {
Expect(modelConfig.Name).To(BeEmpty())
})
})
Context("with local YAML config files", func() {
var tempDir string
BeforeEach(func() {
var err error
tempDir, err = os.MkdirTemp("", "importers-test-*")
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
os.RemoveAll(tempDir)
})
It("should read local YAML file with file:// prefix", func() {
yamlContent := `name: test-model
backend: llama-cpp
description: Test model from local YAML
parameters:
model: /path/to/model.gguf
temperature: 0.7
`
yamlFile := filepath.Join(tempDir, "test-model.yaml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("test-model"))
Expect(modelConfig.Description).To(Equal("Test model from local YAML"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("name: test-model"))
})
It("should read local YAML file without file:// prefix (direct path)", func() {
yamlContent := `name: direct-path-model
backend: mlx
description: Test model from direct path
parameters:
model: /path/to/model.safetensors
`
yamlFile := filepath.Join(tempDir, "direct-model.yaml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("direct-path-model"))
Expect(modelConfig.Description).To(Equal("Test model from direct path"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
})
It("should read local YAML file with .yml extension", func() {
yamlContent := `name: yml-extension-model
backend: transformers
description: Test model with .yml extension
parameters:
model: /path/to/model
`
yamlFile := filepath.Join(tempDir, "test-model.yml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("yml-extension-model"))
Expect(modelConfig.Description).To(Equal("Test model with .yml extension"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: transformers"))
})
It("should ignore preferences when reading YAML files directly", func() {
yamlContent := `name: yaml-model
backend: llama-cpp
description: Original description
parameters:
model: /path/to/model.gguf
`
yamlFile := filepath.Join(tempDir, "prefs-test.yaml")
err := os.WriteFile(yamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
// Preferences should be ignored when reading YAML directly
preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description", "backend": "mlx"}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).ToNot(HaveOccurred())
// Should use values from YAML file, not preferences
Expect(modelConfig.Name).To(Equal("yaml-model"))
Expect(modelConfig.Description).To(Equal("Original description"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
})
It("should return error when local YAML file doesn't exist", func() {
nonExistentFile := filepath.Join(tempDir, "nonexistent.yaml")
uri := "file://" + nonExistentFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).To(HaveOccurred())
Expect(modelConfig.Name).To(BeEmpty())
})
It("should return error when YAML file is invalid/malformed", func() {
invalidYaml := `name: invalid-model
backend: llama-cpp
invalid: yaml: content: [unclosed bracket
`
yamlFile := filepath.Join(tempDir, "invalid.yaml")
err := os.WriteFile(yamlFile, []byte(invalidYaml), 0644)
Expect(err).ToNot(HaveOccurred())
uri := "file://" + yamlFile
preferences := json.RawMessage(`{}`)
modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
Expect(err).To(HaveOccurred())
Expect(modelConfig.Name).To(BeEmpty())
})
})
})

View File

@@ -9,9 +9,7 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/functions"
"github.com/rs/zerolog/log"
"go.yaml.in/yaml/v2"
)
@@ -22,22 +20,14 @@ type LlamaCPPImporter struct{}
func (i *LlamaCPPImporter) Match(details Details) bool {
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
log.Error().Err(err).Msg("failed to marshal preferences")
return false
}
preferencesMap := make(map[string]any)
if len(preferences) > 0 {
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
log.Error().Err(err).Msg("failed to unmarshal preferences")
return false
}
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return false
}
uri := downloader.URI(details.URI)
if preferencesMap["backend"] == "llama-cpp" {
return true
}
@@ -46,10 +36,6 @@ func (i *LlamaCPPImporter) Match(details Details) bool {
return true
}
if uri.LooksLikeOCI() {
return true
}
if details.HuggingFace != nil {
for _, file := range details.HuggingFace.Files {
if strings.HasSuffix(file.Path, ".gguf") {
@@ -62,19 +48,14 @@ func (i *LlamaCPPImporter) Match(details Details) bool {
}
func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
log.Debug().Str("uri", details.URI).Msg("llama.cpp importer matched")
preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
preferencesMap := make(map[string]any)
if len(preferences) > 0 {
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
err = json.Unmarshal(preferences, &preferencesMap)
if err != nil {
return gallery.ModelConfig{}, err
}
name, ok := preferencesMap["name"].(string)
@@ -127,40 +108,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
Description: description,
}
uri := downloader.URI(details.URI)
switch {
case uri.LooksLikeOCI():
ociName := strings.TrimPrefix(string(uri), downloader.OCIPrefix)
ociName = strings.TrimPrefix(ociName, downloader.OllamaPrefix)
ociName = strings.ReplaceAll(ociName, "/", "__")
ociName = strings.ReplaceAll(ociName, ":", "__")
cfg.Files = append(cfg.Files, gallery.File{
URI: details.URI,
Filename: ociName,
})
modelConfig.PredictionOptions = schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: ociName,
},
}
case uri.LooksLikeURL() && strings.HasSuffix(details.URI, ".gguf"):
// Extract filename from URL
fileName, e := uri.FilenameFromUrl()
if e != nil {
return gallery.ModelConfig{}, e
}
cfg.Files = append(cfg.Files, gallery.File{
URI: details.URI,
Filename: fileName,
})
modelConfig.PredictionOptions = schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
Model: fileName,
},
}
case strings.HasSuffix(details.URI, ".gguf"):
if strings.HasSuffix(details.URI, ".gguf") {
cfg.Files = append(cfg.Files, gallery.File{
URI: details.URI,
Filename: filepath.Base(details.URI),
@@ -170,7 +118,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
Model: filepath.Base(details.URI),
},
}
case details.HuggingFace != nil:
} else if details.HuggingFace != nil {
// We want to:
// Get first the chosen quants that match filenames
// OR the first mmproj/gguf file found
@@ -247,6 +195,7 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
}
break
}
}
data, err := yaml.Marshal(modelConfig)
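
The matching rules for the llama.cpp importer shown above can be condensed into a small standalone helper. This is only a sketch of the preference and file-extension checks and leaves out the HuggingFace file-listing path:

package main

import (
    "encoding/json"
    "fmt"
    "strings"
)

// matchLlamaCPP condenses the checks above: an explicit "backend":
// "llama-cpp" preference wins immediately, otherwise a URI ending in
// .gguf is taken as a llama.cpp model.
func matchLlamaCPP(uri string, preferences json.RawMessage) (bool, error) {
    prefs := map[string]any{}
    if len(preferences) > 0 {
        if err := json.Unmarshal(preferences, &prefs); err != nil {
            return false, err
        }
    }
    if prefs["backend"] == "llama-cpp" {
        return true, nil
    }
    return strings.HasSuffix(uri, ".gguf"), nil
}

func main() {
    ok, err := matchLlamaCPP("https://example.com/model-q4.gguf", json.RawMessage(`{}`))
    fmt.Println(ok, err) // true <nil>
}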

View File

@@ -6,10 +6,10 @@ import (
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"dario.cat/mergo"
"github.com/mudler/LocalAI/core/config"
lconfig "github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
@@ -17,7 +17,7 @@ import (
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
"gopkg.in/yaml.v2"
)
/*
@@ -74,7 +74,7 @@ type PromptTemplate struct {
// Installs a model from the gallery
func InstallModelFromGallery(
ctx context.Context,
modelGalleries, backendGalleries []lconfig.Gallery,
modelGalleries, backendGalleries []config.Gallery,
systemState *system.SystemState,
modelLoader *model.ModelLoader,
name string, req GalleryModel, downloadStatus func(string, string, string, float64), enforceScan, automaticallyInstallBackend bool) error {
@@ -260,8 +260,8 @@ func InstallModel(ctx context.Context, systemState *system.SystemState, nameOver
return nil, fmt.Errorf("failed to unmarshal updated config YAML: %v", err)
}
if valid, err := modelConfig.Validate(); !valid {
return nil, fmt.Errorf("failed to validate updated config YAML: %v", err)
if !modelConfig.Validate() {
return nil, fmt.Errorf("failed to validate updated config YAML")
}
err = os.WriteFile(configFilePath, updatedConfigYAML, 0600)
@@ -294,32 +294,21 @@ func GetLocalModelConfiguration(basePath string, name string) (*ModelConfig, err
return ReadConfigFile[ModelConfig](galleryFile)
}
func listModelFiles(systemState *system.SystemState, name string) ([]string, error) {
func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
additionalFiles := []string{}
configFile := filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", name))
if err := utils.VerifyPath(configFile, systemState.Model.ModelsPath); err != nil {
return nil, fmt.Errorf("failed to verify path %s: %w", configFile, err)
return fmt.Errorf("failed to verify path %s: %w", configFile, err)
}
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
galleryFile := filepath.Join(systemState.Model.ModelsPath, galleryFileName(name))
if err := utils.VerifyPath(galleryFile, systemState.Model.ModelsPath); err != nil {
return nil, fmt.Errorf("failed to verify path %s: %w", galleryFile, err)
}
additionalFiles := []string{}
allFiles := []string{}
// Gallery name is the name of the model in this case
dat, err := os.ReadFile(configFile)
if err == nil {
modelConfig := &lconfig.ModelConfig{}
modelConfig := &config.ModelConfig{}
err = yaml.Unmarshal(dat, &modelConfig)
if err != nil {
return nil, err
return err
}
if modelConfig.Model != "" {
additionalFiles = append(additionalFiles, modelConfig.ModelFileName())
@@ -330,15 +319,26 @@ func listModelFiles(systemState *system.SystemState, name string) ([]string, err
}
}
// os.PathSeparator is not allowed in model names. Replace them with "__" to avoid conflicts with file paths.
name = strings.ReplaceAll(name, string(os.PathSeparator), "__")
galleryFile := filepath.Join(systemState.Model.ModelsPath, galleryFileName(name))
if err := utils.VerifyPath(galleryFile, systemState.Model.ModelsPath); err != nil {
return fmt.Errorf("failed to verify path %s: %w", galleryFile, err)
}
var filesToRemove []string
// Delete all the files associated to the model
// read the model config
galleryconfig, err := ReadConfigFile[ModelConfig](galleryFile)
if err == nil && galleryconfig != nil {
for _, f := range galleryconfig.Files {
fullPath := filepath.Join(systemState.Model.ModelsPath, f.Filename)
if err := utils.VerifyPath(fullPath, systemState.Model.ModelsPath); err != nil {
return allFiles, fmt.Errorf("failed to verify path %s: %w", fullPath, err)
return fmt.Errorf("failed to verify path %s: %w", fullPath, err)
}
allFiles = append(allFiles, fullPath)
filesToRemove = append(filesToRemove, fullPath)
}
} else {
log.Error().Err(err).Msgf("failed to read gallery file %s", configFile)
@@ -347,68 +347,18 @@ func listModelFiles(systemState *system.SystemState, name string) ([]string, err
for _, f := range additionalFiles {
fullPath := filepath.Join(filepath.Join(systemState.Model.ModelsPath, f))
if err := utils.VerifyPath(fullPath, systemState.Model.ModelsPath); err != nil {
return allFiles, fmt.Errorf("failed to verify path %s: %w", fullPath, err)
return fmt.Errorf("failed to verify path %s: %w", fullPath, err)
}
allFiles = append(allFiles, fullPath)
filesToRemove = append(filesToRemove, fullPath)
}
allFiles = append(allFiles, galleryFile)
filesToRemove = append(filesToRemove, galleryFile)
// skip duplicates
allFiles = utils.Unique(allFiles)
return allFiles, nil
}
func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
configFile := filepath.Join(systemState.Model.ModelsPath, fmt.Sprintf("%s.yaml", name))
filesToRemove, err := listModelFiles(systemState, name)
if err != nil {
return err
}
allOtherFiles := []string{}
// Get all files of all other models
fi, err := os.ReadDir(systemState.Model.ModelsPath)
if err != nil {
return err
}
for _, f := range fi {
if f.IsDir() {
continue
}
if strings.HasPrefix(f.Name(), "._gallery_") {
continue
}
if !strings.HasSuffix(f.Name(), ".yaml") && !strings.HasSuffix(f.Name(), ".yml") {
continue
}
if f.Name() == fmt.Sprintf("%s.yaml", name) || f.Name() == fmt.Sprintf("%s.yml", name) {
continue
}
name := strings.TrimSuffix(f.Name(), ".yaml")
name = strings.TrimSuffix(name, ".yml")
log.Debug().Msgf("Checking file %s", f.Name())
files, err := listModelFiles(systemState, name)
if err != nil {
log.Debug().Err(err).Msgf("failed to list files for model %s", f.Name())
continue
}
allOtherFiles = append(allOtherFiles, files...)
}
log.Debug().Msgf("Files to remove: %+v", filesToRemove)
log.Debug().Msgf("All other files: %+v", allOtherFiles)
filesToRemove = utils.Unique(filesToRemove)
// Removing files
for _, f := range filesToRemove {
if slices.Contains(allOtherFiles, f) {
log.Debug().Msgf("Skipping file %s because it is part of another model", f)
continue
}
if e := os.Remove(f); e != nil {
log.Error().Err(e).Msgf("failed to remove file %s", f)
}
@@ -419,7 +369,7 @@ func DeleteModelFromSystem(systemState *system.SystemState, name string) error {
// This is ***NEVER*** going to be perfect or finished.
// This is a BEST EFFORT function to surface known-vulnerable models to users.
func SafetyScanGalleryModels(galleries []lconfig.Gallery, systemState *system.SystemState) error {
func SafetyScanGalleryModels(galleries []config.Gallery, systemState *system.SystemState) error {
galleryModels, err := AvailableGalleryModels(galleries, systemState)
if err != nil {
return err
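
The shared-file handling visible in the deletion code above (files still referenced by another model's config are skipped) follows a simple set-difference pattern. A minimal sketch with hypothetical helper names, not the actual gallery API:

package main

import "fmt"

// filesSafeToRemove keeps a file owned by the model being deleted only
// when no other installed model still references it.
func filesSafeToRemove(toRemove, usedByOthers []string) []string {
    inUse := map[string]struct{}{}
    for _, f := range usedByOthers {
        inUse[f] = struct{}{}
    }
    out := []string{}
    for _, f := range toRemove {
        if _, shared := inUse[f]; shared {
            continue // keep shared files on disk
        }
        out = append(out, f)
    }
    return out
}

func main() {
    fmt.Println(filesSafeToRemove(
        []string{"model1.yaml", "shared_model.bin"},
        []string{"shared_model.bin"},
    )) // [model1.yaml]
}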

View File

@@ -183,98 +183,5 @@ var _ = Describe("Model test", func() {
_, err = InstallModel(context.TODO(), systemState, "../../../foo", c, map[string]interface{}{}, func(string, string, string, float64) {}, true)
Expect(err).To(HaveOccurred())
})
It("does not delete shared model files when one config is deleted", func() {
tempdir, err := os.MkdirTemp("", "test")
Expect(err).ToNot(HaveOccurred())
defer os.RemoveAll(tempdir)
systemState, err := system.GetSystemState(
system.WithModelPath(tempdir),
)
Expect(err).ToNot(HaveOccurred())
// Create a shared model file
sharedModelFile := filepath.Join(tempdir, "shared_model.bin")
err = os.WriteFile(sharedModelFile, []byte("fake model content"), 0600)
Expect(err).ToNot(HaveOccurred())
// Create first model configuration
config1 := `name: model1
model: shared_model.bin`
err = os.WriteFile(filepath.Join(tempdir, "model1.yaml"), []byte(config1), 0600)
Expect(err).ToNot(HaveOccurred())
// Create first model's gallery file
galleryConfig1 := ModelConfig{
Name: "model1",
Files: []File{
{Filename: "shared_model.bin"},
},
}
galleryData1, err := yaml.Marshal(galleryConfig1)
Expect(err).ToNot(HaveOccurred())
err = os.WriteFile(filepath.Join(tempdir, "._gallery_model1.yaml"), galleryData1, 0600)
Expect(err).ToNot(HaveOccurred())
// Create second model configuration sharing the same model file
config2 := `name: model2
model: shared_model.bin`
err = os.WriteFile(filepath.Join(tempdir, "model2.yaml"), []byte(config2), 0600)
Expect(err).ToNot(HaveOccurred())
// Create second model's gallery file
galleryConfig2 := ModelConfig{
Name: "model2",
Files: []File{
{Filename: "shared_model.bin"},
},
}
galleryData2, err := yaml.Marshal(galleryConfig2)
Expect(err).ToNot(HaveOccurred())
err = os.WriteFile(filepath.Join(tempdir, "._gallery_model2.yaml"), galleryData2, 0600)
Expect(err).ToNot(HaveOccurred())
// Verify both configurations exist
_, err = os.Stat(filepath.Join(tempdir, "model1.yaml"))
Expect(err).ToNot(HaveOccurred())
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
Expect(err).ToNot(HaveOccurred())
// Verify the shared model file exists
_, err = os.Stat(sharedModelFile)
Expect(err).ToNot(HaveOccurred())
// Delete the first model
err = DeleteModelFromSystem(systemState, "model1")
Expect(err).ToNot(HaveOccurred())
// Verify the first configuration is deleted
_, err = os.Stat(filepath.Join(tempdir, "model1.yaml"))
Expect(err).To(HaveOccurred())
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
// Verify the shared model file still exists (not deleted because model2 still uses it)
_, err = os.Stat(sharedModelFile)
Expect(err).ToNot(HaveOccurred(), "shared model file should not be deleted when used by other configs")
// Verify the second configuration still exists
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
Expect(err).ToNot(HaveOccurred())
// Now delete the second model
err = DeleteModelFromSystem(systemState, "model2")
Expect(err).ToNot(HaveOccurred())
// Verify the second configuration is deleted
_, err = os.Stat(filepath.Join(tempdir, "model2.yaml"))
Expect(err).To(HaveOccurred())
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
// Verify the shared model file is now deleted (no more references)
_, err = os.Stat(sharedModelFile)
Expect(err).To(HaveOccurred(), "shared model file should be deleted when no configs reference it")
Expect(errors.Is(err, os.ErrNotExist)).To(BeTrue())
})
})
})

View File

@@ -208,7 +208,7 @@ func API(application *application.Application) (*echo.Echo, error) {
routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application.TemplatesEvaluator())
routes.RegisterOpenAIRoutes(e, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application)
routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
}
routes.RegisterJINARoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())

View File

@@ -87,7 +87,7 @@ func getModels(url string) ([]gallery.GalleryModel, error) {
response := []gallery.GalleryModel{}
uri := downloader.URI(url)
// TODO: No tests currently seem to exercise file:// urls. Fix?
err := uri.ReadWithAuthorizationAndCallback(context.TODO(), "", bearerKey, func(url string, i []byte) error {
err := uri.DownloadWithAuthorizationAndCallback(context.TODO(), "", bearerKey, func(url string, i []byte) error {
// Unmarshal YAML data into a struct
return json.Unmarshal(i, &response)
})
@@ -513,124 +513,6 @@ var _ = Describe("API test", func() {
})
})
Context("Importing models from URI", func() {
var testYamlFile string
BeforeEach(func() {
// Create a test YAML config file
yamlContent := `name: test-import-model
backend: llama-cpp
description: Test model imported from file URI
parameters:
model: path/to/model.gguf
temperature: 0.7
`
testYamlFile = filepath.Join(tmpdir, "test-import.yaml")
err := os.WriteFile(testYamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
err := os.Remove(testYamlFile)
Expect(err).ToNot(HaveOccurred())
})
It("should import model from file:// URI pointing to local YAML config", func() {
importReq := schema.ImportModelRequest{
URI: "file://" + testYamlFile,
Preferences: json.RawMessage(`{}`),
}
var response schema.GalleryResponse
err := postRequestResponseJSON("http://127.0.0.1:9090/models/import-uri", &importReq, &response)
Expect(err).ToNot(HaveOccurred())
Expect(response.ID).ToNot(BeEmpty())
uuid := response.ID
resp := map[string]interface{}{}
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
resp = response
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
// Check that the model was imported successfully
Expect(resp["message"]).ToNot(ContainSubstring("error"))
Expect(resp["error"]).To(BeNil())
// Verify the model config file was created
dat, err := os.ReadFile(filepath.Join(modelDir, "test-import-model.yaml"))
Expect(err).ToNot(HaveOccurred())
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["name"]).To(Equal("test-import-model"))
Expect(content["backend"]).To(Equal("llama-cpp"))
})
It("should return error when file:// URI points to non-existent file", func() {
nonExistentFile := filepath.Join(tmpdir, "nonexistent.yaml")
importReq := schema.ImportModelRequest{
URI: "file://" + nonExistentFile,
Preferences: json.RawMessage(`{}`),
}
var response schema.GalleryResponse
err := postRequestResponseJSON("http://127.0.0.1:9090/models/import-uri", &importReq, &response)
// The endpoint should return an error immediately
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("failed to discover model config"))
})
})
Context("Importing models from URI can't point to absolute paths", func() {
var testYamlFile string
BeforeEach(func() {
// Create a test YAML config file
yamlContent := `name: test-import-model
backend: llama-cpp
description: Test model imported from file URI
parameters:
model: /path/to/model.gguf
temperature: 0.7
`
testYamlFile = filepath.Join(tmpdir, "test-import.yaml")
err := os.WriteFile(testYamlFile, []byte(yamlContent), 0644)
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
err := os.Remove(testYamlFile)
Expect(err).ToNot(HaveOccurred())
})
It("should fail to import model from file:// URI pointing to local YAML config", func() {
importReq := schema.ImportModelRequest{
URI: "file://" + testYamlFile,
Preferences: json.RawMessage(`{}`),
}
var response schema.GalleryResponse
err := postRequestResponseJSON("http://127.0.0.1:9090/models/import-uri", &importReq, &response)
Expect(err).ToNot(HaveOccurred())
Expect(response.ID).ToNot(BeEmpty())
uuid := response.ID
resp := map[string]interface{}{}
Eventually(func() bool {
response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
resp = response
return response["processed"].(bool)
}, "360s", "10s").Should(Equal(true))
// Check that the model was imported successfully
Expect(resp["message"]).To(ContainSubstring("error"))
Expect(resp["error"]).ToNot(BeNil())
})
})
})
Context("Model gallery", func() {
@@ -1199,9 +1081,6 @@ parameters:
Context("Config file", func() {
BeforeEach(func() {
if runtime.GOOS != "linux" {
Skip("run this test only on linux")
}
modelPath := os.Getenv("MODELS_PATH")
backendPath := os.Getenv("BACKENDS_PATH")
c, cancel = context.WithCancel(context.Background())
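
Outside of the Ginkgo suite, the same import flow exercised above can be driven with a plain HTTP client. A rough sketch follows; the JSON field names are assumed from the request/response structs used in the tests and may differ in the actual API:

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    // Assumed field names ("uri", "preferences"), based on the
    // ImportModelRequest struct used in the tests above.
    body, _ := json.Marshal(map[string]any{
        "uri":         "file:///tmp/test-import.yaml",
        "preferences": map[string]any{},
    })

    resp, err := http.Post("http://127.0.0.1:9090/models/import-uri",
        "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    out := map[string]any{}
    _ = json.NewDecoder(resp.Body).Decode(&out)
    // The response carries a job ID that the tests poll at /models/jobs/<id>.
    fmt.Println(out)
}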

View File

@@ -135,7 +135,7 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applicati
}
// Validate the configuration
if valid, _ := req.Validate(); !valid {
if !req.Validate() {
response := ModelResponse{
Success: false,
Error: "Validation failed",
@@ -196,7 +196,7 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applicati
func ReloadModelsEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
// Reload configurations
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath); err != nil {
response := ModelResponse{
Success: false,
Error: "Failed to reload configurations: " + err.Error(),

View File

@@ -145,10 +145,10 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
}
// Set defaults
modelConfig.SetDefaults(appConfig.ToConfigLoaderOptions()...)
modelConfig.SetDefaults()
// Validate the configuration
if valid, _ := modelConfig.Validate(); !valid {
if !modelConfig.Validate() {
response := ModelResponse{
Success: false,
Error: "Invalid configuration",
@@ -185,7 +185,7 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
return c.JSON(http.StatusInternalServerError, response)
}
// Reload configurations
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath, appConfig.ToConfigLoaderOptions()...); err != nil {
if err := cl.LoadModelConfigsFromPath(appConfig.SystemState.Model.ModelsPath); err != nil {
response := ModelResponse{
Success: false,
Error: "Failed to reload configurations: " + err.Error(),

View File

@@ -5,7 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net"
"strings"
"time"
"github.com/labstack/echo/v4"
@@ -105,10 +105,7 @@ func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval
fragment = fragment.AddMessage(message.Role, message.StringContent)
}
_, port, err := net.SplitHostPort(appConfig.APIAddress)
if err != nil {
return err
}
port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:]
apiKey := ""
if len(appConfig.ApiKeys) > 0 {
apiKey = appConfig.ApiKeys[0]
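
The two ways of deriving the API port that appear in this hunk behave the same for a plain host:port address but diverge when the address is malformed; a quick side-by-side comparison:

package main

import (
    "fmt"
    "net"
    "strings"
)

func main() {
    addr := "127.0.0.1:8080"

    // net.SplitHostPort validates the address and handles IPv6 literals.
    if _, port, err := net.SplitHostPort(addr); err == nil {
        fmt.Println("SplitHostPort:", port) // 8080
    }

    // The LastIndex slice is a best-effort cut after the final colon.
    fmt.Println("LastIndex:", addr[strings.LastIndex(addr, ":")+1:]) // 8080

    // Without a port, SplitHostPort reports an error while the slice
    // silently returns the whole string.
    noPort := "127.0.0.1"
    if _, _, err := net.SplitHostPort(noPort); err != nil {
        fmt.Println("SplitHostPort error:", err)
    }
    fmt.Println("LastIndex fallback:", noPort[strings.LastIndex(noPort, ":")+1:])
}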

View File

@@ -1,340 +0,0 @@
package localai
import (
"encoding/json"
"io"
"net/http"
"os"
"path/filepath"
"time"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/p2p"
"github.com/rs/zerolog/log"
)
type SettingsResponse struct {
Success bool `json:"success"`
Error string `json:"error,omitempty"`
Message string `json:"message,omitempty"`
}
type RuntimeSettings struct {
WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"`
WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"`
WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"`
WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"`
WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"`
SingleBackend *bool `json:"single_backend,omitempty"`
ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"`
Threads *int `json:"threads,omitempty"`
ContextSize *int `json:"context_size,omitempty"`
F16 *bool `json:"f16,omitempty"`
Debug *bool `json:"debug,omitempty"`
CORS *bool `json:"cors,omitempty"`
CSRF *bool `json:"csrf,omitempty"`
CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"`
P2PToken *string `json:"p2p_token,omitempty"`
P2PNetworkID *string `json:"p2p_network_id,omitempty"`
Federated *bool `json:"federated,omitempty"`
Galleries *[]config.Gallery `json:"galleries,omitempty"`
BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"`
AutoloadGalleries *bool `json:"autoload_galleries,omitempty"`
AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"`
ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys
}
// GetSettingsEndpoint returns current settings with precedence (env > file > defaults)
func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
appConfig := app.ApplicationConfig()
startupConfig := app.StartupConfig()
if startupConfig == nil {
// Fallback if startup config not available
startupConfig = appConfig
}
settings := RuntimeSettings{}
// Set all current values (using pointers for RuntimeSettings)
watchdogIdle := appConfig.WatchDogIdle
watchdogBusy := appConfig.WatchDogBusy
watchdogEnabled := appConfig.WatchDog
singleBackend := appConfig.SingleBackend
parallelBackendRequests := appConfig.ParallelBackendRequests
threads := appConfig.Threads
contextSize := appConfig.ContextSize
f16 := appConfig.F16
debug := appConfig.Debug
cors := appConfig.CORS
csrf := appConfig.CSRF
corsAllowOrigins := appConfig.CORSAllowOrigins
p2pToken := appConfig.P2PToken
p2pNetworkID := appConfig.P2PNetworkID
federated := appConfig.Federated
galleries := appConfig.Galleries
backendGalleries := appConfig.BackendGalleries
autoloadGalleries := appConfig.AutoloadGalleries
autoloadBackendGalleries := appConfig.AutoloadBackendGalleries
apiKeys := appConfig.ApiKeys
settings.WatchdogIdleEnabled = &watchdogIdle
settings.WatchdogBusyEnabled = &watchdogBusy
settings.WatchdogEnabled = &watchdogEnabled
settings.SingleBackend = &singleBackend
settings.ParallelBackendRequests = &parallelBackendRequests
settings.Threads = &threads
settings.ContextSize = &contextSize
settings.F16 = &f16
settings.Debug = &debug
settings.CORS = &cors
settings.CSRF = &csrf
settings.CORSAllowOrigins = &corsAllowOrigins
settings.P2PToken = &p2pToken
settings.P2PNetworkID = &p2pNetworkID
settings.Federated = &federated
settings.Galleries = &galleries
settings.BackendGalleries = &backendGalleries
settings.AutoloadGalleries = &autoloadGalleries
settings.AutoloadBackendGalleries = &autoloadBackendGalleries
settings.ApiKeys = &apiKeys
var idleTimeout, busyTimeout string
if appConfig.WatchDogIdleTimeout > 0 {
idleTimeout = appConfig.WatchDogIdleTimeout.String()
} else {
idleTimeout = "15m" // default
}
if appConfig.WatchDogBusyTimeout > 0 {
busyTimeout = appConfig.WatchDogBusyTimeout.String()
} else {
busyTimeout = "5m" // default
}
settings.WatchdogIdleTimeout = &idleTimeout
settings.WatchdogBusyTimeout = &busyTimeout
return c.JSON(http.StatusOK, settings)
}
}
// UpdateSettingsEndpoint updates settings, saves to file, and applies immediately
func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc {
return func(c echo.Context) error {
appConfig := app.ApplicationConfig()
startupConfig := app.StartupConfig()
if startupConfig == nil {
// Fallback if startup config not available
startupConfig = appConfig
}
body, err := io.ReadAll(c.Request().Body)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Failed to read request body: " + err.Error(),
})
}
var settings RuntimeSettings
if err := json.Unmarshal(body, &settings); err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Failed to parse JSON: " + err.Error(),
})
}
// Validate timeouts if provided
if settings.WatchdogIdleTimeout != nil {
_, err := time.ParseDuration(*settings.WatchdogIdleTimeout)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Invalid watchdog_idle_timeout format: " + err.Error(),
})
}
}
if settings.WatchdogBusyTimeout != nil {
_, err := time.ParseDuration(*settings.WatchdogBusyTimeout)
if err != nil {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "Invalid watchdog_busy_timeout format: " + err.Error(),
})
}
}
// Save to file
if appConfig.DynamicConfigsDir == "" {
return c.JSON(http.StatusBadRequest, SettingsResponse{
Success: false,
Error: "DynamicConfigsDir is not set",
})
}
settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json")
settingsJSON, err := json.MarshalIndent(settings, "", " ")
if err != nil {
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Failed to marshal settings: " + err.Error(),
})
}
if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil {
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Failed to write settings file: " + err.Error(),
})
}
// Apply settings immediately, checking env var overrides per field
watchdogChanged := false
if settings.WatchdogEnabled != nil {
appConfig.WatchDog = *settings.WatchdogEnabled
watchdogChanged = true
}
if settings.WatchdogIdleEnabled != nil {
appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled
if appConfig.WatchDogIdle {
appConfig.WatchDog = true
}
watchdogChanged = true
}
if settings.WatchdogBusyEnabled != nil {
appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled
if appConfig.WatchDogBusy {
appConfig.WatchDog = true
}
watchdogChanged = true
}
if settings.WatchdogIdleTimeout != nil {
dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout)
appConfig.WatchDogIdleTimeout = dur
watchdogChanged = true
}
if settings.WatchdogBusyTimeout != nil {
dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout)
appConfig.WatchDogBusyTimeout = dur
watchdogChanged = true
}
if settings.SingleBackend != nil {
appConfig.SingleBackend = *settings.SingleBackend
}
if settings.ParallelBackendRequests != nil {
appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests
}
if settings.Threads != nil {
appConfig.Threads = *settings.Threads
}
if settings.ContextSize != nil {
appConfig.ContextSize = *settings.ContextSize
}
if settings.F16 != nil {
appConfig.F16 = *settings.F16
}
if settings.Debug != nil {
appConfig.Debug = *settings.Debug
}
if settings.CORS != nil {
appConfig.CORS = *settings.CORS
}
if settings.CSRF != nil {
appConfig.CSRF = *settings.CSRF
}
if settings.CORSAllowOrigins != nil {
appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins
}
if settings.P2PToken != nil {
appConfig.P2PToken = *settings.P2PToken
}
if settings.P2PNetworkID != nil {
appConfig.P2PNetworkID = *settings.P2PNetworkID
}
if settings.Federated != nil {
appConfig.Federated = *settings.Federated
}
if settings.Galleries != nil {
appConfig.Galleries = *settings.Galleries
}
if settings.BackendGalleries != nil {
appConfig.BackendGalleries = *settings.BackendGalleries
}
if settings.AutoloadGalleries != nil {
appConfig.AutoloadGalleries = *settings.AutoloadGalleries
}
if settings.AutoloadBackendGalleries != nil {
appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries
}
if settings.ApiKeys != nil {
// API keys from env vars (startup) should be kept, runtime settings keys are added
// Combine startup keys (env vars) with runtime settings keys
envKeys := startupConfig.ApiKeys
runtimeKeys := *settings.ApiKeys
// Merge: env keys first (they take precedence), then runtime keys
appConfig.ApiKeys = append(envKeys, runtimeKeys...)
// Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication
// The runtime_settings.json is the unified config file. If api_keys.json exists,
// it will be loaded first, but runtime_settings.json takes precedence and deduplicates.
}
// Restart watchdog if settings changed
if watchdogChanged {
if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil {
if err := app.StopWatchdog(); err != nil {
log.Error().Err(err).Msg("Failed to stop watchdog")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop watchdog: " + err.Error(),
})
}
} else {
if err := app.RestartWatchdog(); err != nil {
log.Error().Err(err).Msg("Failed to restart watchdog")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to restart watchdog: " + err.Error(),
})
}
}
}
// Restart P2P if P2P settings changed
p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil
if p2pChanged {
if settings.P2PToken != nil && *settings.P2PToken == "" {
// stop P2P
if err := app.StopP2P(); err != nil {
log.Error().Err(err).Msg("Failed to stop P2P")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop P2P: " + err.Error(),
})
}
} else {
if settings.P2PToken != nil && *settings.P2PToken == "0" {
// generate a token if the user sets 0 (disabled)
token := p2p.GenerateToken(60, 60)
settings.P2PToken = &token
appConfig.P2PToken = token
}
// Stop existing P2P
if err := app.RestartP2P(); err != nil {
log.Error().Err(err).Msg("Failed to stop P2P")
return c.JSON(http.StatusInternalServerError, SettingsResponse{
Success: false,
Error: "Settings saved but failed to stop P2P: " + err.Error(),
})
}
}
}
return c.JSON(http.StatusOK, SettingsResponse{
Success: true,
Message: "Settings updated successfully",
})
}
}
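
For a concrete picture of how the runtime settings above are consumed, here is a minimal client sketch against the /api/settings route registered further below. The host, port, and chosen values are illustrative only; timeouts are Go duration strings validated with time.ParseDuration on the server side:

package main

import (
    "bytes"
    "fmt"
    "net/http"
)

func main() {
    // Field names match the json tags of RuntimeSettings above.
    body := []byte(`{
      "watchdog_enabled": true,
      "watchdog_idle_enabled": true,
      "watchdog_idle_timeout": "20m"
    }`)

    resp, err := http.Post("http://127.0.0.1:8080/api/settings",
        "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println("status:", resp.Status)
}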

View File

@@ -43,18 +43,17 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig,
processingModels, taskTypes := opcache.GetStatus()
summary := map[string]interface{}{
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"BaseURL": middleware.BaseURL(c),
"Models": modelsWithoutConfig,
"ModelsConfig": modelConfigs,
"GalleryConfig": galleryConfigs,
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,
"LoadedModels": loadedModelsMap,
"InstalledBackends": installedBackends,
"DisableRuntimeSettings": appConfig.DisableRuntimeSettings,
"Title": "LocalAI API - " + internal.PrintableVersion(),
"Version": internal.PrintableVersion(),
"BaseURL": middleware.BaseURL(c),
"Models": modelsWithoutConfig,
"ModelsConfig": modelConfigs,
"GalleryConfig": galleryConfigs,
"ApplicationConfig": appConfig,
"ProcessingModels": processingModels,
"TaskTypes": taskTypes,
"LoadedModels": loadedModelsMap,
"InstalledBackends": installedBackends,
}
contentType := c.Request().Header.Get("Content-Type")

View File

@@ -1,268 +0,0 @@
package openai
import (
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"time"
"github.com/google/uuid"
"github.com/labstack/echo/v4"
"github.com/rs/zerolog/log"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
model "github.com/mudler/LocalAI/pkg/model"
)
// InpaintingEndpoint handles POST /v1/images/inpainting
//
// Swagger / OpenAPI docstring (swaggo):
// @Summary Image inpainting
// @Description Perform image inpainting. Accepts multipart/form-data with `image` and `mask` files.
// @Tags images
// @Accept multipart/form-data
// @Produce application/json
// @Param model formData string true "Model identifier"
// @Param prompt formData string true "Text prompt guiding the generation"
// @Param steps formData int false "Number of inference steps (default 25)"
// @Param image formData file true "Original image file"
// @Param mask formData file true "Mask image file (white = area to inpaint)"
// @Success 200 {object} schema.OpenAIResponse
// @Failure 400 {object} map[string]string
// @Failure 500 {object} map[string]string
// @Router /v1/images/inpainting [post]
func InpaintingEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
return func(c echo.Context) error {
// Parse basic form values
modelName := c.FormValue("model")
prompt := c.FormValue("prompt")
stepsStr := c.FormValue("steps")
if modelName == "" || prompt == "" {
log.Error().Msg("Inpainting Endpoint - missing model or prompt")
return echo.ErrBadRequest
}
// steps default
steps := 25
if stepsStr != "" {
if v, err := strconv.Atoi(stepsStr); err == nil {
steps = v
}
}
// Get uploaded files
imageFile, err := c.FormFile("image")
if err != nil {
log.Error().Err(err).Msg("Inpainting Endpoint - missing image file")
return echo.NewHTTPError(http.StatusBadRequest, "missing image file")
}
maskFile, err := c.FormFile("mask")
if err != nil {
log.Error().Err(err).Msg("Inpainting Endpoint - missing mask file")
return echo.NewHTTPError(http.StatusBadRequest, "missing mask file")
}
// Read files into memory (small files expected)
imgSrc, err := imageFile.Open()
if err != nil {
return err
}
defer imgSrc.Close()
imgBytes, err := io.ReadAll(imgSrc)
if err != nil {
return err
}
maskSrc, err := maskFile.Open()
if err != nil {
return err
}
defer maskSrc.Close()
maskBytes, err := io.ReadAll(maskSrc)
if err != nil {
return err
}
// Create JSON with base64 fields expected by backend
b64Image := base64.StdEncoding.EncodeToString(imgBytes)
b64Mask := base64.StdEncoding.EncodeToString(maskBytes)
// get model config from context (middleware set it)
cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
if !ok || cfg == nil {
log.Error().Msg("Inpainting Endpoint - model config not found in context")
return echo.ErrBadRequest
}
// Use the GeneratedContentDir so the generated PNG is placed where the
// HTTP static handler serves `/generated-images`.
tmpDir := appConfig.GeneratedContentDir
// Ensure the directory exists
if err := os.MkdirAll(tmpDir, 0750); err != nil {
log.Error().Err(err).Msgf("Inpainting Endpoint - failed to create generated content dir: %s", tmpDir)
return echo.NewHTTPError(http.StatusInternalServerError, "failed to prepare storage")
}
id := uuid.New().String()
jsonPath := filepath.Join(tmpDir, fmt.Sprintf("inpaint_%s.json", id))
jsonFile := map[string]string{
"image": b64Image,
"mask_image": b64Mask,
}
jf, err := os.CreateTemp(tmpDir, "inpaint_")
if err != nil {
return err
}
// setup cleanup on error; if everything succeeds we set success = true
success := false
var dst string
var origRef string
var maskRef string
defer func() {
if !success {
// Best-effort cleanup; log any failures
if jf != nil {
if cerr := jf.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file in cleanup")
}
if name := jf.Name(); name != "" {
if rerr := os.Remove(name); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove temp json file %s in cleanup", name)
}
}
}
if jsonPath != "" {
if rerr := os.Remove(jsonPath); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove json file %s in cleanup", jsonPath)
}
}
if dst != "" {
if rerr := os.Remove(dst); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove dst file %s in cleanup", dst)
}
}
if origRef != "" {
if rerr := os.Remove(origRef); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove orig ref file %s in cleanup", origRef)
}
}
if maskRef != "" {
if rerr := os.Remove(maskRef); rerr != nil && !os.IsNotExist(rerr) {
log.Warn().Err(rerr).Msgf("Inpainting Endpoint - failed to remove mask ref file %s in cleanup", maskRef)
}
}
}
}()
// write original image and mask to disk as ref images so backends that
// accept reference image files can use them (maintainer request).
origTmp, err := os.CreateTemp(tmpDir, "refimg_")
if err != nil {
return err
}
if _, err := origTmp.Write(imgBytes); err != nil {
_ = origTmp.Close()
_ = os.Remove(origTmp.Name())
return err
}
if cerr := origTmp.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close orig temp file")
}
origRef = origTmp.Name()
maskTmp, err := os.CreateTemp(tmpDir, "refmask_")
if err != nil {
// cleanup origTmp on error
_ = os.Remove(origRef)
return err
}
if _, err := maskTmp.Write(maskBytes); err != nil {
_ = maskTmp.Close()
_ = os.Remove(maskTmp.Name())
_ = os.Remove(origRef)
return err
}
if cerr := maskTmp.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close mask temp file")
}
maskRef = maskTmp.Name()
// write JSON
enc := json.NewEncoder(jf)
if err := enc.Encode(jsonFile); err != nil {
if cerr := jf.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file after encode error")
}
return err
}
if cerr := jf.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close temp json file")
}
// rename to desired name
if err := os.Rename(jf.Name(), jsonPath); err != nil {
return err
}
// prepare dst
outTmp, err := os.CreateTemp(tmpDir, "out_")
if err != nil {
return err
}
if cerr := outTmp.Close(); cerr != nil {
log.Warn().Err(cerr).Msg("Inpainting Endpoint - failed to close out temp file")
}
dst = outTmp.Name() + ".png"
if err := os.Rename(outTmp.Name(), dst); err != nil {
return err
}
// Determine width/height default
width := 512
height := 512
// Call backend image generation via indirection so tests can stub it
// Note: ImageGenerationFunc will call into the loaded model's GenerateImage which expects src JSON
// Also pass ref images (orig + mask) so backends that support ref images can use them.
refImages := []string{origRef, maskRef}
fn, err := backend.ImageGenerationFunc(height, width, 0, steps, 0, prompt, "", jsonPath, dst, ml, *cfg, appConfig, refImages)
if err != nil {
return err
}
// Execute generation function (blocking)
if err := fn(); err != nil {
return err
}
// On success, build response URL using BaseURL middleware helper and
// the same `generated-images` prefix used by the server static mount.
baseURL := middleware.BaseURL(c)
// Build response using url.JoinPath for correct URL escaping
imgPath, err := url.JoinPath(baseURL, "generated-images", filepath.Base(dst))
if err != nil {
return err
}
created := int(time.Now().Unix())
resp := &schema.OpenAIResponse{
ID: id,
Created: created,
Data: []schema.Item{{
URL: imgPath,
}},
}
// mark success so defer cleanup will not remove output files
success = true
return c.JSON(http.StatusOK, resp)
}
}
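
A client-side counterpart to the handler above (and to the httptest exercise that follows) would post a multipart form carrying the model, prompt, optional steps, and the image/mask files. File paths and the model name below are placeholders:

package main

import (
    "bytes"
    "fmt"
    "mime/multipart"
    "net/http"
    "os"
)

func main() {
    var buf bytes.Buffer
    w := multipart.NewWriter(&buf)
    _ = w.WriteField("model", "dreamshaper-8-inpainting")
    _ = w.WriteField("prompt", "a red sofa in a bright room")
    _ = w.WriteField("steps", "25")

    // Attach the original image and the mask (white = area to inpaint).
    for field, path := range map[string]string{
        "image": "input.png",
        "mask":  "mask.png",
    } {
        fw, _ := w.CreateFormFile(field, path)
        data, err := os.ReadFile(path)
        if err != nil {
            panic(err)
        }
        _, _ = fw.Write(data)
    }
    _ = w.Close()

    resp, err := http.Post("http://127.0.0.1:8080/v1/images/inpainting",
        w.FormDataContentType(), &buf)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println("status:", resp.Status) // JSON body carries a generated-images URL
}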

View File

@@ -1,107 +0,0 @@
package openai
import (
"bytes"
"mime/multipart"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
model "github.com/mudler/LocalAI/pkg/model"
"github.com/stretchr/testify/require"
)
func makeMultipartRequest(t *testing.T, fields map[string]string, files map[string][]byte) (*http.Request, string) {
b := &bytes.Buffer{}
w := multipart.NewWriter(b)
for k, v := range fields {
_ = w.WriteField(k, v)
}
for fname, content := range files {
fw, err := w.CreateFormFile(fname, fname+".png")
require.NoError(t, err)
_, err = fw.Write(content)
require.NoError(t, err)
}
require.NoError(t, w.Close())
req := httptest.NewRequest(http.MethodPost, "/v1/images/inpainting", b)
req.Header.Set("Content-Type", w.FormDataContentType())
return req, w.FormDataContentType()
}
func TestInpainting_MissingFiles(t *testing.T) {
e := echo.New()
// handler requires cl, ml, appConfig but this test verifies missing files early
h := InpaintingEndpoint(nil, nil, config.NewApplicationConfig())
req := httptest.NewRequest(http.MethodPost, "/v1/images/inpainting", nil)
rec := httptest.NewRecorder()
c := e.NewContext(req, rec)
err := h(c)
require.Error(t, err)
}
func TestInpainting_HappyPath(t *testing.T) {
// Setup temp generated content dir
tmpDir, err := os.MkdirTemp("", "gencontent")
require.NoError(t, err)
defer os.RemoveAll(tmpDir)
appConf := config.NewApplicationConfig(config.WithGeneratedContentDir(tmpDir))
// stub the backend.ImageGenerationFunc
orig := backend.ImageGenerationFunc
backend.ImageGenerationFunc = func(height, width, mode, step, seed int, positive_prompt, negative_prompt, src, dst string, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig, refImages []string) (func() error, error) {
fn := func() error {
// write a fake png file to dst
return os.WriteFile(dst, []byte("PNGDATA"), 0644)
}
return fn, nil
}
defer func() { backend.ImageGenerationFunc = orig }()
// prepare multipart request with image and mask
fields := map[string]string{"model": "dreamshaper-8-inpainting", "prompt": "A test"}
files := map[string][]byte{"image": []byte("IMAGEDATA"), "mask": []byte("MASKDATA")}
reqBuf, _ := makeMultipartRequest(t, fields, files)
rec := httptest.NewRecorder()
e := echo.New()
c := e.NewContext(reqBuf, rec)
// set a minimal model config in context as handler expects
c.Set(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG, &config.ModelConfig{Backend: "diffusers"})
h := InpaintingEndpoint(nil, nil, appConf)
// call handler
err = h(c)
require.NoError(t, err)
require.Equal(t, http.StatusOK, rec.Code)
// verify response body contains generated-images path
body := rec.Body.String()
require.Contains(t, body, "generated-images")
// confirm the file was created in tmpDir
// parse out filename from response (naive search)
// find "generated-images/" and extract until closing quote or brace
idx := bytes.Index(rec.Body.Bytes(), []byte("generated-images/"))
require.True(t, idx >= 0)
rest := rec.Body.Bytes()[idx:]
end := bytes.IndexAny(rest, "\",}\n")
if end == -1 {
end = len(rest)
}
fname := string(rest[len("generated-images/"):end])
// ensure file exists
_, err = os.Stat(filepath.Join(tmpDir, fname))
require.NoError(t, err)
}

View File

@@ -5,7 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net"
"strings"
"time"
"github.com/labstack/echo/v4"
@@ -75,11 +75,7 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
fragment = fragment.AddMessage(message.Role, message.StringContent)
}
_, port, err := net.SplitHostPort(appConfig.APIAddress)
if err != nil {
return err
}
port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:]
apiKey := ""
if appConfig.ApiKeys != nil {
apiKey = appConfig.ApiKeys[0]
@@ -108,11 +104,11 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader,
log.Debug().Msgf("[model agent] [model: %s] Reasoning: %s", config.Name, s)
}),
cogito.WithToolCallBack(func(t *cogito.ToolChoice) bool {
log.Debug().Msgf("[model agent] [model: %s] Tool call: %s, reasoning: %s, arguments: %+v", config.Name, t.Name, t.Reasoning, t.Arguments)
log.Debug().Msgf("[model agent] [model: %s] Tool call: %s, reasoning: %s, arguments: %+v", t.Name, t.Reasoning, t.Arguments)
return true
}),
cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
log.Debug().Msgf("[model agent] [model: %s] Tool call result: %s, result: %s, tool arguments: %+v", config.Name, t.Name, t.Result, t.ToolArguments)
log.Debug().Msgf("[model agent] [model: %s] Tool call result: %s, tool arguments: %+v", t.Name, t.Result, t.ToolArguments)
}),
)

View File

@@ -112,7 +112,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgVAD.Validate(); !valid {
if !cfgVAD.Validate() {
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -128,7 +128,7 @@ func newTranscriptionOnlyModel(pipeline *config.Pipeline, cl *config.ModelConfig
return nil, nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgSST.Validate(); !valid {
if !cfgSST.Validate() {
return nil, nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -155,7 +155,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgVAD.Validate(); !valid {
if !cfgVAD.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -172,7 +172,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgSST.Validate(); !valid {
if !cfgSST.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -191,7 +191,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgAnyToAny.Validate(); !valid {
if !cfgAnyToAny.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -218,7 +218,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgLLM.Validate(); !valid {
if !cfgLLM.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
@@ -228,7 +228,7 @@ func newModel(pipeline *config.Pipeline, cl *config.ModelConfigLoader, ml *model
return nil, fmt.Errorf("failed to load backend config: %w", err)
}
if valid, _ := cfgTTS.Validate(); !valid {
if !cfgTTS.Validate() {
return nil, fmt.Errorf("failed to validate config: %w", err)
}
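
The recurring switch in these hunks between a Validate that returns a bare bool and one that returns (bool, error) is easiest to see side by side. A toy illustration with stand-in types, not the real config structs:

package main

import (
    "errors"
    "fmt"
)

// boolConfig only reports whether it is valid.
type boolConfig struct{ Backend string }

func (c boolConfig) Validate() bool { return c.Backend != "" }

// errConfig also reports why validation failed.
type errConfig struct{ Backend string }

func (c errConfig) Validate() (bool, error) {
    if c.Backend == "" {
        return false, errors.New("backend must be set")
    }
    return true, nil
}

func main() {
    if !(boolConfig{}).Validate() {
        fmt.Println("invalid (no reason available)")
    }
    if valid, err := (errConfig{}).Validate(); !valid {
        fmt.Println("invalid:", err)
    }
}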

View File

@@ -55,11 +55,6 @@ func (re *RequestExtractor) setModelNameFromRequest(c echo.Context) {
model = c.QueryParam("model")
}
// Check FormValue for multipart/form-data requests (e.g., /v1/images/inpainting)
if model == "" {
model = c.FormValue("model")
}
if model == "" {
// Set model from bearer token, if available
auth := c.Request().Header.Get("Authorization")
@@ -480,7 +475,7 @@ func mergeOpenAIRequestAndModelConfig(config *config.ModelConfig, input *schema.
}
}
if valid, _ := config.Validate(); valid {
if config.Validate() {
return nil
}
return fmt.Errorf("unable to validate configuration after merging")

View File

@@ -140,8 +140,7 @@ func RegisterOpenAIRoutes(app *echo.Echo,
// images
imageHandler := openai.ImageEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
imageMiddleware := []echo.MiddlewareFunc{
// Default: use the first available image generation model
re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_IMAGE)),
re.BuildConstantDefaultModelNameMiddleware("stablediffusion"),
re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
func(next echo.HandlerFunc) echo.HandlerFunc {
return func(c echo.Context) error {
@@ -156,11 +155,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
app.POST("/v1/images/generations", imageHandler, imageMiddleware...)
app.POST("/images/generations", imageHandler, imageMiddleware...)
// inpainting endpoint (image + mask) - reuse same middleware config as images
inpaintingHandler := openai.InpaintingEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
app.POST("/v1/images/inpainting", inpaintingHandler, imageMiddleware...)
app.POST("/images/inpainting", inpaintingHandler, imageMiddleware...)
// videos (OpenAI-compatible endpoints mapped to LocalAI video handler)
videoHandler := openai.VideoEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
videoMiddleware := []echo.MiddlewareFunc{

View File

@@ -23,17 +23,6 @@ func RegisterUIRoutes(app *echo.Echo,
app.GET("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
app.GET("/manage", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps))
if !appConfig.DisableRuntimeSettings {
// Settings page
app.GET("/settings", func(c echo.Context) error {
summary := map[string]interface{}{
"Title": "LocalAI - Settings",
"BaseURL": middleware.BaseURL(c),
}
return c.Render(200, "views/settings", summary)
})
}
// P2P
app.GET("/p2p/", func(c echo.Context) error {
summary := map[string]interface{}{

View File

@@ -12,10 +12,8 @@ import (
"github.com/google/uuid"
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/http/endpoints/localai"
"github.com/mudler/LocalAI/core/p2p"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/model"
@@ -23,7 +21,7 @@ import (
)
// RegisterUIAPIRoutes registers JSON API routes for the web UI
func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache, applicationInstance *application.Application) {
func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) {
// Operations API - Get all current operations (models + backends)
app.GET("/api/operations", func(c echo.Context) error {
@@ -266,17 +264,17 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
installedModelsCount := len(modelConfigs) + len(modelsWithoutConfig)
return c.JSON(200, map[string]interface{}{
"models": modelsJSON,
"repositories": appConfig.Galleries,
"allTags": tags,
"processingModels": processingModelsData,
"taskTypes": taskTypes,
"availableModels": totalModels,
"installedModels": installedModelsCount,
"currentPage": pageNum,
"totalPages": totalPages,
"prevPage": prevPage,
"nextPage": nextPage,
"models": modelsJSON,
"repositories": appConfig.Galleries,
"allTags": tags,
"processingModels": processingModelsData,
"taskTypes": taskTypes,
"availableModels": totalModels,
"installedModels": installedModelsCount,
"currentPage": pageNum,
"totalPages": totalPages,
"prevPage": prevPage,
"nextPage": nextPage,
})
})
@@ -804,10 +802,4 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
},
})
})
if !appConfig.DisableRuntimeSettings {
// Settings API
app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance))
app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance))
}
}

View File

File diff suppressed because it is too large.

View File

@@ -629,33 +629,11 @@ function backendsGallery() {
this.fetchBackends();
}
if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
if (jobData.error) {
backend.processing = false;
delete this.jobProgress[backend.jobID];
const action = backend.isDeletion ? 'deleting' : 'installing';
// Extract error message - handle both string and object errors
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
// Check if error object has any properties
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
// Try common error object properties
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
// Empty object {}, fall back to message field
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
// Use message field if error is not present or is empty
errorMessage = jobData.message;
}
// Remove "error: " prefix if present
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.addNotification(`Error ${action} backend "${backend.name}": ${errorMessage}`, 'error');
this.addNotification(`Error ${action} backend "${backend.name}": ${jobData.error}`, 'error');
}
} catch (error) {
console.error('Error polling job:', error);

View File

File diff suppressed because it is too large.

View File

@@ -127,10 +127,6 @@
imageFiles: [],
audioFiles: [],
textFiles: [],
attachedFiles: [],
mcpMode: false,
mcpAvailable: false,
mcpModels: {},
currentPlaceholder: 'Send a message...',
placeholderIndex: 0,
charIndex: 0,
@@ -166,8 +162,6 @@
init() {
window.currentPlaceholderText = this.currentPlaceholder;
this.startTypingAnimation();
// Build MCP models map from data attributes
this.buildMCPModelsMap();
// Select first model by default
this.$nextTick(() => {
const select = this.$el.querySelector('select');
@@ -176,43 +170,9 @@
const firstModelOption = select.options[1];
if (firstModelOption && firstModelOption.value) {
this.selectedModel = firstModelOption.value;
this.checkMCPAvailability();
}
}
});
// Watch for changes to selectedModel to update MCP availability
this.$watch('selectedModel', () => {
this.checkMCPAvailability();
});
},
buildMCPModelsMap() {
const select = this.$el.querySelector('select');
if (!select) return;
this.mcpModels = {};
for (let i = 0; i < select.options.length; i++) {
const option = select.options[i];
if (option.value) {
const hasMcpAttr = option.getAttribute('data-has-mcp');
this.mcpModels[option.value] = hasMcpAttr === 'true';
}
}
// Debug: uncomment to see the MCP models map
// console.log('MCP Models Map:', this.mcpModels);
},
checkMCPAvailability() {
if (!this.selectedModel) {
this.mcpAvailable = false;
this.mcpMode = false;
return;
}
// Check MCP availability from the map
const hasMCP = this.mcpModels[this.selectedModel] === true;
this.mcpAvailable = hasMCP;
// Debug: uncomment to see what's happening
// console.log('MCP Check:', { model: this.selectedModel, hasMCP, mcpAvailable: this.mcpAvailable, map: this.mcpModels });
if (!hasMCP) {
this.mcpMode = false;
}
},
startTypingAnimation() {
if (this.isTyping) return;
@@ -281,98 +241,30 @@
} else {
this.resumeTyping();
}
},
handleFileSelection(files, fileType) {
Array.from(files).forEach(file => {
// Check if file already exists
const exists = this.attachedFiles.some(f => f.name === file.name && f.type === fileType);
if (!exists) {
this.attachedFiles.push({ name: file.name, type: fileType });
}
});
},
removeAttachedFile(fileType, fileName) {
// Remove from attachedFiles array
const index = this.attachedFiles.findIndex(f => f.name === fileName && f.type === fileType);
if (index !== -1) {
this.attachedFiles.splice(index, 1);
}
// Remove from corresponding file array
if (fileType === 'image') {
this.imageFiles = this.imageFiles.filter(f => f.name !== fileName);
} else if (fileType === 'audio') {
this.audioFiles = this.audioFiles.filter(f => f.name !== fileName);
} else if (fileType === 'file') {
this.textFiles = this.textFiles.filter(f => f.name !== fileName);
}
}
}">
<!-- Model Selector with MCP Toggle -->
<!-- Model Selector -->
<div class="mb-4">
<label class="block text-sm font-medium text-[#94A3B8] mb-2">Select Model</label>
<div class="flex items-center gap-3">
<select
x-model="selectedModel"
@change="$nextTick(() => checkMCPAvailability())"
class="flex-1 bg-[#1E293B] text-[#E5E7EB] border border-[#38BDF8]/20 focus:border-[#38BDF8] focus:ring-2 focus:ring-[#38BDF8]/50 rounded-lg p-3 appearance-none"
required
>
<option value="" disabled class="text-[#94A3B8]">Select a model to chat with...</option>
{{ range .ModelsConfig }}
{{ $cfg := . }}
{{ $hasMCP := or (ne $cfg.MCP.Servers "") (ne $cfg.MCP.Stdio "") }}
{{ range .KnownUsecaseStrings }}
{{ if eq . "FLAG_CHAT" }}
<option value="{{$cfg.Name}}" data-has-mcp="{{if $hasMCP}}true{{else}}false{{end}}" class="bg-[#1E293B] text-[#E5E7EB]">{{$cfg.Name}}</option>
{{ end }}
<select
x-model="selectedModel"
class="w-full bg-[#1E293B] text-[#E5E7EB] border border-[#38BDF8]/20 focus:border-[#38BDF8] focus:ring-2 focus:ring-[#38BDF8]/50 rounded-lg p-3 appearance-none"
required
>
<option value="" disabled class="text-[#94A3B8]">Select a model to chat with...</option>
{{ range .ModelsConfig }}
{{ $cfg := . }}
{{ range .KnownUsecaseStrings }}
{{ if eq . "FLAG_CHAT" }}
<option value="{{$cfg.Name}}" class="bg-[#1E293B] text-[#E5E7EB]">{{$cfg.Name}}</option>
{{ end }}
{{ end }}
</select>
<!-- Compact MCP Toggle - Show only if MCP is available for selected model -->
<div
x-show="mcpAvailable"
class="flex items-center gap-2 px-3 py-2 text-xs rounded text-[#E5E7EB] bg-[#1E293B] border border-[#38BDF8]/20 whitespace-nowrap">
<i class="fa-solid fa-plug text-[#38BDF8] text-sm"></i>
<span class="text-[#94A3B8]">MCP</span>
<label class="relative inline-flex items-center cursor-pointer ml-1">
<input type="checkbox" id="index_mcp_toggle" class="sr-only peer" x-model="mcpMode">
<div class="w-9 h-5 bg-[#101827] peer-focus:outline-none peer-focus:ring-2 peer-focus:ring-[#38BDF8]/30 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-[#1E293B] after:border after:rounded-full after:h-4 after:w-4 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
</div>
<!-- MCP Mode Notification - Compact tooltip style -->
<div
x-show="mcpMode && mcpAvailable"
class="mt-2 p-2 bg-[#38BDF8]/10 border border-[#38BDF8]/30 rounded text-[#94A3B8] text-xs">
<div class="flex items-start space-x-2">
<i class="fa-solid fa-info-circle text-[#38BDF8] mt-0.5 text-xs"></i>
<p class="text-[#94A3B8]">Non-streaming mode active. Responses may take longer to process.</p>
</div>
</div>
{{ end }}
{{ end }}
</select>
</div>
<!-- Input Bar -->
<form @submit.prevent="startChat($event)" class="relative w-full">
<!-- Attachment Tags - Show above input when files are attached -->
<div x-show="attachedFiles.length > 0" class="mb-3 flex flex-wrap gap-2 items-center">
<template x-for="(file, index) in attachedFiles" :key="index">
<div class="inline-flex items-center gap-2 px-3 py-1.5 rounded-lg text-sm bg-[#38BDF8]/20 border border-[#38BDF8]/40 text-[#E5E7EB]">
<i :class="file.type === 'image' ? 'fa-solid fa-image' : file.type === 'audio' ? 'fa-solid fa-microphone' : 'fa-solid fa-file'" class="text-[#38BDF8]"></i>
<span x-text="file.name" class="max-w-[200px] truncate"></span>
<button
type="button"
@click="attachedFiles.splice(index, 1); removeAttachedFile(file.type, file.name)"
class="ml-1 text-[#94A3B8] hover:text-[#E5E7EB] transition-colors"
title="Remove attachment"
>
<i class="fa-solid fa-times text-xs"></i>
</button>
</div>
</template>
</div>
<div class="relative w-full bg-[#1E293B] border border-[#38BDF8]/20 rounded-xl focus-within:ring-2 focus-within:ring-[#38BDF8]/50 focus-within:border-[#38BDF8] transition-all duration-200">
<textarea
x-model="inputValue"
@@ -387,6 +279,7 @@
@input="handleInput()"
rows="2"
></textarea>
<span x-show="fileName" x-text="fileName" class="absolute right-16 top-3 text-[#94A3B8] text-xs mr-2"></span>
<!-- Attachment Buttons -->
<button
@@ -428,7 +321,7 @@
multiple
accept="image/*"
style="display: none;"
@change="imageFiles = Array.from($event.target.files); handleFileSelection($event.target.files, 'image')"
@change="imageFiles = Array.from($event.target.files); fileName = imageFiles.length > 0 ? imageFiles.length + ' image(s) selected' : ''"
/>
<input
id="index_input_audio"
@@ -436,7 +329,7 @@
multiple
accept="audio/*"
style="display: none;"
@change="audioFiles = Array.from($event.target.files); handleFileSelection($event.target.files, 'audio')"
@change="audioFiles = Array.from($event.target.files); fileName = audioFiles.length > 0 ? audioFiles.length + ' audio file(s) selected' : ''"
/>
<input
id="index_input_file"
@@ -444,7 +337,7 @@
multiple
accept=".txt,.md,.pdf"
style="display: none;"
@change="textFiles = Array.from($event.target.files); handleFileSelection($event.target.files, 'file')"
@change="textFiles = Array.from($event.target.files); fileName = textFiles.length > 0 ? textFiles.length + ' file(s) selected' : ''"
/>
</div>
@@ -541,20 +434,12 @@ function startChat(event) {
return;
}
// Get MCP mode from checkbox (if available)
let mcpMode = false;
const mcpToggle = document.getElementById('index_mcp_toggle');
if (mcpToggle && mcpToggle.checked) {
mcpMode = true;
}
// Store message and files in localStorage for chat page to pick up
const chatData = {
message: message,
imageFiles: [],
audioFiles: [],
textFiles: [],
mcpMode: mcpMode
textFiles: []
};
// Convert files to base64 for storage

View File

@@ -66,14 +66,6 @@
<i class="fas fa-cogs mr-1.5 text-[10px]"></i>
<span>Backend Gallery</span>
</a>
{{ if not .DisableRuntimeSettings }}
<a href="/settings"
class="inline-flex items-center bg-[#1E293B] hover:bg-[#1E293B]/80 border border-[#38BDF8]/20 text-[#E5E7EB] py-1.5 px-3 rounded text-xs font-medium transition-colors">
<i class="fas fa-cog mr-1.5 text-[10px]"></i>
<span>Settings</span>
</a>
{{ end }}
</div>
<!-- Models Section -->
@@ -287,22 +279,10 @@
<!-- Backends Section -->
<div class="mt-8">
<div class="mb-6">
<div class="flex items-center justify-between mb-1">
<h2 class="text-2xl font-semibold text-[#E5E7EB] flex items-center">
<i class="fas fa-cogs mr-2 text-[#8B5CF6] text-sm"></i>
Installed Backends
</h2>
{{ if gt (len .InstalledBackends) 0 }}
<button
@click="reinstallAllBackends()"
:disabled="reinstallingAll"
class="inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/80 disabled:opacity-50 disabled:cursor-not-allowed text-white py-1.5 px-3 rounded text-xs font-medium transition-colors"
title="Reinstall all backends">
<i class="fas fa-arrow-rotate-right mr-1.5 text-[10px]" :class="reinstallingAll ? 'fa-spin' : ''"></i>
<span x-text="reinstallingAll ? 'Reinstalling...' : 'Reinstall All'"></span>
</button>
{{ end }}
</div>
<h2 class="text-2xl font-semibold text-[#E5E7EB] mb-1 flex items-center">
<i class="fas fa-cogs mr-2 text-[#8B5CF6] text-sm"></i>
Installed Backends
</h2>
<p class="text-sm text-[#94A3B8] mb-4">
<span class="text-[#8B5CF6] font-medium">{{len .InstalledBackends}}</span> backend{{if gt (len .InstalledBackends) 1}}s{{end}} ready to use
</p>
@@ -344,7 +324,7 @@
</thead>
<tbody>
{{ range .InstalledBackends }}
<tr class="hover:bg-[#1E293B]/50 border-b border-[#1E293B] transition-colors" data-backend-name="{{.Name}}" data-is-system="{{.IsSystem}}">
<tr class="hover:bg-[#1E293B]/50 border-b border-[#1E293B] transition-colors">
<!-- Name Column -->
<td class="p-2">
<div class="flex items-center gap-2">
@@ -398,13 +378,6 @@
<td class="p-2">
<div class="flex items-center justify-end gap-1">
{{ if not .IsSystem }}
<button
@click="reinstallBackend('{{.Name}}')"
:disabled="reinstallingBackends['{{.Name}}']"
class="text-[#38BDF8]/60 hover:text-[#38BDF8] hover:bg-[#38BDF8]/10 disabled:opacity-50 disabled:cursor-not-allowed rounded p-1 transition-colors"
title="Reinstall {{.Name}}">
<i class="fas fa-arrow-rotate-right text-xs" :class="reinstallingBackends['{{.Name}}'] ? 'fa-spin' : ''"></i>
</button>
<button
@click="deleteBackend('{{.Name}}')"
class="text-red-400/60 hover:text-red-400 hover:bg-red-500/10 rounded p-1 transition-colors"
@@ -433,13 +406,9 @@
function indexDashboard() {
return {
notifications: [],
reinstallingBackends: {},
reinstallingAll: false,
backendJobs: {},
init() {
// Poll for job progress every 600ms
setInterval(() => this.pollJobs(), 600);
// Initialize component
},
addNotification(message, type = 'success') {
@@ -453,137 +422,6 @@ function indexDashboard() {
this.notifications = this.notifications.filter(n => n.id !== id);
},
async reinstallBackend(backendName) {
if (this.reinstallingBackends[backendName]) {
return; // Already reinstalling
}
try {
this.reinstallingBackends[backendName] = true;
const response = await fetch(`/api/backends/install/${encodeURIComponent(backendName)}`, {
method: 'POST'
});
const data = await response.json();
if (response.ok && data.jobID) {
this.backendJobs[backendName] = data.jobID;
this.addNotification(`Reinstalling backend "${backendName}"...`, 'success');
} else {
this.reinstallingBackends[backendName] = false;
this.addNotification(`Failed to start reinstall: ${data.error || 'Unknown error'}`, 'error');
}
} catch (error) {
console.error('Error reinstalling backend:', error);
this.reinstallingBackends[backendName] = false;
this.addNotification(`Failed to reinstall backend: ${error.message}`, 'error');
}
},
async reinstallAllBackends() {
if (this.reinstallingAll) {
return; // Already reinstalling
}
if (!confirm('Are you sure you want to reinstall all backends? This may take some time.')) {
return;
}
this.reinstallingAll = true;
// Get all non-system backends from the page using data attributes
const backendRows = document.querySelectorAll('tr[data-backend-name]');
const backendsToReinstall = [];
backendRows.forEach(row => {
const backendName = row.getAttribute('data-backend-name');
const isSystem = row.getAttribute('data-is-system') === 'true';
if (backendName && !isSystem && !this.reinstallingBackends[backendName]) {
backendsToReinstall.push(backendName);
}
});
if (backendsToReinstall.length === 0) {
this.reinstallingAll = false;
this.addNotification('No backends available to reinstall', 'error');
return;
}
this.addNotification(`Starting reinstall of ${backendsToReinstall.length} backend(s)...`, 'success');
// Reinstall all backends sequentially to avoid overwhelming the system
for (const backendName of backendsToReinstall) {
await this.reinstallBackend(backendName);
// Small delay between installations
await new Promise(resolve => setTimeout(resolve, 500));
}
// Don't set reinstallingAll to false here - let pollJobs handle it when all jobs complete
// This allows the UI to show the batch operation is in progress
},
async pollJobs() {
for (const [backendName, jobID] of Object.entries(this.backendJobs)) {
try {
const response = await fetch(`/api/backends/job/${jobID}`);
const jobData = await response.json();
if (jobData.completed) {
delete this.backendJobs[backendName];
this.reinstallingBackends[backendName] = false;
this.addNotification(`Backend "${backendName}" reinstalled successfully!`, 'success');
// Only reload if not in batch mode and no other jobs are running
if (!this.reinstallingAll && Object.keys(this.backendJobs).length === 0) {
setTimeout(() => {
window.location.reload();
}, 1500);
}
}
if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
delete this.backendJobs[backendName];
this.reinstallingBackends[backendName] = false;
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
errorMessage = jobData.message;
}
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.addNotification(`Error reinstalling backend "${backendName}": ${errorMessage}`, 'error');
// If batch mode and all jobs are done (completed or errored), reload
if (this.reinstallingAll && Object.keys(this.backendJobs).length === 0) {
this.reinstallingAll = false;
setTimeout(() => {
window.location.reload();
}, 2000);
}
}
} catch (error) {
console.error('Error polling job:', error);
}
}
// If batch mode completed and no jobs left, reload
if (this.reinstallingAll && Object.keys(this.backendJobs).length === 0) {
this.reinstallingAll = false;
setTimeout(() => {
window.location.reload();
}, 2000);
}
},
async deleteBackend(backendName) {
if (!confirm(`Are you sure you want to delete the backend "${backendName}"?`)) {
return;

View File

@@ -77,197 +77,18 @@
<!-- URI Input -->
<div>
<div class="flex items-center justify-between mb-2">
<label class="block text-sm font-medium text-[#94A3B8]">
<i class="fas fa-link mr-2"></i>Model URI
</label>
<div class="flex gap-2">
<a href="https://huggingface.co/models?search=gguf&sort=trending"
target="_blank"
class="text-xs px-3 py-1.5 rounded-lg bg-purple-600/20 hover:bg-purple-600/30 text-purple-300 border border-purple-500/30 transition-all flex items-center gap-1.5">
<i class="fab fa-huggingface"></i>
<span>Search GGUF Models on Hugging Face</span>
<i class="fas fa-external-link-alt text-xs"></i>
</a>
<a href="https://huggingface.co/models?sort=trending"
target="_blank"
class="text-xs px-3 py-1.5 rounded-lg bg-purple-600/20 hover:bg-purple-600/30 text-purple-300 border border-purple-500/30 transition-all flex items-center gap-1.5">
<i class="fab fa-huggingface"></i>
<span>Browse All Models on Hugging Face</span>
<i class="fas fa-external-link-alt text-xs"></i>
</a>
</div>
</div>
<label class="block text-sm font-medium text-[#94A3B8] mb-2">
<i class="fas fa-link mr-2"></i>Model URI
</label>
<input
x-model="importUri"
type="text"
placeholder="huggingface://TheBloke/Llama-2-7B-Chat-GGUF or https://example.com/model.gguf"
placeholder="https://example.com/model.gguf or file:///path/to/model.gguf"
class="w-full px-4 py-3 bg-[#101827] border border-[#1E293B] rounded-lg text-[#E5E7EB] focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-colors"
:disabled="isSubmitting">
<p class="mt-2 text-xs text-[#94A3B8]">
Enter the URI or path to the model file you want to import
</p>
<!-- URI Format Guide -->
<div class="mt-4" x-data="{ showGuide: false }">
<button @click="showGuide = !showGuide"
class="flex items-center gap-2 text-sm text-[#94A3B8] hover:text-[#E5E7EB] transition-colors">
<i class="fas" :class="showGuide ? 'fa-chevron-down' : 'fa-chevron-right'"></i>
<i class="fas fa-info-circle"></i>
<span>Supported URI Formats</span>
</button>
<div x-show="showGuide"
x-transition:enter="transition ease-out duration-200"
x-transition:enter-start="opacity-0 transform -translate-y-2"
x-transition:enter-end="opacity-100 transform translate-y-0"
class="mt-3 p-4 bg-[#101827] border border-[#1E293B] rounded-lg space-y-4">
<!-- HuggingFace -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fab fa-huggingface text-purple-400"></i>
HuggingFace
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">huggingface://</code><span class="text-[#94A3B8]">TheBloke/Llama-2-7B-Chat-GGUF</span>
<p class="text-[#6B7280] mt-0.5">Standard HuggingFace format</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">hf://</code><span class="text-[#94A3B8]">TheBloke/Llama-2-7B-Chat-GGUF</span>
<p class="text-[#6B7280] mt-0.5">Short HuggingFace format</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">https://huggingface.co/</code><span class="text-[#94A3B8]">TheBloke/Llama-2-7B-Chat-GGUF</span>
<p class="text-[#6B7280] mt-0.5">Full HuggingFace URL</p>
</div>
</div>
</div>
</div>
<!-- HTTP/HTTPS -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-globe text-blue-400"></i>
HTTP/HTTPS URLs
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">https://</code><span class="text-[#94A3B8]">example.com/model.gguf</span>
<p class="text-[#6B7280] mt-0.5">Direct download from any HTTPS URL</p>
</div>
</div>
</div>
</div>
<!-- Local Files -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-file text-yellow-400"></i>
Local Files
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">file://</code><span class="text-[#94A3B8]">/path/to/model.gguf</span>
<p class="text-[#6B7280] mt-0.5">Local file path (absolute)</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#94A3B8]">/path/to/model.yaml</code>
<p class="text-[#6B7280] mt-0.5">Direct local YAML config file</p>
</div>
</div>
</div>
</div>
<!-- OCI -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-box text-cyan-400"></i>
OCI Registry
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">oci://</code><span class="text-[#94A3B8]">registry.example.com/model:tag</span>
<p class="text-[#6B7280] mt-0.5">OCI container registry</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">ocifile://</code><span class="text-[#94A3B8]">/path/to/image.tar</span>
<p class="text-[#6B7280] mt-0.5">Local OCI tarball file</p>
</div>
</div>
</div>
</div>
<!-- Ollama -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-cube text-indigo-400"></i>
Ollama
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#10B981]">ollama://</code><span class="text-[#94A3B8]">llama2:7b</span>
<p class="text-[#6B7280] mt-0.5">Ollama model format</p>
</div>
</div>
</div>
</div>
<!-- YAML Config Files -->
<div>
<h4 class="text-sm font-semibold text-[#E5E7EB] mb-2 flex items-center gap-2">
<i class="fas fa-code text-pink-400"></i>
YAML Configuration Files
</h4>
<div class="space-y-1.5 text-xs text-[#94A3B8] font-mono pl-6">
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#94A3B8]">https://example.com/model.yaml</code>
<p class="text-[#6B7280] mt-0.5">Remote YAML config file</p>
</div>
</div>
<div class="flex items-start gap-2">
<span class="text-green-400"></span>
<div>
<code class="text-[#94A3B8]">file:///path/to/config.yaml</code>
<p class="text-[#6B7280] mt-0.5">Local YAML config file</p>
</div>
</div>
</div>
</div>
<div class="pt-2 mt-3 border-t border-[#1E293B]">
<p class="text-xs text-[#6B7280] italic">
<i class="fas fa-lightbulb mr-1.5 text-yellow-400"></i>
Tip: For HuggingFace models, you can use any of the three formats. The system will automatically detect and download the appropriate model files.
</p>
</div>
</div>
</div>
</div>
<!-- Preferences Section -->
@@ -299,7 +120,6 @@
<option value="mlx-vlm">mlx-vlm</option>
<option value="transformers">transformers</option>
<option value="vllm">vllm</option>
<option value="diffusers">diffusers</option>
</select>
<p class="mt-1 text-xs text-gray-400">
Force a specific backend. Leave empty to auto-detect from URI.
@@ -402,71 +222,6 @@
Model type for transformers backend. Examples: AutoModelForCausalLM, SentenceTransformer, Mamba, MusicgenForConditionalGeneration. Leave empty to use default (AutoModelForCausalLM).
</p>
</div>
<!-- Pipeline Type (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-stream mr-2"></i>Pipeline Type
</label>
<input
x-model="commonPreferences.pipeline_type"
type="text"
placeholder="StableDiffusionPipeline (for diffusers backend)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Pipeline type for diffusers backend. Examples: StableDiffusionPipeline, StableDiffusion3Pipeline, FluxPipeline. Leave empty to use default (StableDiffusionPipeline).
</p>
</div>
<!-- Scheduler Type (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-clock mr-2"></i>Scheduler Type
</label>
<input
x-model="commonPreferences.scheduler_type"
type="text"
placeholder="k_dpmpp_2m (optional)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Scheduler type for diffusers backend. Examples: k_dpmpp_2m, euler_a, ddim. Leave empty to use model default.
</p>
</div>
<!-- Enable Parameters (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="block text-sm font-medium text-gray-300 mb-2">
<i class="fas fa-cogs mr-2"></i>Enable Parameters
</label>
<input
x-model="commonPreferences.enable_parameters"
type="text"
placeholder="negative_prompt,num_inference_steps (comma-separated)"
class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
:disabled="isSubmitting">
<p class="mt-1 text-xs text-gray-400">
Enabled parameters for diffusers backend (comma-separated). Leave empty to use default (negative_prompt,num_inference_steps).
</p>
</div>
<!-- CUDA (Diffusers) -->
<div x-show="commonPreferences.backend === 'diffusers'">
<label class="flex items-center cursor-pointer">
<input
x-model="commonPreferences.cuda"
type="checkbox"
class="w-5 h-5 rounded bg-gray-900/90 border-gray-700/70 text-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all cursor-pointer"
:disabled="isSubmitting">
<span class="ml-3 text-sm font-medium text-gray-300">
<i class="fas fa-microchip mr-2"></i>CUDA
</span>
</label>
<p class="mt-1 ml-8 text-xs text-gray-400">
Enable CUDA support for GPU acceleration with diffusers backend.
</p>
</div>
</div>
<!-- Custom Preferences -->
@@ -724,11 +479,7 @@ function importModel() {
quantizations: '',
mmproj_quantizations: '',
embeddings: false,
type: '',
pipeline_type: '',
scheduler_type: '',
enable_parameters: '',
cuda: false
type: ''
},
isSubmitting: false,
currentJobId: null,
@@ -803,18 +554,6 @@ function importModel() {
if (this.commonPreferences.type && this.commonPreferences.type.trim()) {
prefsObj.type = this.commonPreferences.type.trim();
}
if (this.commonPreferences.pipeline_type && this.commonPreferences.pipeline_type.trim()) {
prefsObj.pipeline_type = this.commonPreferences.pipeline_type.trim();
}
if (this.commonPreferences.scheduler_type && this.commonPreferences.scheduler_type.trim()) {
prefsObj.scheduler_type = this.commonPreferences.scheduler_type.trim();
}
if (this.commonPreferences.enable_parameters && this.commonPreferences.enable_parameters.trim()) {
prefsObj.enable_parameters = this.commonPreferences.enable_parameters.trim();
}
if (this.commonPreferences.cuda) {
prefsObj.cuda = true;
}
// Add custom preferences (can override common ones)
this.preferences.forEach(pref => {
@@ -890,33 +629,11 @@ function importModel() {
setTimeout(() => {
window.location.reload();
}, 2000);
} else if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
} else if (jobData.error) {
clearInterval(this.jobPollInterval);
this.isSubmitting = false;
this.currentJobId = null;
// Extract error message - handle both string and object errors
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
// Check if error object has any properties
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
// Try common error object properties
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
// Empty object {}, fall back to message field
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
// Use message field if error is not present or is empty
errorMessage = jobData.message;
}
// Remove "error: " prefix if present
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.showAlert('error', 'Import failed: ' + errorMessage);
this.showAlert('error', 'Import failed: ' + jobData.error);
}
} catch (error) {
console.error('Error polling job status:', error);

View File

@@ -714,33 +714,11 @@ function modelsGallery() {
this.fetchModels();
}
if (jobData.error || (jobData.message && jobData.message.startsWith('error:'))) {
if (jobData.error) {
model.processing = false;
delete this.jobProgress[model.jobID];
const action = model.isDeletion ? 'deleting' : 'installing';
// Extract error message - handle both string and object errors
let errorMessage = 'Unknown error';
if (typeof jobData.error === 'string') {
errorMessage = jobData.error;
} else if (jobData.error && typeof jobData.error === 'object') {
// Check if error object has any properties
const errorKeys = Object.keys(jobData.error);
if (errorKeys.length > 0) {
// Try common error object properties
errorMessage = jobData.error.message || jobData.error.error || jobData.error.Error || JSON.stringify(jobData.error);
} else {
// Empty object {}, fall back to message field
errorMessage = jobData.message || 'Unknown error';
}
} else if (jobData.message) {
// Use message field if error is not present or is empty
errorMessage = jobData.message;
}
// Remove "error: " prefix if present
if (errorMessage.startsWith('error: ')) {
errorMessage = errorMessage.substring(7);
}
this.addNotification(`Error ${action} model "${model.name}": ${errorMessage}`, 'error');
this.addNotification(`Error ${action} model "${model.name}": ${jobData.error}`, 'error');
}
} catch (error) {
console.error('Error polling job:', error);

View File

@@ -1,12 +1,12 @@
<nav class="bg-[#101827] shadow-2xl border-b border-[#1E293B]">
<div class="container mx-auto px-4 py-2">
<div class="container mx-auto px-4 py-3">
<div class="flex items-center justify-between">
<div class="flex items-center">
<!-- Logo Image -->
<a href="./" class="flex items-center group">
<img src="static/logo_horizontal.png"
alt="LocalAI Logo"
class="h-10 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125 group-hover:drop-shadow-[0_0_8px_rgba(56,189,248,0.5)]">
class="h-14 mr-3 brightness-110 transition-all duration-300 group-hover:brightness-125 group-hover:drop-shadow-[0_0_8px_rgba(56,189,248,0.5)]">
</a>
</div>

View File

@@ -1,653 +0,0 @@
<!DOCTYPE html>
<html lang="en">
{{template "views/partials/head" .}}
<body class="bg-[#101827] text-[#E5E7EB]">
<div class="flex flex-col min-h-screen" x-data="settingsDashboard()">
{{template "views/partials/navbar" .}}
<!-- Notifications -->
<div class="fixed top-20 right-4 z-50 space-y-2" style="max-width: 400px;">
<template x-for="notification in notifications" :key="notification.id">
<div x-show="true"
x-transition:enter="transition ease-out duration-200"
x-transition:enter-start="opacity-0"
x-transition:enter-end="opacity-100"
x-transition:leave="transition ease-in duration-150"
x-transition:leave-start="opacity-100"
x-transition:leave-end="opacity-0"
:class="notification.type === 'error' ? 'bg-red-500' : 'bg-green-500'"
class="rounded-lg p-4 text-white flex items-start space-x-3">
<div class="flex-shrink-0">
<i :class="notification.type === 'error' ? 'fas fa-exclamation-circle' : 'fas fa-check-circle'" class="text-xl"></i>
</div>
<div class="flex-1 min-w-0">
<p class="text-sm font-medium break-words" x-text="notification.message"></p>
</div>
<button @click="dismissNotification(notification.id)" class="flex-shrink-0 text-white hover:opacity-80 transition-opacity">
<i class="fas fa-times"></i>
</button>
</div>
</template>
</div>
<div class="container mx-auto px-4 py-6 flex-grow max-w-4xl">
<!-- Header -->
<div class="mb-6">
<div class="flex items-center justify-between mb-2">
<h1 class="text-2xl font-semibold text-[#E5E7EB]">
Application Settings
</h1>
<a href="/manage"
class="inline-flex items-center text-[#94A3B8] hover:text-[#E5E7EB] transition-colors">
<i class="fas fa-arrow-left mr-2 text-sm"></i>
<span class="text-sm">Back to Manage</span>
</a>
</div>
<p class="text-sm text-[#94A3B8]">Configure watchdog and backend request settings</p>
</div>
<!-- Settings Form -->
<form @submit.prevent="saveSettings()" class="space-y-6">
<!-- Watchdog Settings Section -->
<div class="bg-[#1E293B] border border-[#38BDF8]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-shield-alt mr-2 text-[#38BDF8] text-sm"></i>
Watchdog Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure automatic monitoring and management of backend processes
</p>
<div class="space-y-4">
<!-- Enable Watchdog -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable Watchdog</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable automatic monitoring of backend processes</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.watchdog_enabled"
@change="updateWatchdogEnabled()"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
<!-- Enable Idle Check -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable Idle Check</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically stop backends that are idle for too long</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.watchdog_idle_enabled"
:disabled="!settings.watchdog_enabled"
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
<!-- Idle Timeout -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Idle Timeout</label>
<p class="text-xs text-[#94A3B8] mb-2">Time before an idle backend is stopped (e.g., 15m, 1h)</p>
<input type="text" x-model="settings.watchdog_idle_timeout"
:disabled="!settings.watchdog_idle_enabled"
placeholder="15m"
class="w-full px-3 py-2 bg-[#101827] border border-[#38BDF8]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#38BDF8]/50"
:class="!settings.watchdog_idle_enabled ? 'opacity-50 cursor-not-allowed' : ''">
</div>
<!-- Enable Busy Check -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable Busy Check</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically stop backends that are busy for too long (stuck processes)</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.watchdog_busy_enabled"
:disabled="!settings.watchdog_enabled"
class="sr-only peer" :class="!settings.watchdog_enabled ? 'opacity-50' : ''">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#38BDF8]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#38BDF8]"></div>
</label>
</div>
<!-- Busy Timeout -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Busy Timeout</label>
<p class="text-xs text-[#94A3B8] mb-2">Time before a busy backend is stopped (e.g., 5m, 30m)</p>
<input type="text" x-model="settings.watchdog_busy_timeout"
:disabled="!settings.watchdog_busy_enabled"
placeholder="5m"
class="w-full px-3 py-2 bg-[#101827] border border-[#38BDF8]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#38BDF8]/50"
:class="!settings.watchdog_busy_enabled ? 'opacity-50 cursor-not-allowed' : ''">
</div>
</div>
</div>
<!-- Backend Request Settings Section -->
<div class="bg-[#1E293B] border border-[#8B5CF6]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-cogs mr-2 text-[#8B5CF6] text-sm"></i>
Backend Request Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure how backends handle multiple requests
</p>
<div class="space-y-4">
<!-- Single Backend Mode -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Single Backend Mode</label>
<p class="text-xs text-[#94A3B8] mt-1">Allow only one backend to be active at a time</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.single_backend"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#8B5CF6]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#8B5CF6]"></div>
</label>
</div>
<!-- Parallel Backend Requests -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Parallel Backend Requests</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable backends to handle multiple requests in parallel (if supported)</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.parallel_backend_requests"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#8B5CF6]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#8B5CF6]"></div>
</label>
</div>
</div>
</div>
<!-- Performance Settings Section -->
<div class="bg-[#1E293B] border border-[#10B981]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-tachometer-alt mr-2 text-[#10B981] text-sm"></i>
Performance Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure default performance parameters for models
</p>
<div class="space-y-4">
<!-- Threads -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Default Threads</label>
<p class="text-xs text-[#94A3B8] mb-2">Number of threads to use for model inference (0 = auto)</p>
<input type="number" x-model="settings.threads"
min="0"
placeholder="0"
class="w-full px-3 py-2 bg-[#101827] border border-[#10B981]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#10B981]/50">
</div>
<!-- Context Size -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Default Context Size</label>
<p class="text-xs text-[#94A3B8] mb-2">Default context window size for models</p>
<input type="number" x-model="settings.context_size"
min="0"
placeholder="512"
class="w-full px-3 py-2 bg-[#101827] border border-[#10B981]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#10B981]/50">
</div>
<!-- F16 -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">F16 Precision</label>
<p class="text-xs text-[#94A3B8] mt-1">Use 16-bit floating point precision</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.f16"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#10B981]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#10B981]"></div>
</label>
</div>
<!-- Debug -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Debug Mode</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable debug logging</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.debug"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#10B981]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#10B981]"></div>
</label>
</div>
</div>
</div>
<!-- API Settings Section -->
<div class="bg-[#1E293B] border border-[#F59E0B]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-globe mr-2 text-[#F59E0B] text-sm"></i>
API Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure CORS and CSRF protection
</p>
<div class="space-y-4">
<!-- CORS -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable CORS</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable Cross-Origin Resource Sharing</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.cors"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#F59E0B]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#F59E0B]"></div>
</label>
</div>
<!-- CORS Allow Origins -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">CORS Allow Origins</label>
<p class="text-xs text-[#94A3B8] mb-2">Comma-separated list of allowed origins</p>
<input type="text" x-model="settings.cors_allow_origins"
placeholder="*"
class="w-full px-3 py-2 bg-[#101827] border border-[#F59E0B]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#F59E0B]/50">
</div>
<!-- CSRF -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Enable CSRF Protection</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable Cross-Site Request Forgery protection</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.csrf"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#F59E0B]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#F59E0B]"></div>
</label>
</div>
</div>
</div>
<!-- P2P Settings Section -->
<div class="bg-[#1E293B] border border-[#EC4899]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-network-wired mr-2 text-[#EC4899] text-sm"></i>
P2P Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure peer-to-peer networking
</p>
<div class="space-y-4">
<!-- P2P Token -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">P2P Token</label>
<p class="text-xs text-[#94A3B8] mb-2">Authentication token for P2P network (set to 0 to generate a new token)</p>
<input type="text" x-model="settings.p2p_token"
placeholder=""
class="w-full px-3 py-2 bg-[#101827] border border-[#EC4899]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#EC4899]/50">
</div>
<!-- P2P Network ID -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">P2P Network ID</label>
<p class="text-xs text-[#94A3B8] mb-2">Network identifier for P2P connections</p>
<input type="text" x-model="settings.p2p_network_id"
placeholder=""
class="w-full px-3 py-2 bg-[#101827] border border-[#EC4899]/20 rounded text-sm text-[#E5E7EB] focus:outline-none focus:ring-2 focus:ring-[#EC4899]/50">
</div>
<!-- Federated -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Federated Mode</label>
<p class="text-xs text-[#94A3B8] mt-1">Enable federated instance mode</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.federated"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#EC4899]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#EC4899]"></div>
</label>
</div>
</div>
</div>
<!-- API Keys Settings Section -->
<div class="bg-[#1E293B] border border-[#EF4444]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-key mr-2 text-[#EF4444] text-sm"></i>
API Keys
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Manage API keys for authentication. Keys from environment variables are always included.
</p>
<div class="space-y-4">
<!-- API Keys List -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">API Keys</label>
<p class="text-xs text-[#94A3B8] mb-2">List of API keys (one per line or comma-separated)</p>
<textarea x-model="settings.api_keys_text"
rows="4"
placeholder="sk-1234567890abcdef&#10;sk-0987654321fedcba"
class="w-full px-3 py-2 bg-[#101827] border border-[#EF4444]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#EF4444]/50"></textarea>
<p class="text-xs text-[#94A3B8] mt-1">Note: API keys are sensitive. Handle with care.</p>
</div>
</div>
</div>
<!-- Gallery Settings Section -->
<div class="bg-[#1E293B] border border-[#6366F1]/20 rounded-lg p-6">
<h2 class="text-xl font-semibold text-[#E5E7EB] mb-4 flex items-center">
<i class="fas fa-images mr-2 text-[#6366F1] text-sm"></i>
Gallery Settings
</h2>
<p class="text-xs text-[#94A3B8] mb-4">
Configure model and backend galleries
</p>
<div class="space-y-4">
<!-- Autoload Galleries -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Autoload Galleries</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically load model galleries on startup</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.autoload_galleries"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#6366F1]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#6366F1]"></div>
</label>
</div>
<!-- Autoload Backend Galleries -->
<div class="flex items-center justify-between">
<div>
<label class="text-sm font-medium text-[#E5E7EB]">Autoload Backend Galleries</label>
<p class="text-xs text-[#94A3B8] mt-1">Automatically load backend galleries on startup</p>
</div>
<label class="relative inline-flex items-center cursor-pointer">
<input type="checkbox" x-model="settings.autoload_backend_galleries"
class="sr-only peer">
<div class="w-11 h-6 bg-[#101827] peer-focus:outline-none peer-focus:ring-4 peer-focus:ring-[#6366F1]/20 rounded-full peer peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-5 after:w-5 after:transition-all peer-checked:bg-[#6366F1]"></div>
</label>
</div>
<!-- Galleries (JSON) -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Model Galleries (JSON)</label>
<p class="text-xs text-[#94A3B8] mb-2">Array of gallery objects with 'url' and 'name' fields</p>
<textarea x-model="settings.galleries_json"
rows="4"
placeholder='[{"url": "https://example.com", "name": "Example Gallery"}]'
class="w-full px-3 py-2 bg-[#101827] border border-[#6366F1]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#6366F1]/50"></textarea>
</div>
<!-- Backend Galleries (JSON) -->
<div>
<label class="block text-sm font-medium text-[#E5E7EB] mb-2">Backend Galleries (JSON)</label>
<p class="text-xs text-[#94A3B8] mb-2">Array of backend gallery objects with 'url' and 'name' fields</p>
<textarea x-model="settings.backend_galleries_json"
rows="4"
placeholder='[{"url": "https://example.com", "name": "Example Backend Gallery"}]'
class="w-full px-3 py-2 bg-[#101827] border border-[#6366F1]/20 rounded text-sm text-[#E5E7EB] font-mono focus:outline-none focus:ring-2 focus:ring-[#6366F1]/50"></textarea>
</div>
</div>
</div>
<!-- Source Info -->
<div class="bg-yellow-500/10 border border-yellow-500/20 rounded-lg p-4" x-show="sourceInfo">
<div class="flex items-start">
<i class="fas fa-info-circle text-yellow-400 mr-2 mt-0.5"></i>
<div class="flex-1">
<p class="text-sm text-yellow-300 font-medium mb-1">Configuration Source</p>
<p class="text-xs text-yellow-200" x-text="'Settings are currently loaded from: ' + sourceInfo"></p>
<p class="text-xs text-yellow-200 mt-1" x-show="sourceInfo === 'env'">
Environment variables take precedence. To modify settings via the UI, unset the relevant environment variables first.
</p>
</div>
</div>
</div>
<!-- Save Button -->
<div class="flex justify-end">
<button type="submit"
:disabled="saving"
class="inline-flex items-center bg-[#38BDF8] hover:bg-[#38BDF8]/90 disabled:opacity-50 disabled:cursor-not-allowed text-white py-2 px-6 rounded-lg font-medium transition-colors">
<i class="fas fa-save mr-2" :class="saving ? 'fa-spin fa-spinner' : ''"></i>
<span x-text="saving ? 'Saving...' : 'Save Settings'"></span>
</button>
</div>
</form>
</div>
{{template "views/partials/footer" .}}
</div>
<script>
function settingsDashboard() {
return {
notifications: [],
settings: {
watchdog_enabled: false,
watchdog_idle_enabled: false,
watchdog_busy_enabled: false,
watchdog_idle_timeout: '15m',
watchdog_busy_timeout: '5m',
single_backend: false,
parallel_backend_requests: false,
threads: 0,
context_size: 0,
f16: false,
debug: false,
cors: false,
csrf: false,
cors_allow_origins: '',
p2p_token: '',
p2p_network_id: '',
federated: false,
autoload_galleries: false,
autoload_backend_galleries: false,
galleries_json: '[]',
backend_galleries_json: '[]',
api_keys_text: ''
},
sourceInfo: '',
saving: false,
init() {
this.loadSettings();
},
async loadSettings() {
try {
const response = await fetch('/api/settings');
const data = await response.json();
if (response.ok) {
this.settings = {
watchdog_enabled: data.watchdog_enabled,
watchdog_idle_enabled: data.watchdog_idle_enabled,
watchdog_busy_enabled: data.watchdog_busy_enabled,
watchdog_idle_timeout: data.watchdog_idle_timeout || '15m',
watchdog_busy_timeout: data.watchdog_busy_timeout || '5m',
single_backend: data.single_backend,
parallel_backend_requests: data.parallel_backend_requests,
threads: data.threads || 0,
context_size: data.context_size || 0,
f16: data.f16 || false,
debug: data.debug || false,
cors: data.cors || false,
csrf: data.csrf || false,
cors_allow_origins: data.cors_allow_origins || '',
p2p_token: data.p2p_token || '',
p2p_network_id: data.p2p_network_id || '',
federated: data.federated || false,
autoload_galleries: data.autoload_galleries || false,
autoload_backend_galleries: data.autoload_backend_galleries || false,
galleries_json: JSON.stringify(data.galleries || [], null, 2),
backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2),
api_keys_text: (data.api_keys || []).join('\n')
};
this.sourceInfo = data.source || 'default';
} else {
this.addNotification('Failed to load settings: ' + (data.error || 'Unknown error'), 'error');
}
} catch (error) {
console.error('Error loading settings:', error);
this.addNotification('Failed to load settings: ' + error.message, 'error');
}
},
updateWatchdogEnabled() {
if (!this.settings.watchdog_enabled) {
this.settings.watchdog_idle_enabled = false;
this.settings.watchdog_busy_enabled = false;
}
},
async saveSettings() {
if (this.saving) return;
this.saving = true;
try {
const payload = {};
// Only include changed values
if (this.settings.watchdog_enabled !== undefined) {
payload.watchdog_enabled = this.settings.watchdog_enabled;
}
if (this.settings.watchdog_idle_enabled !== undefined) {
payload.watchdog_idle_enabled = this.settings.watchdog_idle_enabled;
}
if (this.settings.watchdog_busy_enabled !== undefined) {
payload.watchdog_busy_enabled = this.settings.watchdog_busy_enabled;
}
if (this.settings.watchdog_idle_timeout) {
payload.watchdog_idle_timeout = this.settings.watchdog_idle_timeout;
}
if (this.settings.watchdog_busy_timeout) {
payload.watchdog_busy_timeout = this.settings.watchdog_busy_timeout;
}
if (this.settings.single_backend !== undefined) {
payload.single_backend = this.settings.single_backend;
}
if (this.settings.parallel_backend_requests !== undefined) {
payload.parallel_backend_requests = this.settings.parallel_backend_requests;
}
if (this.settings.threads !== undefined) {
payload.threads = parseInt(this.settings.threads) || 0;
}
if (this.settings.context_size !== undefined) {
payload.context_size = parseInt(this.settings.context_size) || 0;
}
if (this.settings.f16 !== undefined) {
payload.f16 = this.settings.f16;
}
if (this.settings.debug !== undefined) {
payload.debug = this.settings.debug;
}
if (this.settings.cors !== undefined) {
payload.cors = this.settings.cors;
}
if (this.settings.csrf !== undefined) {
payload.csrf = this.settings.csrf;
}
if (this.settings.cors_allow_origins !== undefined) {
payload.cors_allow_origins = this.settings.cors_allow_origins;
}
if (this.settings.p2p_token !== undefined) {
payload.p2p_token = this.settings.p2p_token;
}
if (this.settings.p2p_network_id !== undefined) {
payload.p2p_network_id = this.settings.p2p_network_id;
}
if (this.settings.federated !== undefined) {
payload.federated = this.settings.federated;
}
if (this.settings.autoload_galleries !== undefined) {
payload.autoload_galleries = this.settings.autoload_galleries;
}
if (this.settings.autoload_backend_galleries !== undefined) {
payload.autoload_backend_galleries = this.settings.autoload_backend_galleries;
}
// Parse API keys from text (split by newline or comma, trim whitespace, filter empty)
if (this.settings.api_keys_text !== undefined) {
const keys = this.settings.api_keys_text
.split(/[\n,]/)
.map(k => k.trim())
.filter(k => k.length > 0);
if (keys.length > 0) {
payload.api_keys = keys;
} else {
// If empty, send empty array to clear keys
payload.api_keys = [];
}
}
// Parse galleries JSON
if (this.settings.galleries_json) {
try {
payload.galleries = JSON.parse(this.settings.galleries_json);
} catch (e) {
this.addNotification('Invalid galleries JSON: ' + e.message, 'error');
this.saving = false;
return;
}
}
if (this.settings.backend_galleries_json) {
try {
payload.backend_galleries = JSON.parse(this.settings.backend_galleries_json);
} catch (e) {
this.addNotification('Invalid backend galleries JSON: ' + e.message, 'error');
this.saving = false;
return;
}
}
const response = await fetch('/api/settings', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(payload)
});
const data = await response.json();
if (response.ok && data.success) {
this.addNotification('Settings saved successfully!', 'success');
// Reload settings to get updated source info
setTimeout(() => this.loadSettings(), 1000);
} else {
this.addNotification('Failed to save settings: ' + (data.error || 'Unknown error'), 'error');
}
} catch (error) {
console.error('Error saving settings:', error);
this.addNotification('Failed to save settings: ' + error.message, 'error');
} finally {
this.saving = false;
}
},
addNotification(message, type = 'success') {
const id = Date.now();
this.notifications.push({ id, message, type });
setTimeout(() => this.dismissNotification(id), 5000);
},
dismissNotification(id) {
this.notifications = this.notifications.filter(n => n.id !== id);
}
}
}
</script>
</body>
</html>

View File

@@ -34,14 +34,15 @@
<div class="border-b border-[#1E293B] p-5">
<div class="flex flex-col sm:flex-row items-center justify-between gap-4">
<!-- Model Selection -->
<div class="flex items-center" x-data="{ link : '{{ if .Model }}tts/{{.Model}}{{ end }}' }">
<div class="flex items-center">
<label for="model-select" class="mr-3 text-[#94A3B8] font-medium">
<i class="fas fa-microphone-lines text-[#8B5CF6] mr-2"></i>Model:
</label>
<select
<select
id="model-select"
x-model="link"
@change="window.location = link"
x-data="{ link : '' }"
x-model="link"
x-init="$watch('link', value => window.location = link)"
class="bg-[#101827] text-[#E5E7EB] border border-[#1E293B] focus:border-[#8B5CF6] focus:ring-2 focus:ring-[#8B5CF6]/50 rounded-lg shadow-sm p-2.5 appearance-none"
>
<option value="" disabled class="text-[#94A3B8]">Select a model</option>

102
core/p2p/sync.go Normal file
View File

@@ -0,0 +1,102 @@
package p2p
import (
"context"
"slices"
"time"
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/application"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/edgevpn/pkg/node"
zlog "github.com/rs/zerolog/log"
)
func syncState(ctx context.Context, n *node.Node, app *application.Application) error {
zlog.Debug().Msg("[p2p-sync] Syncing state")
whatWeHave := []string{}
for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() {
whatWeHave = append(whatWeHave, model.Name)
}
ledger, _ := n.Ledger()
currentData := ledger.CurrentData()
zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData)
data, exists := ledger.GetKey("shared_state", "models")
if !exists {
ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave)
zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave)
}
models := []string{}
if err := data.Unmarshal(&models); err != nil {
zlog.Warn().Err(err).Msg("error unmarshalling models")
return nil
}
zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models)
// Sync with our state
whatIsNotThere := []string{}
for _, model := range whatWeHave {
if !slices.Contains(models, model) {
whatIsNotThere = append(whatIsNotThere, model)
}
}
if len(whatIsNotThere) > 0 {
zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...))
ledger.AnnounceUpdate(
ctx,
1*time.Minute,
"shared_state",
"models",
append(models, whatIsNotThere...),
)
}
// Check if we have a model that is not in our state, otherwise install it
for _, model := range models {
if slices.Contains(whatWeHave, model) {
zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model)
continue
}
// we install model
zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model)
uuid, err := uuid.NewUUID()
if err != nil {
zlog.Error().Err(err).Msg("error generating UUID")
continue
}
app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uuid.String(),
GalleryElementName: model,
Galleries: app.ApplicationConfig().Galleries,
BackendGalleries: app.ApplicationConfig().BackendGalleries,
}
}
return nil
}
func Sync(ctx context.Context, n *node.Node, app *application.Application) error {
go func() {
for {
select {
case <-ctx.Done():
return
case <-time.After(1 * time.Minute):
if err := syncState(ctx, n, app); err != nil {
zlog.Error().Err(err).Msg("error syncing state")
}
}
}
}()
return nil
}

View File

@@ -85,7 +85,7 @@ func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, galler
}
// Reload models
err = cl.LoadModelConfigsFromPath(systemState.Model.ModelsPath, g.appConfig.ToConfigLoaderOptions()...)
err = cl.LoadModelConfigsFromPath(systemState.Model.ModelsPath)
if err != nil {
return err
}

View File

@@ -5,6 +5,10 @@ import (
"encoding/json"
"errors"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"time"
"github.com/google/uuid"
@@ -12,10 +16,12 @@ import (
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/gallery/importers"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
const (
@@ -28,59 +34,178 @@ const (
func InstallModels(ctx context.Context, galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
// create an error that groups all errors
var err error
installBackend := func(modelPath string) error {
// Then load the model file, and read the backend
modelYAML, e := os.ReadFile(modelPath)
if e != nil {
log.Error().Err(e).Str("filepath", modelPath).Msg("error reading model definition")
return e
}
var model config.ModelConfig
if e := yaml.Unmarshal(modelYAML, &model); e != nil {
log.Error().Err(e).Str("filepath", modelPath).Msg("error unmarshalling model definition")
return e
}
if model.Backend == "" {
log.Debug().Str("filepath", modelPath).Msg("no backend found in model definition")
return nil
}
if err := gallery.InstallBackendFromGallery(ctx, backendGalleries, systemState, modelLoader, model.Backend, downloadStatus, false); err != nil {
log.Error().Err(err).Str("backend", model.Backend).Msg("error installing backend")
return err
}
return nil
}
for _, url := range models {
// Check if it's a model gallery, or print a warning
e, found := installModel(ctx, galleries, backendGalleries, url, systemState, modelLoader, downloadStatus, enforceScan, autoloadBackendGalleries)
if e != nil && found {
log.Error().Err(err).Msgf("[startup] failed installing model '%s'", url)
err = errors.Join(err, e)
} else if !found {
log.Debug().Msgf("[startup] model not found in the gallery '%s'", url)
// As a best effort, try to resolve the model from the remote library
// if it's not resolved we try with the other method below
if galleryService == nil {
return fmt.Errorf("cannot start autoimporter, not sure how to handle this uri")
}
uri := downloader.URI(url)
// TODO: we should just use the discoverModelConfig here and default to this.
modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
if discoverErr != nil {
log.Error().Err(discoverErr).Msgf("[startup] failed to discover model config '%s'", url)
err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err))
continue
}
switch {
case uri.LooksLikeOCI():
log.Debug().Msgf("[startup] resolved OCI model to download: %s", url)
uuid, uuidErr := uuid.NewUUID()
if uuidErr != nil {
err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr))
continue
}
// convert OCI image name to a file name.
ociName := strings.TrimPrefix(url, downloader.OCIPrefix)
ociName = strings.TrimPrefix(ociName, downloader.OllamaPrefix)
ociName = strings.ReplaceAll(ociName, "/", "__")
ociName = strings.ReplaceAll(ociName, ":", "__")
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
ID: uuid.String(),
GalleryElementName: modelConfig.Name,
GalleryElement: &modelConfig,
BackendGalleries: backendGalleries,
}
var status *services.GalleryOpStatus
// wait for op to finish
for {
status = galleryService.GetStatus(uuid.String())
if status != nil && status.Processed {
break
// check if file exists
if _, e := os.Stat(filepath.Join(systemState.Model.ModelsPath, ociName)); errors.Is(e, os.ErrNotExist) {
modelDefinitionFilePath := filepath.Join(systemState.Model.ModelsPath, ociName)
e := uri.DownloadFile(modelDefinitionFilePath, "", 0, 0, func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if e != nil {
log.Error().Err(e).Str("url", url).Str("filepath", modelDefinitionFilePath).Msg("error downloading model")
err = errors.Join(err, e)
}
time.Sleep(1 * time.Second)
}
if status.Error != nil {
log.Error().Err(status.Error).Msgf("[startup] failed to import model '%s' from '%s'", modelConfig.Name, url)
return status.Error
log.Info().Msgf("[startup] installed model from OCI repository: %s", ociName)
case uri.LooksLikeURL():
log.Debug().Msgf("[startup] downloading %s", url)
// Extract filename from URL
fileName, e := uri.FilenameFromUrl()
if e != nil {
log.Warn().Err(e).Str("url", url).Msg("error extracting filename from URL")
err = errors.Join(err, e)
continue
}
log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
modelPath := filepath.Join(systemState.Model.ModelsPath, fileName)
if e := utils.VerifyPath(fileName, modelPath); e != nil {
log.Error().Err(e).Str("filepath", modelPath).Msg("error verifying path")
err = errors.Join(err, e)
continue
}
// check if file exists
if _, e := os.Stat(modelPath); errors.Is(e, os.ErrNotExist) {
e := uri.DownloadFile(modelPath, "", 0, 0, func(fileName, current, total string, percent float64) {
utils.DisplayDownloadFunction(fileName, current, total, percent)
})
if e != nil {
log.Error().Err(e).Str("url", url).Str("filepath", modelPath).Msg("error downloading model")
err = errors.Join(err, e)
}
}
// Check if we have the backend installed
if autoloadBackendGalleries && path.Ext(modelPath) == YAML_EXTENSION {
if err := installBackend(modelPath); err != nil {
log.Error().Err(err).Str("filepath", modelPath).Msg("error installing backend")
}
}
default:
if _, e := os.Stat(url); e == nil {
log.Debug().Msgf("[startup] resolved local model: %s", url)
// copy to modelPath
md5Name := utils.MD5(url)
modelYAML, e := os.ReadFile(url)
if e != nil {
log.Error().Err(e).Str("filepath", url).Msg("error reading model definition")
err = errors.Join(err, e)
continue
}
modelDefinitionFilePath := filepath.Join(systemState.Model.ModelsPath, md5Name) + YAML_EXTENSION
if e := os.WriteFile(modelDefinitionFilePath, modelYAML, 0600); e != nil {
log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error loading model: %s")
err = errors.Join(err, e)
}
// Check if we have the backend installed
if autoloadBackendGalleries && path.Ext(modelDefinitionFilePath) == YAML_EXTENSION {
if err := installBackend(modelDefinitionFilePath); err != nil {
log.Error().Err(err).Str("filepath", modelDefinitionFilePath).Msg("error installing backend")
}
}
} else {
// Check if it's a model gallery, or print a warning
e, found := installModel(ctx, galleries, backendGalleries, url, systemState, modelLoader, downloadStatus, enforceScan, autoloadBackendGalleries)
if e != nil && found {
log.Error().Err(err).Msgf("[startup] failed installing model '%s'", url)
err = errors.Join(err, e)
} else if !found {
log.Warn().Msgf("[startup] failed resolving model '%s'", url)
if galleryService == nil {
err = errors.Join(err, fmt.Errorf("cannot start autoimporter, not sure how to handle this uri"))
continue
}
// TODO: we should just use the discoverModelConfig here and default to this.
modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
if discoverErr != nil {
err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err))
continue
}
uuid, uuidErr := uuid.NewUUID()
if uuidErr != nil {
err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr))
continue
}
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
ID: uuid.String(),
GalleryElementName: modelConfig.Name,
GalleryElement: &modelConfig,
BackendGalleries: backendGalleries,
}
var status *services.GalleryOpStatus
// wait for op to finish
for {
status = galleryService.GetStatus(uuid.String())
if status != nil && status.Processed {
break
}
time.Sleep(1 * time.Second)
}
if status.Error != nil {
return status.Error
}
log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
}
}
}
}
return err

View File

@@ -7,7 +7,6 @@ import (
"path/filepath"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/services"
. "github.com/mudler/LocalAI/core/startup"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
@@ -20,11 +19,8 @@ var _ = Describe("Preload test", func() {
var tmpdir string
var systemState *system.SystemState
var ml *model.ModelLoader
var ctx context.Context
var cancel context.CancelFunc
BeforeEach(func() {
ctx, cancel = context.WithCancel(context.Background())
var err error
tmpdir, err = os.MkdirTemp("", "")
Expect(err).ToNot(HaveOccurred())
@@ -33,24 +29,13 @@ var _ = Describe("Preload test", func() {
ml = model.NewModelLoader(systemState, true)
})
AfterEach(func() {
cancel()
})
Context("Preloading from strings", func() {
It("loads from embedded full-urls", func() {
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
galleryService := services.NewGalleryService(&config.ApplicationConfig{
SystemState: systemState,
}, ml)
galleryService.Start(ctx, config.NewModelConfigLoader(tmpdir), systemState)
InstallModels(context.TODO(), nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
err := InstallModels(ctx, galleryService, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, func(s1, s2, s3 string, f float64) {
fmt.Println(s1, s2, s3, f)
}, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)
content, err := os.ReadFile(resultFile)
@@ -62,22 +47,13 @@ var _ = Describe("Preload test", func() {
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
galleryService := services.NewGalleryService(&config.ApplicationConfig{
SystemState: systemState,
}, ml)
galleryService.Start(ctx, config.NewModelConfigLoader(tmpdir), systemState)
err := InstallModels(ctx, galleryService, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, func(s1, s2, s3 string, f float64) {
fmt.Println(s1, s2, s3, f)
}, url)
err := InstallModels(context.TODO(), nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)
dirs, err := os.ReadDir(tmpdir)
Expect(err).ToNot(HaveOccurred())
_, err = os.Stat(resultFile)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("%+v", dirs))
Expect(err).ToNot(HaveOccurred())
})
})
})

208
docs/config.toml Normal file
View File

@@ -0,0 +1,208 @@
baseURL = "https://localai.io/"
languageCode = "en-GB"
contentDir = "content"
enableEmoji = true
enableGitInfo = true # N.B. .GitInfo does not currently function with git submodule content directories
defaultContentLanguage = 'en'
[markup]
defaultMarkdownHandler = "goldmark"
[markup.tableOfContents]
endLevel = 3
startLevel = 1
[markup.goldmark]
[markup.goldmark.renderer]
unsafe = true # https://jdhao.github.io/2019/12/29/hugo_html_not_shown/
# [markup.highlight]
# codeFences = false # disables Hugo's default syntax highlighting
# [markup.goldmark.parser]
# [markup.goldmark.parser.attribute]
# block = true
# title = true
[params]
google_fonts = [
["Inter", "300, 400, 600, 700"],
["Fira Code", "500, 700"]
]
sans_serif_font = "Inter" # Default is System font
secondary_font = "Inter" # Default is System font
mono_font = "Fira Code" # Default is System font
[params.footer]
copyright = "© 2023-2025 <a href='https://mudler.pm' target=_blank>Ettore Di Giacinto</a>"
version = true # includes git commit info
[params.social]
github = "mudler/LocalAI" # YOUR_GITHUB_ID or YOUR_GITHUB_URL
twitter = "LocalAI_API" # YOUR_TWITTER_ID
discord = "uJAeKSAGDy"
# instagram = "colinwilson" # YOUR_INSTAGRAM_ID
rss = true # show rss icon with link
[params.docs] # Parameters for the /docs 'template'
logo = "https://raw.githubusercontent.com/mudler/LocalAI/refs/heads/master/core/http/static/logo.png"
logo_text = ""
title = "LocalAI" # default html title for documentation pages/sections
pathName = "docs" # path name for documentation site | default "docs"
# themeColor = "cyan" # (optional) - Set theme accent colour. Options include: blue (default), green, red, yellow, emerald, cardinal, magenta, cyan
darkMode = true # enable dark mode option? default false
prism = true # enable syntax highlighting via Prism
prismTheme = "solarized-light" # (optional) - Set theme for PrismJS. Options include: lotusdocs (default), solarized-light, twilight, lucario
# gitinfo
repoURL = "https://github.com/mudler/LocalAI" # Git repository URL for your site [support for GitHub, GitLab, and BitBucket]
repoBranch = "master"
editPage = true # enable 'Edit this page' feature - default false
lastMod = true # enable 'Last modified' date on pages - default false
lastModRelative = true # format 'Last modified' time as relative - default true
sidebarIcons = true # enable sidebar icons? default false
breadcrumbs = true # default is true
backToTop = true # enable back-to-top button? default true
# ToC
toc = true # enable table of contents? default is true
tocMobile = true # enable table of contents in mobile view? default is true
scrollSpy = true # enable scrollspy on ToC? default is true
# front matter
descriptions = true # enable front matter descriptions under content title?
titleIcon = true # enable front matter icon title prefix? default is false
# content navigation
navDesc = true # include front matter descriptions in Prev/Next navigation cards
navDescTrunc = 30 # Number of characters by which to truncate the Prev/Next descriptions
listDescTrunc = 100 # Number of characters by which to truncate the list card description
# Link behaviour
intLinkTooltip = true # Enable a tooltip for internal links that displays info about the destination? default false
# extLinkNewTab = false # Open external links in a new Tab? default true
# logoLinkURL = "" # Set a custom URL destination for the top header logo link.
[params.flexsearch] # Parameters for FlexSearch
enabled = true
# tokenize = "full"
# optimize = true
# cache = 100
# minQueryChar = 3 # default is 0 (disabled)
# maxResult = 5 # default is 5
# searchSectionsIndex = []
[params.docsearch] # Parameters for DocSearch
# appID = "" # Algolia Application ID
# apiKey = "" # Algolia Search-Only API (Public) Key
# indexName = "" # Index Name to perform search on (or set env variable HUGO_PARAM_DOCSEARCH_indexName)
[params.analytics] # Parameters for Analytics (Google, Plausible)
# google = "G-XXXXXXXXXX" # Replace with your Google Analytics ID
# plausibleURL = "/docs/s" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleURL)
# plausibleAPI = "/docs/s" # optional - (or set via env variable HUGO_PARAM_ANALYTICS_plausibleAPI)
# plausibleDomain = "" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleDomain)
# [params.feedback]
# enabled = true
# emoticonTpl = true
# eventDest = ["plausible","google"]
# emoticonEventName = "Feedback"
# positiveEventName = "Positive Feedback"
# negativeEventName = "Negative Feedback"
# positiveFormTitle = "What did you like?"
# negativeFormTitle = "What went wrong?"
# successMsg = "Thank you for helping to improve Lotus Docs' documentation!"
# errorMsg = "Sorry! There was an error while attempting to submit your feedback!"
# positiveForm = [
# ["Accurate", "Accurately describes the feature or option."],
# ["Solved my problem", "Helped me resolve an issue."],
# ["Easy to understand", "Easy to follow and comprehend."],
# ["Something else"]
# ]
# negativeForm = [
# ["Inaccurate", "Doesn't accurately describe the feature or option."],
# ["Couldn't find what I was looking for", "Missing important information."],
# ["Hard to understand", "Too complicated or unclear."],
# ["Code sample errors", "One or more code samples are incorrect."],
# ["Something else"]
# ]
[menu]
[[menu.primary]]
name = "Docs"
url = "docs/"
identifier = "docs"
weight = 10
[[menu.primary]]
name = "Discord"
url = "https://discord.gg/uJAeKSAGDy"
identifier = "discord"
weight = 20
[languages]
[languages.en]
title = "LocalAI"
languageName = "English"
weight = 10
# [languages.fr]
# title = "LocalAI documentation"
# languageName = "Français"
# contentDir = "content/fr"
# weight = 20
# [languages.de]
# title = "LocalAI documentation"
# languageName = "Deutsch"
# contentDir = "content/de"
# weight = 30
# mounts are only needed in this showcase to access the publicly available screenshots;
# remove this section if you don't need further mounts
[module]
replacements = "github.com/colinwilson/lotusdocs -> lotusdocs"
[[module.mounts]]
source = 'archetypes'
target = 'archetypes'
[[module.mounts]]
source = 'assets'
target = 'assets'
[[module.mounts]]
source = 'content'
target = 'content'
[[module.mounts]]
source = 'data'
target = 'data'
[[module.mounts]]
source = 'i18n'
target = 'i18n'
[[module.mounts]]
source = '../images'
target = 'static/images'
[[module.mounts]]
source = 'layouts'
target = 'layouts'
[[module.mounts]]
source = 'static'
target = 'static'
# uncomment line below for temporary local development of module
# or when using a 'theme' as a git submodule
[[module.imports]]
path = "github.com/colinwilson/lotusdocs"
disable = false
[[module.imports]]
path = "github.com/gohugoio/hugo-mod-bootstrap-scss/v5"
disable = false

View File

@@ -1,61 +0,0 @@
+++
title = "LocalAI"
description = "The free, OpenAI, Anthropic alternative. Your All-in-One Complete AI Stack"
type = "home"
+++
**The free, OpenAI, Anthropic alternative. Your All-in-One Complete AI Stack** - Run powerful language models, autonomous agents, and document intelligence **locally** on your hardware.
**No cloud, no limits, no compromise.**
{{% notice tip %}}
**[⭐ Star us on GitHub](https://github.com/mudler/LocalAI)** - 33.3k+ stars and growing!
**Drop-in replacement for OpenAI API** - modular suite of tools that work seamlessly together or independently.
Start with **[LocalAI](https://localai.io)**'s OpenAI-compatible API, extend with **[LocalAGI](https://github.com/mudler/LocalAGI)**'s autonomous agents, and enhance with **[LocalRecall](https://github.com/mudler/LocalRecall)**'s semantic search - all running locally on your hardware.
**Open Source** MIT Licensed.
{{% /notice %}}
## Why Choose LocalAI?
**OpenAI API Compatible** - Run AI models locally with our modular ecosystem. From language models to autonomous agents and semantic search, build your complete AI stack without the cloud.
### Key Features
- **LLM Inferencing**: LocalAI is a free, **Open Source** OpenAI alternative. Run **LLMs**, generate **images**, **audio** and more **locally** with consumer grade hardware.
- **Agentic-first**: Extend LocalAI with LocalAGI, an autonomous AI agent platform that runs locally, no coding required. Build and deploy autonomous agents with ease.
- **Memory and Knowledge base**: Extend LocalAI with LocalRecall, A local rest api for semantic search and memory management. Perfect for AI applications.
- **OpenAI Compatible**: Drop-in replacement for OpenAI API. Compatible with existing applications and libraries.
- **No GPU Required**: Run on consumer grade hardware. No need for expensive GPUs or cloud services.
- **Multiple Models**: Support for various model families including LLMs, image generation, and audio models. Supports multiple backends for inferencing.
- **Privacy Focused**: Keep your data local. No data leaves your machine, ensuring complete privacy.
- **Easy Setup**: Simple installation and configuration. Get started in minutes with Binaries installation, Docker, Podman, Kubernetes or local installation.
- **Community Driven**: Active community support and regular updates. Contribute and help shape the future of LocalAI.
## Quick Start
**Docker is the recommended installation method** for most users:
```bash
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest
```
For complete installation instructions, see the [Installation guide](/installation/).
## Get Started
1. **[Install LocalAI](/installation/)** - Choose your installation method (Docker recommended)
2. **[Quickstart Guide](/getting-started/quickstart/)** - Get started quickly after installation
3. **[Install and Run Models](/getting-started/models/)** - Learn how to work with AI models
4. **[Try It Out](/getting-started/try-it-out/)** - Explore examples and use cases
## Learn More
- [Explore available models](https://models.localai.io)
- [Model compatibility](/model-compatibility/)
- [Try out examples](https://github.com/mudler/LocalAI-examples)
- [Join the community](https://discord.gg/uJAeKSAGDy)
- [Check the LocalAI Github repository](https://github.com/mudler/LocalAI)
- [Check the LocalAGI Github repository](https://github.com/mudler/LocalAGI)

View File

@@ -1,12 +0,0 @@
---
weight: 20
title: "Advanced"
description: "Advanced usage"
type: chapter
icon: settings
lead: ""
date: 2020-10-06T08:49:15+00:00
lastmod: 2020-10-06T08:49:15+00:00
draft: false
images: []
---

View File

@@ -0,0 +1,38 @@
+++
disableToc = false
title = "Advanced Configuration"
weight = 20
icon = "settings"
description = "Advanced configuration and optimization for LocalAI"
+++
This section covers advanced configuration, optimization, and fine-tuning options for LocalAI.
## Configuration
- **[Model Configuration]({{% relref "docs/advanced/model-configuration" %}})** - Complete model configuration reference
- **[Advanced Usage]({{% relref "docs/advanced/advanced-usage" %}})** - Advanced configuration options
- **[Installer Options]({{% relref "docs/advanced/installer" %}})** - Installer configuration and options
## Performance & Optimization
- **[Performance Tuning]({{% relref "docs/advanced/performance-tuning" %}})** - Optimize for maximum performance
- **[VRAM Management]({{% relref "docs/advanced/vram-management" %}})** - Manage GPU memory efficiently
## Specialized Topics
- **[Fine-tuning]({{% relref "docs/advanced/fine-tuning" %}})** - Fine-tune models for LocalAI
## Before You Begin
Make sure you have:
- LocalAI installed and running
- Basic understanding of YAML configuration
- Familiarity with your system's resources
## Related Documentation
- [Getting Started]({{% relref "docs/getting-started" %}}) - Installation and basics
- [Model Configuration]({{% relref "docs/advanced/model-configuration" %}}) - Configuration reference
- [Troubleshooting]({{% relref "docs/troubleshooting" %}}) - Common issues
- [Performance Tuning]({{% relref "docs/advanced/performance-tuning" %}}) - Optimization guide

View File

@@ -27,7 +27,7 @@ template:
chat: chat
```
For a complete reference of all available configuration options, see the [Model Configuration]({{%relref "advanced/model-configuration" %}}) page.
For a complete reference of all available configuration options, see the [Model Configuration]({{%relref "docs/advanced/model-configuration" %}}) page.
**Configuration File Locations:**
@@ -108,6 +108,7 @@ Similarly it can be specified a path to a YAML configuration file containing a l
```yaml
- url: https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml
name: gpt4all-j
# ...
```
### Automatic prompt caching
@@ -118,6 +119,7 @@ To enable prompt caching, you can control the settings in the model config YAML
```yaml
# Enable prompt caching
prompt_cache_path: "cache"
prompt_cache_all: true
@@ -129,18 +131,20 @@ prompt_cache_all: true
By default LocalAI will try to autoload the model by trying all the backends. This might work for most of models, but some of the backends are NOT configured to autoload.
The available backends are listed in the [model compatibility table]({{%relref "reference/compatibility-table" %}}).
The available backends are listed in the [model compatibility table]({{%relref "docs/reference/compatibility-table" %}}).
In order to specify a backend for your models, create a model config file in your `models` directory specifying the backend:
```yaml
name: gpt-3.5-turbo
# Default model parameters
parameters:
# Relative to the models path
model: ...
backend: llama-stable
# ...
```
### Connect external backends
@@ -179,6 +183,7 @@ make -C backend/python/vllm
When LocalAI runs in a container,
there are additional environment variables available that modify the behavior of LocalAI on startup:
{{< table "table-responsive" >}}
| Environment variable | Default | Description |
|----------------------------|---------|------------------------------------------------------------------------------------------------------------|
| `REBUILD` | `false` | Rebuild LocalAI on startup |
@@ -188,17 +193,20 @@ there are additional environment variables available that modify the behavior of
| `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the python environment on start |
| `DISABLE_AUTODETECT` | `false` | Disable autodetect of CPU flagset on start |
| `LLAMACPP_GRPC_SERVERS` | | A list of llama.cpp workers to distribute the workload. For example `LLAMACPP_GRPC_SERVERS="address1:port,address2:port"` |
{{< /table >}}
Here is how to configure these variables:
```bash
# Option 1: command line
docker run --env REBUILD=true localai
# Option 2: set within an env file
docker run --env-file .env localai
```
### CLI Parameters
For a complete reference of all CLI parameters, environment variables, and command-line options, see the [CLI Reference]({{%relref "reference/cli-reference" %}}) page.
For a complete reference of all CLI parameters, environment variables, and command-line options, see the [CLI Reference]({{%relref "docs/reference/cli-reference" %}}) page.
You can control LocalAI with command line arguments to specify a binding address, number of threads, model paths, and many other options. Any command line parameter can be specified via an environment variable.
@@ -274,17 +282,20 @@ A list of the environment variable that tweaks parallelism is the following:
### Python backends GRPC max workers
### Default number of workers for GRPC Python backends.
### This actually controls whether a backend can process multiple requests or not.
# PYTHON_GRPC_MAX_WORKERS=1
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1
### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true
```
Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests.
### VRAM and Memory Management
For detailed information on managing VRAM when running multiple models, see the dedicated [VRAM and Memory Management]({{%relref "advanced/vram-management" %}}) page.
For detailed information on managing VRAM when running multiple models, see the dedicated [VRAM and Memory Management]({{%relref "docs/advanced/vram-management" %}}) page.
### Disable CPU flagset auto detection in llama.cpp

View File

@@ -5,9 +5,9 @@ title = "Fine-tuning LLMs for text generation"
weight = 22
+++
{{% notice note %}}
{{% alert note %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section covers how to fine-tune a language model for text generation and consume it in LocalAI.
@@ -74,10 +74,12 @@ Prepare a dataset, and upload it to your Google Drive in case you are using the
### Install dependencies
```bash
# Install axolotl and dependencies
git clone https://github.com/OpenAccess-AI-Collective/axolotl && pushd axolotl && git checkout 797f3dd1de8fd8c0eafbd1c9fdb172abd9ff840a && popd #0.3.0
pip install packaging
pushd axolotl && pip install -e '.[flash-attn,deepspeed]' && popd
# https://github.com/oobabooga/text-generation-webui/issues/4238
pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
```
@@ -94,16 +96,19 @@ We will need to configure axolotl. In this example is provided a file to use `ax
If you have a big dataset, you can pre-tokenize it to speedup the fine-tuning process:
```bash
# Optional pre-tokenize (run only if big dataset)
python -m axolotl.cli.preprocess axolotl.yaml
```
Now we are ready to start the fine-tuning process:
```bash
# Fine-tune
accelerate launch -m axolotl.cli.train axolotl.yaml
```
After we have finished the fine-tuning, we merge the Lora base with the model:
```bash
# Merge lora
python3 -m axolotl.cli.merge_lora axolotl.yaml --lora_model_dir="./qlora-out" --load_in_8bit=False --load_in_4bit=False
```
@@ -111,11 +116,17 @@ And we convert it to the gguf format that LocalAI can consume:
```bash
# Convert to gguf
git clone https://github.com/ggerganov/llama.cpp.git
pushd llama.cpp && cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release && popd
# We need to convert the pytorch model into ggml for quantization
# It creates 'ggml-model-f16.bin' in the 'merged' directory.
pushd llama.cpp && python3 convert_hf_to_gguf.py ../qlora-out/merged && popd
# Start off by making a basic q4_0 4-bit quantization.
# It's important to have 'ggml' in the name of the quant for some
# software to recognize its file format.
pushd llama.cpp/build/bin && ./llama-quantize ../../../qlora-out/merged/Merged-33B-F16.gguf \
../../../custom-model-q4_0.gguf q4_0

View File

@@ -0,0 +1,52 @@
+++
disableToc = false
title = "Installer options"
weight = 24
+++
An installation script is available for quick and hassle-free installations, streamlining the setup process for new users.
It can be used with the following command:
```bash
curl https://localai.io/install.sh | sh
```
The installation can be configured with environment variables, for example:
```bash
curl https://localai.io/install.sh | VAR=value sh
```
List of the Environment Variables:
| Environment Variable | Description |
|----------------------|--------------------------------------------------------------|
| **DOCKER_INSTALL** | Set to "true" to enable the installation of Docker images. |
| **USE_AIO** | Set to "true" to use the all-in-one LocalAI Docker image. |
| **USE_VULKAN** | Set to "true" to use Vulkan GPU support. |
| **API_KEY** | Specify an API key for accessing LocalAI, if required. |
| **PORT** | Specifies the port on which LocalAI will run (default is 8080). |
| **THREADS** | Number of processor threads the application should use. Defaults to the number of logical cores minus one. |
| **VERSION** | Specifies the version of LocalAI to install. Defaults to the latest available version. |
| **MODELS_PATH** | Directory path where LocalAI models are stored (default is /usr/share/local-ai/models). |
| **P2P_TOKEN** | Token to use for the federation or for starting workers; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}}) |
| **WORKER** | Set to "true" to make the instance a worker (a p2p token is required; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
| **FEDERATED** | Set to "true" to share the instance with the federation (a p2p token is required; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
| **FEDERATED_SERVER** | Set to "true" to run the instance as a federation server which forwards requests to the federation (a p2p token is required; see the [documentation]({{%relref "docs/features/distributed_inferencing" %}})) |
## Image Selection
The installer will automatically detect your GPU and select the appropriate image. By default, it uses the standard images without extra Python dependencies. You can customize the image selection using the following environment variables:
- `USE_AIO=true`: Use all-in-one images that include all dependencies
- `USE_VULKAN=true`: Use Vulkan GPU support instead of vendor-specific GPU support
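As an illustrative example (combining variables documented above; adjust to your setup), an all-in-one Docker-based installation could be requested like this:
```bash
# Illustrative: combine documented installer variables in a single invocation.
# DOCKER_INSTALL pulls the Docker image, USE_AIO selects the all-in-one variant.
curl https://localai.io/install.sh | DOCKER_INSTALL=true USE_AIO=true sh
```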
## Uninstallation
To uninstall, run:
```bash
curl https://localai.io/install.sh | sh -s -- --uninstall
```
We are looking into improving the installer, and as this is a first iteration any feedback is welcome! Open up an [issue](https://github.com/mudler/LocalAI/issues/new/choose) if something doesn't work for you!

View File

@@ -498,7 +498,7 @@ feature_flags:
## Related Documentation
- See [Advanced Usage]({{%relref "advanced/advanced-usage" %}}) for other configuration options
- See [Prompt Templates]({{%relref "advanced/advanced-usage#prompt-templates" %}}) for template examples
- See [CLI Reference]({{%relref "reference/cli-reference" %}}) for command-line options
- See [Advanced Usage]({{%relref "docs/advanced/advanced-usage" %}}) for other configuration options
- See [Prompt Templates]({{%relref "docs/advanced/advanced-usage#prompt-templates" %}}) for template examples
- See [CLI Reference]({{%relref "docs/reference/cli-reference" %}}) for command-line options

View File

@@ -0,0 +1,344 @@
+++
disableToc = false
title = "Performance Tuning"
weight = 22
icon = "speed"
description = "Optimize LocalAI for maximum performance"
+++
This guide covers techniques to optimize LocalAI performance for your specific hardware and use case.
## Performance Metrics
Before optimizing, establish baseline metrics:
- **Tokens per second**: Measure inference speed
- **Memory usage**: Monitor RAM and VRAM
- **Latency**: Time to first token and total response time
- **Throughput**: Requests per second
Enable debug mode to see performance stats:
```bash
DEBUG=true local-ai
```
Look for output like:
```
llm_load_tensors: tok/s: 45.23
```
## CPU Optimization
### Thread Configuration
Match threads to CPU cores:
```yaml
# Model configuration
threads: 4 # For 4-core CPU
```
**Guidelines**:
- Use number of physical cores (not hyperthreads)
- Leave 1-2 cores for system
- Too many threads can hurt performance
### CPU Instructions
Enable appropriate CPU instructions:
```bash
# Check available instructions
cat /proc/cpuinfo | grep flags
# Build with optimizations
CMAKE_ARGS="-DGGML_AVX2=ON -DGGML_AVX512=ON" make build
```
### NUMA Optimization
For multi-socket systems:
```yaml
numa: true
```
### Memory Mapping
Enable memory mapping for faster model loading:
```yaml
mmap: true
mmlock: false # Set to true to lock in memory (faster but uses more RAM)
```
## GPU Optimization
### Layer Offloading
Offload as many layers as GPU memory allows:
```yaml
gpu_layers: 35 # Adjust based on GPU memory
f16: true # Use FP16 for better performance
```
**Finding optimal layers**:
1. Start with 20 layers
2. Monitor GPU memory: `nvidia-smi` or `rocm-smi`
3. Gradually increase until near memory limit
4. For maximum performance, offload all layers if possible
### Batch Processing
GPU excels at batch processing. Process multiple requests together when possible.
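As a rough sketch (endpoint and model name are illustrative), several requests can be fired concurrently from the shell so the backend can serve them together:
```bash
# Illustrative: issue four concurrent requests; replace "gpt-4" with a model you have installed
for i in 1 2 3 4; do
  curl -s http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}' &
done
wait
```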
### Mixed Precision
Use FP16 when supported:
```yaml
f16: true
```
## Model Optimization
### Quantization
Choose appropriate quantization:
| Quantization | Speed | Quality | Memory | Use Case |
|-------------|-------|---------|--------|----------|
| Q8_0 | Slowest | Highest | Most | Maximum quality |
| Q6_K | Slow | Very High | High | High quality |
| Q4_K_M | Medium | High | Medium | **Recommended** |
| Q4_K_S | Fast | Medium | Low | Balanced |
| Q2_K | Fastest | Lower | Least | Speed priority |
### Context Size
Reduce context size for faster inference:
```yaml
context_size: 2048 # Instead of 4096 or 8192
```
**Trade-off**: Smaller context = faster but less conversation history
### Model Selection
Choose models appropriate for your hardware:
- **Small systems (4GB RAM)**: 1-3B parameter models
- **Medium systems (8-16GB RAM)**: 3-7B parameter models
- **Large systems (32GB+ RAM)**: 7B+ parameter models
## Configuration Optimizations
### Sampling Parameters
Optimize sampling for speed:
```yaml
parameters:
temperature: 0.7
top_p: 0.9
top_k: 40
mirostat: 0 # Disable for speed (enabled by default)
```
**Note**: Disabling mirostat improves speed but may reduce quality.
### Prompt Caching
Enable prompt caching for repeated queries:
```yaml
prompt_cache_path: "cache"
prompt_cache_all: true
```
### Parallel Requests
LocalAI supports parallel requests. Configure appropriately:
```yaml
# In model config
parallel_requests: 4 # Adjust based on hardware
```
## Storage Optimization
### Use SSD
Always use SSD for model storage:
- HDD: Very slow model loading
- SSD: Fast loading, better performance
### Disable MMAP on HDD
If stuck with HDD:
```yaml
mmap: false # Loads entire model into RAM
```
### Model Location
Store models on fastest storage:
- Local SSD: Best performance
- Network storage: Slower, but allows sharing
- External drive: Slowest
## System-Level Optimizations
### Process Priority
Increase process priority (Linux):
```bash
nice -n -10 local-ai
```
### CPU Governor
Set CPU to performance mode (Linux):
```bash
# Check current governor
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# Set to performance
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
```
### Disable Swapping
Prevent swapping for better performance:
```bash
# Linux
sudo swapoff -a
# Or set swappiness to 0
echo 0 | sudo tee /proc/sys/vm/swappiness
```
### Memory Allocation
For large models, consider huge pages (Linux):
```bash
# Allocate huge pages
echo 1024 | sudo tee /proc/sys/vm/nr_hugepages
```
## Benchmarking
### Measure Performance
Create a benchmark script:
```python
import time
import requests
start = time.time()
response = requests.post(
"http://localhost:8080/v1/chat/completions",
json={
"model": "gpt-4",
"messages": [{"role": "user", "content": "Hello"}]
}
)
elapsed = time.time() - start
tokens = response.json()["usage"]["completion_tokens"]
tokens_per_second = tokens / elapsed
print(f"Time: {elapsed:.2f}s")
print(f"Tokens: {tokens}")
print(f"Speed: {tokens_per_second:.2f} tok/s")
```
### Compare Configurations
Test different configurations:
1. Baseline: Default settings
2. Optimized: Your optimizations
3. Measure: Tokens/second, latency, memory
### Load Testing
Test under load:
```bash
# Use Apache Bench or similar
ab -n 100 -c 10 -p request.json -T application/json \
http://localhost:8080/v1/chat/completions
```
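The `request.json` payload referenced above is not shown here; a minimal example (model name is illustrative) could be created like this:
```bash
# Illustrative payload for the ab command above
cat > request.json <<'EOF'
{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}]}
EOF
```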
## Platform-Specific Tips
### Apple Silicon
- Metal acceleration is automatic
- Use native builds (not Docker) for best performance
- M1/M2/M3 have unified memory - optimize accordingly
### NVIDIA GPUs
- Use CUDA 12 for latest optimizations
- Enable Tensor Cores with appropriate precision
- Monitor with `nvidia-smi` for bottlenecks
### AMD GPUs
- Use ROCm/HIPBLAS backend
- Check ROCm compatibility
- Monitor with `rocm-smi`
### Intel GPUs
- Use oneAPI/SYCL backend
- Check Intel GPU compatibility
- Optimize for F16/F32 precision
## Common Performance Issues
### Slow First Response
**Cause**: Model loading
**Solution**: Pre-load models or use model warming
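As a sketch, a throwaway request sent once after startup keeps the first real request from paying the loading cost (model name is illustrative):
```bash
# Illustrative warm-up request issued right after LocalAI starts
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "warm up"}], "max_tokens": 1}'
```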
### Degrading Performance
**Cause**: Memory fragmentation
**Solution**: Restart LocalAI periodically
### Inconsistent Speed
**Cause**: System load, thermal throttling
**Solution**: Monitor system resources, ensure cooling
## Performance Checklist
- [ ] Threads match CPU cores
- [ ] GPU layers optimized
- [ ] Appropriate quantization selected
- [ ] Context size optimized
- [ ] Models on SSD
- [ ] MMAP enabled (if using SSD)
- [ ] Mirostat disabled (if speed priority)
- [ ] System resources monitored
- [ ] Baseline metrics established
- [ ] Optimizations tested and verified
## See Also
- [GPU Acceleration]({{% relref "docs/features/gpu-acceleration" %}}) - GPU setup
- [VRAM Management]({{% relref "docs/advanced/vram-management" %}}) - GPU memory
- [Model Configuration]({{% relref "docs/advanced/model-configuration" %}}) - Configuration options
- [Troubleshooting]({{% relref "docs/troubleshooting" %}}) - Performance issues

View File

@@ -23,8 +23,10 @@ The simplest approach is to ensure only one model is loaded at a time. When a ne
### Configuration
```bash
# Via command line
./local-ai --single-active-backend
# Via environment variable
LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai
```
@@ -37,10 +39,13 @@ LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai
### Example
```bash
# Start LocalAI with single active backend
LOCALAI_SINGLE_ACTIVE_BACKEND=true ./local-ai
# First request loads model A
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-a", ...}'
# Second request automatically unloads model A and loads model B
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}'
```
@@ -48,42 +53,40 @@ curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}'
For more flexible memory management, LocalAI provides watchdog mechanisms that automatically unload models based on their activity state. This allows multiple models to be loaded simultaneously, but automatically frees memory when models become inactive or stuck.
> **Note:** Watchdog settings can be configured via the [Runtime Settings]({{%relref "features/runtime-settings#watchdog-settings" %}}) web interface, which allows you to adjust settings without restarting the application.
### Idle Watchdog
The idle watchdog monitors models that haven't been used for a specified period and automatically unloads them to free VRAM.
#### Configuration
Via environment variables or CLI:
```bash
# Enable idle watchdog with default timeout (15 minutes)
LOCALAI_WATCHDOG_IDLE=true ./local-ai
# Customize the idle timeout (e.g., 10 minutes)
LOCALAI_WATCHDOG_IDLE=true LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m ./local-ai
# Via command line
./local-ai --enable-watchdog-idle --watchdog-idle-timeout=10m
```
Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Idle Enabled" with your desired timeout.
### Busy Watchdog
The busy watchdog monitors models that have been processing requests for an unusually long time and terminates them if they exceed a threshold. This is useful for detecting and recovering from stuck or hung backends.
#### Configuration
Via environment variables or CLI:
```bash
# Enable busy watchdog with default timeout (5 minutes)
LOCALAI_WATCHDOG_BUSY=true ./local-ai
# Customize the busy timeout (e.g., 10 minutes)
LOCALAI_WATCHDOG_BUSY=true LOCALAI_WATCHDOG_BUSY_TIMEOUT=10m ./local-ai
# Via command line
./local-ai --enable-watchdog-busy --watchdog-busy-timeout=10m
```
Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Busy Enabled" with your desired timeout.
### Combined Configuration
You can enable both watchdogs simultaneously for comprehensive memory management:
@@ -114,15 +117,19 @@ Or using command line flags:
### Example
```bash
# Start LocalAI with both watchdogs enabled
LOCALAI_WATCHDOG_IDLE=true \
LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m \
LOCALAI_WATCHDOG_BUSY=true \
LOCALAI_WATCHDOG_BUSY_TIMEOUT=5m \
./local-ai
# Load multiple models
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-a", ...}'
curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}'
# After 10 minutes of inactivity, model-a will be automatically unloaded
# If a model gets stuck processing for more than 5 minutes, it will be terminated
```
### Timeout Format
@@ -147,6 +154,7 @@ LocalAI cannot reliably estimate VRAM usage of new models to load across differe
If automatic management doesn't meet your needs, you can manually stop models using the LocalAI management API:
```bash
# Stop a specific model
curl -X POST http://localhost:8080/backend/shutdown \
-H "Content-Type: application/json" \
-d '{"model": "model-name"}'
@@ -164,7 +172,7 @@ To stop all models, you'll need to call the endpoint for each loaded model indiv
## Related Documentation
- See [Advanced Usage]({{%relref "advanced/advanced-usage" %}}) for other configuration options
- See [GPU Acceleration]({{%relref "features/GPU-acceleration" %}}) for GPU setup and configuration
- See [Backend Flags]({{%relref "advanced/advanced-usage#backend-flags" %}}) for all available backend configuration options
- See [Advanced Usage]({{%relref "docs/advanced/advanced-usage" %}}) for other configuration options
- See [GPU Acceleration]({{%relref "docs/features/GPU-acceleration" %}}) for GPU setup and configuration
- See [Backend Flags]({{%relref "docs/advanced/advanced-usage#backend-flags" %}}) for all available backend configuration options

215
docs/content/docs/faq.md Normal file
View File

@@ -0,0 +1,215 @@
+++
disableToc = false
title = "FAQ"
weight = 24
icon = "quiz"
url = "/faq/"
+++
## Frequently asked questions
Here are answers to some of the most common questions.
### How do I get models?
There are several ways to get models for LocalAI:
1. **WebUI Import** (Easiest): Use the WebUI's model import interface:
- Open `http://localhost:8080` and navigate to the Models tab
- Click "Import Model" or "New Model"
- Enter a model URI (Hugging Face, OCI, file path, etc.)
- Configure preferences in Simple Mode or edit YAML in Advanced Mode
- The WebUI provides syntax highlighting, validation, and a user-friendly interface
2. **Model Gallery** (Recommended): Use the built-in model gallery accessible via:
- WebUI: Navigate to the Models tab in the LocalAI interface and browse available models
- CLI: `local-ai models list` to see available models, then `local-ai models install <model-name>`
- Online: Browse models at [models.localai.io](https://models.localai.io)
3. **Hugging Face**: Most GGUF-based models from Hugging Face work with LocalAI. You can install them via:
- WebUI: Import using `huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`
- CLI: `local-ai run huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf`
4. **Manual Installation**: Download model files and place them in your models directory. See [Install and Run Models]({{% relref "docs/getting-started/models" %}}) for details.
5. **OCI Registries**: Install models from OCI-compatible registries:
- WebUI: Import using `ollama://gemma:2b` or `oci://localai/phi-2:latest`
- CLI: `local-ai run ollama://gemma:2b` or `local-ai run oci://localai/phi-2:latest`
**Security Note**: Be cautious when downloading models from the internet. Always verify the source and use trusted repositories when possible.
### Where are models stored?
LocalAI stores downloaded models in the following locations by default:
- **Command line**: `./models` (relative to current working directory)
- **Docker**: `/models` (inside the container, typically mounted to `./models` on host)
- **Launcher application**: `~/.localai/models` (in your home directory)
You can customize the model storage location using the `LOCALAI_MODELS_PATH` environment variable or `--models-path` command line flag. This is useful if you want to store models outside your home directory for backup purposes or to avoid filling up your home directory with large model files.
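For example (the path is illustrative):
```bash
# Store models on a dedicated drive instead of the default location
LOCALAI_MODELS_PATH=/data/localai/models local-ai
# or, equivalently, via the CLI flag
local-ai --models-path /data/localai/models
```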
### How much storage space do models require?
Model sizes vary significantly depending on the model and quantization level:
- **Small models (1-3B parameters)**: 1-3 GB
- **Medium models (7-13B parameters)**: 4-8 GB
- **Large models (30B+ parameters)**: 15-30+ GB
**Quantization levels** (smaller files, slightly reduced quality):
- `Q4_K_M`: ~75% of original size
- `Q4_K_S`: ~60% of original size
- `Q2_K`: ~50% of original size
**Storage recommendations**:
- Ensure you have at least 2-3x the model size available for downloads and temporary files
- Use SSD storage for better performance
- Consider the model size relative to your system RAM - models larger than your RAM may not run efficiently
### Benchmarking LocalAI and llama.cpp shows different results!
LocalAI applies a set of defaults when loading models with the llama.cpp backend, one of these is mirostat sampling - while it achieves better results, it slows down the inference. You can disable this by setting `mirostat: 0` in the model config file. See also the advanced section ({{%relref "docs/advanced/advanced-usage" %}}) for more information and [this issue](https://github.com/mudler/LocalAI/issues/2780).
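A minimal sketch of such an override in the model config file (the model name is illustrative):
```yaml
name: my-model
parameters:
  # Disable mirostat sampling to match plain llama.cpp defaults
  mirostat: 0
```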
### What's the difference with Serge, or XXX?
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp): it handles all of them internally for faster inference, is easy to set up locally, and can be deployed to Kubernetes.
### Everything is slow, how is it possible?
There are a few situations where this could occur. Some tips are:
- Don't use an HDD to store your models. Prefer SSD over HDD. If you are stuck with an HDD, disable `mmap` in the model config file so the model is loaded entirely into memory.
- Watch out for CPU overbooking. Ideally `--threads` should match the number of physical cores. For instance, if your CPU has 4 cores, allocate `<= 4` threads to a model.
- Run LocalAI with `DEBUG=true`. This gives more information, including stats on the token inference speed.
- Check that you are actually getting an output: run a simple curl request with `"stream": true` to see how fast the model is responding (see the example below).
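A minimal streaming check, with an illustrative model name:
```bash
# Illustrative streaming request to verify the model is producing tokens
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "Hello"}], "stream": true}'
```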
### Can I use it with a Discord bot, or XXX?
Yes! If the client uses OpenAI and supports setting a different base URL for requests, you can point it at the LocalAI endpoint. This lets you use LocalAI with any application that was built for OpenAI, without changing the application!
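For instance, with the official OpenAI Python client it is just a matter of pointing `base_url` at LocalAI (a sketch; endpoint, key, and model name are illustrative):
```python
from openai import OpenAI

# Point any OpenAI-compatible client at the LocalAI endpoint
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

response = client.chat.completions.create(
    model="gpt-4",  # illustrative model name
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)
```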
### Can this leverage GPUs?
There is GPU support, see {{%relref "docs/features/GPU-acceleration" %}}.
### Where is the webUI?
LocalAI includes a built-in WebUI that is automatically available when you start LocalAI. Simply navigate to `http://localhost:8080` in your web browser after starting LocalAI.
The WebUI provides:
- Chat interface for interacting with models
- Model gallery browser and installer
- Backend management
- Configuration tools
If you prefer a different interface, LocalAI is compatible with any OpenAI-compatible UI. You can find examples in the [LocalAI-examples repository](https://github.com/mudler/LocalAI-examples), including integrations with popular UIs like chatbot-ui.
### Does it work with AutoGPT?
Yes, see the [examples](https://github.com/mudler/LocalAI-examples)!
### How can I troubleshoot when something is wrong?
Enable the debug mode by setting `DEBUG=true` in the environment variables. This will give you more information on what's going on.
You can also specify `--debug` in the command line.
### I'm getting 'invalid pitch' error when running with CUDA, what's wrong?
This typically happens when your prompt exceeds the context size. Try to reduce the prompt size, or increase the context size.
### I'm getting a 'SIGILL' error, what's wrong?
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF" make build`
Alternatively, you can use the backend management system to install a compatible backend for your CPU architecture. See [Backend Management]({{% relref "docs/features/backends" %}}) for more information.
### How do I install backends?
LocalAI now uses a backend management system where backends are automatically downloaded when needed. You can also manually install backends:
```bash
# List available backends
local-ai backends list
# Install a specific backend
local-ai backends install llama-cpp
# Install a backend for a specific GPU type
local-ai backends install llama-cpp --gpu-type nvidia
```
For more details, see the [Backends documentation]({{% relref "docs/features/backends" %}}).
### How do I set up API keys for security?
You can secure your LocalAI instance by setting API keys using the `API_KEY` environment variable:
```bash
# Single API key
API_KEY=your-secret-key local-ai
# Multiple API keys (comma-separated)
API_KEY=key1,key2,key3 local-ai
```
When API keys are set, all requests must include the key in the `Authorization` header:
```bash
curl http://localhost:8080/v1/models \
-H "Authorization: Bearer your-secret-key"
```
**Important**: API keys provide full access to all LocalAI features (admin-level access). Make sure to protect your API keys and use HTTPS when exposing LocalAI remotely.
### My model is not loading or showing errors
Here are common issues and solutions:
1. **Backend not installed**: The required backend may not be installed. Check with `local-ai backends list` and install if needed.
2. **Insufficient memory**: Large models require significant RAM. Check available memory and consider using a smaller quantized model.
3. **Wrong backend specified**: Ensure the backend in your model configuration matches the model type. See the [Compatibility Table]({{% relref "docs/reference/compatibility-table" %}}).
4. **Model file corruption**: Re-download the model file.
5. **Check logs**: Enable debug mode (`DEBUG=true`) to see detailed error messages.
For more troubleshooting help, see the [Troubleshooting Guide]({{% relref "docs/troubleshooting" %}}).
### How do I use GPU acceleration?
LocalAI supports multiple GPU types:
- **NVIDIA (CUDA)**: Use `--gpus all` with Docker and CUDA-enabled images
- **AMD (ROCm)**: Use images with `hipblas` tag
- **Intel**: Use images with `intel` tag or Intel oneAPI
- **Apple Silicon (Metal)**: Automatically detected on macOS
For detailed setup instructions, see [GPU Acceleration]({{% relref "docs/features/gpu-acceleration" %}}).
### Can I use LocalAI with LangChain, AutoGPT, or other frameworks?
Yes! LocalAI is compatible with any framework that supports OpenAI's API. Simply point the framework to your LocalAI endpoint:
```python
# Example with LangChain
from langchain.llms import OpenAI
llm = OpenAI(
openai_api_key="not-needed",
openai_api_base="http://localhost:8080/v1"
)
```
See the [Integrations]({{% relref "docs/integrations" %}}) page for a list of compatible projects and examples.
### What's the difference between AIO images and standard images?
**AIO (All-in-One) images** come pre-configured with:
- Pre-installed models ready to use
- All necessary backends included
- Quick start with no configuration needed
**Standard images** are:
- Smaller in size
- No pre-installed models
- You install models and backends as needed
- More flexible for custom setups
Choose AIO images for quick testing and standard images for production deployments. See [Container Images]({{% relref "docs/getting-started/container-images" %}}) for details.
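For example (tags are illustrative; pick the variant that matches your hardware):
```bash
# All-in-one CPU image: models and backends preconfigured
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest-aio-cpu

# Standard image: smaller, install models and backends as needed
docker run -p 8080:8080 --name local-ai -ti localai/localai:latest
```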

View File

@@ -5,15 +5,15 @@ weight = 9
url = "/features/gpu-acceleration/"
+++
{{% notice context="warning" %}}
{{% alert context="warning" %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section contains instruction on how to use LocalAI with GPU acceleration.
{{% notice icon="⚡" context="warning" %}}
For acceleration for AMD or Metal HW is still in development, for additional details see the [build]({{%relref "installation/build#Acceleration" %}})
{{% /notice %}}
{{% alert icon="⚡" context="warning" %}}
Acceleration for AMD or Metal HW is still in development; for additional details see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
{{% /alert %}}
## Automatic Backend Detection
@@ -32,6 +32,7 @@ Depending on the model architecture and backend used, there might be different w
```yaml
name: my-model-name
# Default model parameters
parameters:
# Relative to the models path
model: llama.cpp-model.ggmlv3.q5_K_M.bin
@@ -123,7 +124,7 @@ llama_init_from_file: kv self size = 512.00 MB
There are a limited number of tested configurations for ROCm systems; however, most newer dedicated consumer-grade GPU devices seem to be supported under the current ROCm6 implementation.
Due to the nature of ROCm it is best to run all implementations in containers as this limits the number of packages required for installation on host system, compatibility and package versions for dependencies across all variations of OS must be tested independently if desired, please refer to the [build]({{%relref "installation/build#Acceleration" %}}) documentation.
Due to the nature of ROCm, it is best to run all implementations in containers, as this limits the number of packages required for installation on the host system. Compatibility and package versions for dependencies across all variations of OS must be tested independently; if desired, please refer to the [build]({{%relref "docs/getting-started/build#Acceleration" %}}) documentation.
### Requirements
@@ -180,6 +181,7 @@ The devices in the following list have been tested with `hipblas` images running
The following are examples of the ROCm specific configuration elements required.
```yaml
# docker-compose.yaml
# For full functionality select a non-'core' image, version locking the image is recommended for debug purposes.
image: quay.io/go-skynet/local-ai:master-aio-gpu-hipblas
environment:

Some files were not shown because too many files have changed in this diff