mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-27 09:57:14 -04:00
Merge remote-tracking branch 'origin/master' into worktree-feat+paged-attention
# Conflicts: # gallery/index.yaml
This commit is contained in:
16
.github/workflows/test.yml
vendored
16
.github/workflows/test.yml
vendored
@@ -121,3 +121,19 @@ jobs:
|
||||
detached: true
|
||||
connect-timeout-seconds: 180
|
||||
limit-access-to-actor: true
|
||||
|
||||
# Fast standalone unit tests for the backends' pure C++ helpers - currently the
|
||||
# llama-cpp message reconstruction (backend/cpp/llama-cpp/message_content.h),
|
||||
# which guards the OpenAI chat content normalization (mudler/LocalAI#10524,
|
||||
# #7324, #7528). The runner discovers every *_test.cpp under backend/cpp/, so
|
||||
# new pure-C++ unit tests are picked up with no CI changes. These need only the
|
||||
# C++ stdlib + nlohmann/json, so they run on every PR without the full
|
||||
# llama.cpp + gRPC backend build. (The same suite is also wired as an opt-in
|
||||
# CMake/ctest target, -DLLAMA_GRPC_BUILD_TESTS=ON, for in-backend-build runs.)
|
||||
tests-backend-cpp:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Clone
|
||||
uses: actions/checkout@v7
|
||||
- name: Run backend C++ unit tests
|
||||
run: make test-backend-cpp
|
||||
|
||||
9
Makefile
9
Makefile
@@ -103,7 +103,7 @@ COVERAGE_E2E_LABELS?=!real-models
|
||||
COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go
|
||||
|
||||
|
||||
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
||||
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-backend-cpp test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
||||
|
||||
all: help
|
||||
|
||||
@@ -201,6 +201,13 @@ test: prepare-test
|
||||
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||
|
||||
## Compiles and runs the standalone C++ unit tests for the backends (pure
|
||||
## helpers that depend only on the stdlib + nlohmann/json, no full backend
|
||||
## build). Discovers every *_test.cpp under backend/cpp/ - see
|
||||
## backend/cpp/run-unit-tests.sh. Set NLOHMANN_INCLUDE to skip the header fetch.
|
||||
test-backend-cpp:
|
||||
bash backend/cpp/run-unit-tests.sh
|
||||
|
||||
## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
|
||||
## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
|
||||
## --fail-fast so a single failure doesn't truncate the coverage number, and
|
||||
|
||||
@@ -87,3 +87,18 @@ target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||
if(TARGET BUILD_INFO)
|
||||
add_dependencies(${TARGET} BUILD_INFO)
|
||||
endif()
|
||||
|
||||
# Unit test for the message-content normalization helper (message_content.h).
|
||||
# Off by default so the normal backend build is untouched; enable with
|
||||
# -DLLAMA_GRPC_BUILD_TESTS=ON and run via ctest. It reuses llama.cpp's vendored
|
||||
# <nlohmann/json.hpp> (propagated by the common helpers library) so it has no
|
||||
# extra dependency beyond what the backend already builds against.
|
||||
option(LLAMA_GRPC_BUILD_TESTS "Build grpc-server unit tests" OFF)
|
||||
if(LLAMA_GRPC_BUILD_TESTS)
|
||||
enable_testing()
|
||||
add_executable(message_content_test message_content_test.cpp message_content.h)
|
||||
target_include_directories(message_content_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(message_content_test PRIVATE ${_LLAMA_COMMON_TARGET})
|
||||
target_compile_features(message_content_test PRIVATE cxx_std_17)
|
||||
add_test(NAME message_content_test COMMAND message_content_test)
|
||||
endif()
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "common.h"
|
||||
#include "arg.h"
|
||||
#include "chat-auto-parser.h"
|
||||
#include "message_content.h"
|
||||
#include <getopt.h>
|
||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||
#include <grpcpp/grpcpp.h>
|
||||
@@ -1728,242 +1729,20 @@ public:
|
||||
|
||||
for (int i = 0; i < request->messages_size(); i++) {
|
||||
const auto& msg = request->messages(i);
|
||||
json msg_json;
|
||||
msg_json["role"] = msg.role();
|
||||
|
||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
||||
|
||||
// Handle content - can be string, null, or array
|
||||
// For multimodal content, we'll embed images/audio from separate fields
|
||||
if (!msg.content().empty()) {
|
||||
// Try to parse content as JSON to see if it's already an array
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
// Handle null values - convert to empty string to avoid template errors
|
||||
if (content_val.is_null()) {
|
||||
content_val = "";
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
content_val = msg.content();
|
||||
}
|
||||
|
||||
// If content is an object (e.g., from tool call failures), convert to string
|
||||
if (content_val.is_object()) {
|
||||
content_val = content_val.dump();
|
||||
}
|
||||
|
||||
// If content is a string and this is the last user message with images/audio, combine them
|
||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
||||
json content_array = json::array();
|
||||
// Add text first
|
||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
||||
// Add images
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
// Add audios
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
} else {
|
||||
// Use content as-is (already array or not last user message)
|
||||
// Ensure null values are converted to empty string
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
} else {
|
||||
msg_json["content"] = content_val;
|
||||
}
|
||||
}
|
||||
} else if (is_last_user_msg && has_images_or_audio) {
|
||||
// If no content but this is the last user message with images/audio, create content array
|
||||
json content_array = json::array();
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
} else if (msg.role() == "tool") {
|
||||
// Tool role messages must have content field set, even if empty
|
||||
// Jinja templates expect content to be a string, not null or object
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
||||
if (msg.content().empty()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
// Content exists, parse and ensure it's a string
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
|
||||
i, content_val.is_null() ? "null" :
|
||||
content_val.is_object() ? "object" :
|
||||
content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" : "other");
|
||||
// Handle null values - Jinja templates expect content to be a string, not null
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
|
||||
} else if (content_val.is_object()) {
|
||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
|
||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
||||
} else if (content_val.is_string()) {
|
||||
msg_json["content"] = content_val.get<std::string>();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
|
||||
} else {
|
||||
// For arrays or other types, convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
|
||||
i, content_val.is_array() ? "array" : "other type");
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
msg_json["content"] = msg.content();
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
||||
// Jinja templates expect content to be present, default to empty string if not set
|
||||
if (!msg_json.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
|
||||
i, msg.role().c_str());
|
||||
msg_json["content"] = "";
|
||||
}
|
||||
llama_grpc::ReconstructedMessageInput rin;
|
||||
rin.role = msg.role();
|
||||
rin.content = msg.content();
|
||||
rin.name = msg.name();
|
||||
rin.tool_call_id = msg.tool_call_id();
|
||||
rin.reasoning_content = msg.reasoning_content();
|
||||
rin.tool_calls = msg.tool_calls();
|
||||
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||
if (rin.is_last_user_msg) {
|
||||
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||
}
|
||||
|
||||
// Add optional fields for OpenAI-compatible message format
|
||||
if (!msg.name().empty()) {
|
||||
msg_json["name"] = msg.name();
|
||||
}
|
||||
if (!msg.tool_call_id().empty()) {
|
||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
||||
}
|
||||
if (!msg.reasoning_content().empty()) {
|
||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
||||
}
|
||||
if (!msg.tool_calls().empty()) {
|
||||
// Parse tool_calls JSON string and add to message
|
||||
try {
|
||||
json tool_calls = json::parse(msg.tool_calls());
|
||||
msg_json["tool_calls"] = tool_calls;
|
||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
||||
// set content to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
|
||||
msg_json["content"] = " ";
|
||||
}
|
||||
// Log each tool call with name and arguments
|
||||
if (tool_calls.is_array()) {
|
||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
||||
const auto& tc = tool_calls[tc_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_args = "{}";
|
||||
if (tc.contains("function")) {
|
||||
const auto& func = tc["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("arguments")) {
|
||||
tool_args = func["arguments"].is_string() ?
|
||||
func["arguments"].get<std::string>() :
|
||||
func["arguments"].dump();
|
||||
}
|
||||
} else if (tc.contains("name")) {
|
||||
tool_name = tc["name"].get<std::string>();
|
||||
if (tc.contains("arguments")) {
|
||||
tool_args = tc["arguments"].is_string() ?
|
||||
tc["arguments"].get<std::string>() :
|
||||
tc["arguments"].dump();
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
||||
}
|
||||
}
|
||||
} catch (const json::parse_error& e) {
|
||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log final content state before adding to array
|
||||
if (msg_json.contains("content")) {
|
||||
if (msg_json["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
||||
i, msg_json["content"].is_string() ? "string" :
|
||||
msg_json["content"].is_array() ? "array" :
|
||||
msg_json["content"].is_object() ? "object" : "other",
|
||||
msg_json["content"].is_null() ? 0 : 1);
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
||||
}
|
||||
|
||||
messages_json.push_back(msg_json);
|
||||
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||
}
|
||||
|
||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||
@@ -2184,36 +1963,7 @@ public:
|
||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||
auto& msg = body_json["messages"][idx];
|
||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
||||
if (msg.contains("content")) {
|
||||
if (msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Fix null content
|
||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
||||
// Tool messages must have string content, not array
|
||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
||||
// If content is object or other non-string type, convert to string for templates
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
||||
if (msg["content"].is_object()) {
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else {
|
||||
msg["content"] = "";
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
||||
idx, role_str.c_str(),
|
||||
msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Add missing content
|
||||
}
|
||||
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2545,264 +2295,20 @@ public:
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
|
||||
for (int i = 0; i < request->messages_size(); i++) {
|
||||
const auto& msg = request->messages(i);
|
||||
json msg_json;
|
||||
msg_json["role"] = msg.role();
|
||||
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
|
||||
i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
|
||||
if (!msg.content().empty()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
llama_grpc::ReconstructedMessageInput rin;
|
||||
rin.role = msg.role();
|
||||
rin.content = msg.content();
|
||||
rin.name = msg.name();
|
||||
rin.tool_call_id = msg.tool_call_id();
|
||||
rin.reasoning_content = msg.reasoning_content();
|
||||
rin.tool_calls = msg.tool_calls();
|
||||
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||
if (rin.is_last_user_msg) {
|
||||
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||
}
|
||||
|
||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
||||
|
||||
// Handle content - can be string, null, or array
|
||||
// For multimodal content, we'll embed images/audio from separate fields
|
||||
if (!msg.content().empty()) {
|
||||
// Try to parse content as JSON to see if it's already an array
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
// Handle null values - convert to empty string to avoid template errors
|
||||
if (content_val.is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
|
||||
content_val = "";
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
content_val = msg.content();
|
||||
}
|
||||
|
||||
// If content is an object (e.g., from tool call failures), convert to string
|
||||
if (content_val.is_object()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
|
||||
content_val = content_val.dump();
|
||||
}
|
||||
|
||||
// If content is a string and this is the last user message with images/audio, combine them
|
||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
||||
json content_array = json::array();
|
||||
// Add text first
|
||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
||||
// Add images
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
// Add audios
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
} else {
|
||||
// Use content as-is (already array or not last user message)
|
||||
// Ensure null values are converted to empty string
|
||||
if (content_val.is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
|
||||
msg_json["content"] = "";
|
||||
} else {
|
||||
msg_json["content"] = content_val;
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
|
||||
i, content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" :
|
||||
content_val.is_object() ? "object" : "other");
|
||||
}
|
||||
}
|
||||
} else if (is_last_user_msg && has_images_or_audio) {
|
||||
// If no content but this is the last user message with images/audio, create content array
|
||||
json content_array = json::array();
|
||||
if (request->images_size() > 0) {
|
||||
for (int j = 0; j < request->images_size(); j++) {
|
||||
json image_chunk;
|
||||
image_chunk["type"] = "image_url";
|
||||
json image_url;
|
||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
||||
image_chunk["image_url"] = image_url;
|
||||
content_array.push_back(image_chunk);
|
||||
}
|
||||
}
|
||||
if (request->audios_size() > 0) {
|
||||
for (int j = 0; j < request->audios_size(); j++) {
|
||||
json audio_chunk;
|
||||
audio_chunk["type"] = "input_audio";
|
||||
json input_audio;
|
||||
input_audio["data"] = request->audios(j);
|
||||
input_audio["format"] = "wav"; // default, could be made configurable
|
||||
audio_chunk["input_audio"] = input_audio;
|
||||
content_array.push_back(audio_chunk);
|
||||
}
|
||||
}
|
||||
if (request->videos_size() > 0) {
|
||||
for (int j = 0; j < request->videos_size(); j++) {
|
||||
json video_chunk;
|
||||
video_chunk["type"] = "input_video";
|
||||
json input_video;
|
||||
input_video["data"] = request->videos(j);
|
||||
video_chunk["input_video"] = input_video;
|
||||
content_array.push_back(video_chunk);
|
||||
}
|
||||
}
|
||||
msg_json["content"] = content_array;
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
|
||||
} else if (!msg.tool_calls().empty()) {
|
||||
// Tool call messages may have null content, but templates expect string
|
||||
// IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
|
||||
msg_json["content"] = " ";
|
||||
} else if (msg.role() == "tool") {
|
||||
// Tool role messages must have content field set, even if empty
|
||||
// Jinja templates expect content to be a string, not null or object
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
||||
if (msg.content().empty()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
|
||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
||||
// Content exists, parse and ensure it's a string
|
||||
json content_val;
|
||||
try {
|
||||
content_val = json::parse(msg.content());
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
|
||||
i, content_val.is_null() ? "null" :
|
||||
content_val.is_object() ? "object" :
|
||||
content_val.is_string() ? "string" :
|
||||
content_val.is_array() ? "array" : "other");
|
||||
// Handle null values - Jinja templates expect content to be a string, not null
|
||||
if (content_val.is_null()) {
|
||||
msg_json["content"] = "";
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
|
||||
} else if (content_val.is_object()) {
|
||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
|
||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
||||
} else if (content_val.is_string()) {
|
||||
msg_json["content"] = content_val.get<std::string>();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
|
||||
} else {
|
||||
// For arrays or other types, convert to string
|
||||
msg_json["content"] = content_val.dump();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
|
||||
i, content_val.is_array() ? "array" : "other type");
|
||||
}
|
||||
} catch (const json::parse_error&) {
|
||||
// Not JSON, treat as plain string
|
||||
msg_json["content"] = msg.content();
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
||||
// Jinja templates expect content to be present, default to empty string if not set
|
||||
if (!msg_json.contains("content")) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
|
||||
i, msg.role().c_str());
|
||||
msg_json["content"] = "";
|
||||
}
|
||||
}
|
||||
|
||||
// Add optional fields for OpenAI-compatible message format
|
||||
if (!msg.name().empty()) {
|
||||
msg_json["name"] = msg.name();
|
||||
}
|
||||
if (!msg.tool_call_id().empty()) {
|
||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
||||
}
|
||||
if (!msg.reasoning_content().empty()) {
|
||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
||||
}
|
||||
if (!msg.tool_calls().empty()) {
|
||||
// Parse tool_calls JSON string and add to message
|
||||
try {
|
||||
json tool_calls = json::parse(msg.tool_calls());
|
||||
msg_json["tool_calls"] = tool_calls;
|
||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
||||
// set content to space " " instead of empty string "", because llama.cpp's
|
||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
||||
// which causes template errors when accessing message.content[:tool_start_length]
|
||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
|
||||
msg_json["content"] = " ";
|
||||
}
|
||||
// Log each tool call with name and arguments
|
||||
if (tool_calls.is_array()) {
|
||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
||||
const auto& tc = tool_calls[tc_idx];
|
||||
std::string tool_name = "unknown";
|
||||
std::string tool_args = "{}";
|
||||
if (tc.contains("function")) {
|
||||
const auto& func = tc["function"];
|
||||
if (func.contains("name")) {
|
||||
tool_name = func["name"].get<std::string>();
|
||||
}
|
||||
if (func.contains("arguments")) {
|
||||
tool_args = func["arguments"].is_string() ?
|
||||
func["arguments"].get<std::string>() :
|
||||
func["arguments"].dump();
|
||||
}
|
||||
} else if (tc.contains("name")) {
|
||||
tool_name = tc["name"].get<std::string>();
|
||||
if (tc.contains("arguments")) {
|
||||
tool_args = tc["arguments"].is_string() ?
|
||||
tc["arguments"].get<std::string>() :
|
||||
tc["arguments"].dump();
|
||||
}
|
||||
}
|
||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
||||
}
|
||||
}
|
||||
} catch (const json::parse_error& e) {
|
||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log final content state before adding to array
|
||||
if (msg_json.contains("content")) {
|
||||
if (msg_json["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
||||
i, msg_json["content"].is_string() ? "string" :
|
||||
msg_json["content"].is_array() ? "array" :
|
||||
msg_json["content"].is_object() ? "object" : "other",
|
||||
msg_json["content"].is_null() ? 0 : 1);
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
||||
}
|
||||
|
||||
messages_json.push_back(msg_json);
|
||||
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||
}
|
||||
|
||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||
@@ -3023,36 +2529,7 @@ public:
|
||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||
auto& msg = body_json["messages"][idx];
|
||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
||||
if (msg.contains("content")) {
|
||||
if (msg["content"].is_null()) {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Fix null content
|
||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
||||
// Tool messages must have string content, not array
|
||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
||||
// If content is object or other non-string type, convert to string for templates
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
||||
if (msg["content"].is_object()) {
|
||||
msg["content"] = msg["content"].dump();
|
||||
} else {
|
||||
msg["content"] = "";
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
||||
idx, role_str.c_str(),
|
||||
msg["content"].is_string() ? "string" :
|
||||
msg["content"].is_array() ? "array" :
|
||||
msg["content"].is_object() ? "object" : "other");
|
||||
}
|
||||
} else {
|
||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
||||
msg["content"] = ""; // Add missing content
|
||||
}
|
||||
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
192
backend/cpp/llama-cpp/message_content.h
Normal file
192
backend/cpp/llama-cpp/message_content.h
Normal file
@@ -0,0 +1,192 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace llama_grpc {
|
||||
|
||||
// Normalizes a proto message's content string into the JSON value used when
|
||||
// reconstructing OpenAI-format messages for the tokenizer (jinja) template.
|
||||
//
|
||||
// Shared by the streaming (PredictStream) and non-streaming (Predict) message
|
||||
// reconstruction paths so the two cannot drift.
|
||||
//
|
||||
// LocalAI's Go layer (schema.Messages.ToProto) always sends content as a plain
|
||||
// text string; multimodal media travels in separate proto fields, never inside
|
||||
// content. So user/system/developer content is *only ever* opaque text and must
|
||||
// NOT be JSON-sniffed: a prompt that merely looks like JSON (e.g. an ingredient
|
||||
// list ["1/4 cup sugar", ...]) would otherwise be reinterpreted as structured
|
||||
// content parts and rejected by oaicompat_chat_params_parse with
|
||||
// "unsupported content[].type" (https://github.com/mudler/LocalAI/issues/10524).
|
||||
// (developer is OpenAI's modern system alias - same "human-authored text" nature.)
|
||||
//
|
||||
// For assistant/tool messages we still collapse a literal JSON null/object
|
||||
// (tool-call bookkeeping) to a string, but we never turn a plain string into an
|
||||
// array/scalar. The array defense is therefore role-independent (arrays/scalars
|
||||
// fall through for every role); the role gate only governs the null/object case.
|
||||
inline nlohmann::ordered_json normalize_message_content(const std::string& role,
                                                        const std::string& content) {
    // Default result: the content verbatim, as a JSON string value.
    nlohmann::ordered_json content_val = content;

    // user/system/developer text is opaque and is never JSON-sniffed; every
    // other role gets the null/object collapse below.
    if (role != "user" && role != "system" && role != "developer") {
        try {
            nlohmann::ordered_json parsed = nlohmann::ordered_json::parse(content);
            if (parsed.is_null()) {
                content_val = "";
            } else if (parsed.is_object()) {
                content_val = parsed.dump();
            }
            // arrays / scalars: keep the original plain-text string as-is
        } catch (const nlohmann::ordered_json::exception&) {
            // Not usable as JSON: keep the plain string. The library's base
            // exception type is caught rather than parse_error alone, because
            // parse() reports numeric overflow (e.g. content "1e999") as
            // out_of_range, which would otherwise escape this helper.
        }
    }
    return content_val;
}
|
||||
|
||||
// Final safety pass applied to each reconstructed OpenAI message right before it
|
||||
// is handed to oaicompat_chat_params_parse (jinja templating). Jinja templates
|
||||
// assume content is a string: a literal null breaks slicing such as
|
||||
// message.content[:N] (#7324), and a tool message with array content is rejected
|
||||
// (#7528). A multimodal user message legitimately carries a typed-part array
|
||||
// ({type:text}, {type:image_url}, ...), which must be left intact. Shared by the
|
||||
// streaming and non-streaming paths so this invariant cannot drift between them.
|
||||
inline void normalize_template_message(nlohmann::ordered_json& msg) {
    // A message without any content field gets an empty string and is done.
    if (!msg.contains("content")) {
        msg["content"] = "";
        return;
    }

    // Role is only consulted for the tool-array case; a missing or
    // non-string role is treated as the empty role.
    std::string role;
    if (msg.contains("role") && msg["role"].is_string()) {
        role = msg["role"].get<std::string>();
    }

    nlohmann::ordered_json& body = msg["content"];

    // Strings are already what the template expects.
    if (body.is_string()) {
        return;
    }

    // Arrays: only a tool message is flattened (#7528); any other role keeps
    // its (multimodal) typed-part array untouched.
    if (body.is_array()) {
        if (role == "tool") {
            body = body.dump();
        }
        return;
    }

    // Everything else: objects are serialized, while null (#7324) and the
    // remaining scalars (number/bool) collapse to the empty string.
    if (body.is_object()) {
        body = body.dump();
    } else {
        body = "";
    }
}
|
||||
|
||||
// One proto message's data, flattened to plain types so the reconstruction logic
|
||||
// can be shared and unit-tested without protobuf. The streaming and non-streaming
|
||||
// predict paths both populate this from proto::Message + the request's media.
|
||||
struct ReconstructedMessageInput {
    // Text fields of the message.
    std::string role;
    std::string content;            // proto.Message.content (always a plain string)
    std::string name;
    std::string tool_call_id;
    std::string reasoning_content;
    std::string tool_calls;         // tool_calls as a JSON string, or empty

    // When true, the request media below is attached to this message.
    bool is_last_user_msg{false};

    // Request media payloads, base64-encoded.
    std::vector<std::string> images;  // base64 (jpeg)
    std::vector<std::string> audios;  // base64 (wav)
    std::vector<std::string> videos;  // base64
};
|
||||
|
||||
// Appends the request's media as OpenAI typed content parts. Imperative (not
|
||||
// brace-init) to avoid nlohmann's object-vs-array initializer-list ambiguity.
|
||||
inline void append_media_parts(nlohmann::ordered_json& content_array,
                               const std::vector<std::string>& images,
                               const std::vector<std::string>& audios,
                               const std::vector<std::string>& videos) {
    using ojson = nlohmann::ordered_json;

    // Appends {"type": kind, kind: payload}; each part stores its payload
    // under a key equal to its type name. Built field by field (no brace-init).
    const auto push_part = [&content_array](const char* kind, const ojson& payload) {
        ojson part;
        part["type"] = kind;
        part[kind] = payload;
        content_array.push_back(part);
    };

    // Images travel as data URLs.
    for (const std::string& b64 : images) {
        ojson payload;
        payload["url"] = "data:image/jpeg;base64," + b64;
        push_part("image_url", payload);
    }

    // Audio carries the raw base64 plus a format tag.
    for (const std::string& b64 : audios) {
        ojson payload;
        payload["data"] = b64;
        payload["format"] = "wav";  // default; could be made configurable
        push_part("input_audio", payload);
    }

    // Video carries only the raw base64.
    for (const std::string& b64 : videos) {
        ojson payload;
        payload["data"] = b64;
        push_part("input_video", payload);
    }
}
|
||||
|
||||
// Reconstructs a single OpenAI-format message (the object fed to
|
||||
// oaicompat_chat_params_parse) from a proto message. Shared by PredictStream and
|
||||
// Predict so the content/multimodal/tool_calls handling cannot drift between the
|
||||
// two stream modes (it previously lived as two ~150-line copies with a redundant
|
||||
// Predict-only tool_calls->" " branch). Guarantees content is always a string or
|
||||
// a typed-part array, never null/missing.
|
||||
inline nlohmann::ordered_json build_reconstructed_message(const ReconstructedMessageInput& in) {
    nlohmann::ordered_json msg_json;
    msg_json["role"] = in.role;
    const bool has_media = !in.images.empty() || !in.audios.empty() || !in.videos.empty();

    if (!in.content.empty()) {
        nlohmann::ordered_json content_val = normalize_message_content(in.role, in.content);
        if (content_val.is_string() && in.is_last_user_msg && has_media) {
            // Last user message + media: build a typed-part array (text first).
            nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
            nlohmann::ordered_json text_part;
            text_part["type"] = "text";
            text_part["text"] = content_val.get<std::string>();
            content_array.push_back(text_part);
            append_media_parts(content_array, in.images, in.audios, in.videos);
            msg_json["content"] = content_array;
        } else if (content_val.is_null()) {
            // Defensive: content must never reach the template as null.
            msg_json["content"] = "";
        } else {
            msg_json["content"] = content_val;
        }
    } else if (in.is_last_user_msg && has_media) {
        // No text but media on the last user message: media-only typed array.
        nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
        append_media_parts(content_array, in.images, in.audios, in.videos);
        msg_json["content"] = content_array;
    } else {
        // Empty content (any role, incl. tool/assistant): templates need a string.
        msg_json["content"] = "";
    }

    // Optional fields are emitted only when non-empty.
    if (!in.name.empty()) {
        msg_json["name"] = in.name;
    }
    if (!in.tool_call_id.empty()) {
        msg_json["tool_call_id"] = in.tool_call_id;
    }
    if (!in.reasoning_content.empty()) {
        msg_json["reasoning_content"] = in.reasoning_content;
    }

    if (!in.tool_calls.empty()) {
        try {
            nlohmann::ordered_json tool_calls = nlohmann::ordered_json::parse(in.tool_calls);
            msg_json["tool_calls"] = tool_calls;
            // tool_calls + empty/blank content: use " " not "", because llama.cpp's
            // common_chat_msgs_to_json_oaicompat turns "" into null, which breaks
            // templates that slice message.content[:tool_start_length] (#7324).
            if (!msg_json.contains("content") ||
                (msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
                msg_json["content"] = " ";
            }
        } catch (const nlohmann::ordered_json::exception&) {
            // Unusable tool_calls JSON: leave the message without tool_calls
            // and keep content as-is. The library's base exception type is
            // caught rather than parse_error alone, because parse() reports
            // numeric overflow inside the payload as out_of_range, which
            // would otherwise propagate out of the reconstruction.
        }
    }

    return msg_json;
}
|
||||
|
||||
} // namespace llama_grpc
|
||||
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
@@ -0,0 +1,234 @@
|
||||
// Unit tests for the shared message-reconstruction helpers (message_content.h).
|
||||
//
|
||||
// Build & run standalone (nlohmann/json single header on the include path):
|
||||
// g++ -std=c++17 -I<dir-with-nlohmann> message_content_test.cpp -o t && ./t
|
||||
// or via CMake: -DLLAMA_GRPC_BUILD_TESTS=ON then ctest.
|
||||
//
|
||||
// Regression coverage for:
|
||||
// #10524 - a user/system prompt that is itself a JSON-array string must stay
|
||||
// plain text, never be reinterpreted as OpenAI structured parts.
|
||||
// #7324 - assistant/tool null content -> "" (templates slice content[:N]);
|
||||
// assistant+tool_calls+empty content -> " " (not "", which becomes null).
|
||||
// #7528 - tool message array content must reach the template as a string.
|
||||
// multimodal - last user message text + media -> typed-part array, media kept.
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "message_content.h"
|
||||
|
||||
using nlohmann::ordered_json;
|
||||
using llama_grpc::normalize_message_content;
|
||||
using llama_grpc::normalize_template_message;
|
||||
using llama_grpc::build_reconstructed_message;
|
||||
using llama_grpc::ReconstructedMessageInput;
|
||||
|
||||
// Number of failed expectations recorded so far.
static int failures = 0;

// Records one expectation. On failure prints "FAIL <name>[: <detail>]" to
// stderr and bumps the failure counter; on success does nothing.
static void check(bool ok, const std::string& name, const std::string& detail = "") {
    if (ok) {
        return;
    }
    std::cerr << "FAIL " << name;
    if (!detail.empty()) {
        std::cerr << ": " << detail;
    }
    std::cerr << "\n";
    ++failures;
}
|
||||
|
||||
// ---- normalize_message_content -------------------------------------------
|
||||
|
||||
static void expect_norm_string(const char* name, const std::string& role,
|
||||
const std::string& content, const std::string& want) {
|
||||
auto got = normalize_message_content(role, content);
|
||||
if (!got.is_string()) {
|
||||
check(false, name, "expected a JSON string, got " +
|
||||
std::string(got.is_array() ? "array" : got.is_object() ? "object" : "other") +
|
||||
" (" + got.dump() + ")");
|
||||
return;
|
||||
}
|
||||
check(got.get<std::string>() == want, name, "expected \"" + want + "\", got \"" + got.get<std::string>() + "\"");
|
||||
}
|
||||
|
||||
static void test_normalize() {
|
||||
const std::string ingredients = R"(["1/4 cup brown sugar, packed","1 pound ground beef"])";
|
||||
|
||||
// #10524 - JSON-array text must stay a string. Role-INDEPENDENT array defense.
|
||||
for (const char* role : {"user", "system", "developer", "function", "assistant", "tool"}) {
|
||||
expect_norm_string((std::string("json_array_stays_text:") + role).c_str(), role, ingredients, ingredients);
|
||||
}
|
||||
|
||||
// #10524 - user/system/developer JSON-object text stays verbatim (NOT re-dumped).
|
||||
expect_norm_string("user_json_object_verbatim", "user", R"({"a":1})", R"({"a":1})");
|
||||
expect_norm_string("system_json_object_verbatim", "system", R"({"a":1})", R"({"a":1})");
|
||||
expect_norm_string("developer_json_object_verbatim", "developer", R"({"a":1})", R"({"a":1})");
|
||||
|
||||
// Plain text unchanged for all roles.
|
||||
expect_norm_string("user_plain_text", "user", "hello world", "hello world");
|
||||
expect_norm_string("assistant_non_json_text_kept", "assistant", "hi [unclosed", "hi [unclosed");
|
||||
|
||||
// #7324 boundary - user/system/developer literal "null" preserved (never parsed).
|
||||
expect_norm_string("user_literal_null_stays", "user", "null", "null");
|
||||
expect_norm_string("system_literal_null_stays", "system", "null", "null");
|
||||
expect_norm_string("developer_literal_null_stays", "developer", "null", "null");
|
||||
|
||||
// #7324 - assistant/tool literal null collapses to empty string.
|
||||
expect_norm_string("assistant_null_to_empty", "assistant", "null", "");
|
||||
expect_norm_string("tool_null_to_empty", "tool", "null", "");
|
||||
|
||||
// #7324/#7528 - assistant/tool object bookkeeping stringified (stays a string).
|
||||
check(normalize_message_content("assistant", R"({"tool":"x"})").is_string(), "assistant_object_stringified");
|
||||
check(normalize_message_content("tool", R"({"error":"boom"})").is_string(), "tool_object_stringified");
|
||||
|
||||
// #10524-family - a bare scalar that parses as a JSON number stays the string.
|
||||
expect_norm_string("assistant_scalar_number_stays_string", "assistant", "42", "42");
|
||||
|
||||
// baseline - empty content stays empty.
|
||||
expect_norm_string("user_empty_stays_empty", "user", "", "");
|
||||
}
|
||||
|
||||
// ---- normalize_template_message (BEFORE TEMPLATE sanitizer) ---------------
|
||||
|
||||
static void test_template_sanitizer() {
|
||||
// #7528 - a tool message with an ACTUAL array becomes a string.
|
||||
{
|
||||
ordered_json msg = {{"role", "tool"}, {"content", ordered_json::array({{{"type", "text"}, {"text", "r"}}})}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_string(), "before_template_tool_array_to_string", "got " + msg["content"].dump());
|
||||
}
|
||||
// #7324 - null content -> "" for any role.
|
||||
{
|
||||
ordered_json msg = {{"role", "assistant"}, {"content", nullptr}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_string() && msg["content"] == "", "before_template_null_to_empty");
|
||||
}
|
||||
// object content -> dumped string (would otherwise throw at the template).
|
||||
{
|
||||
ordered_json msg = {{"role", "assistant"}, {"content", {{"x", 1}}}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_string(), "before_template_object_to_string", "got " + msg["content"].dump());
|
||||
}
|
||||
// missing content field -> "".
|
||||
{
|
||||
ordered_json msg = {{"role", "user"}};
|
||||
normalize_template_message(msg);
|
||||
check(msg.contains("content") && msg["content"] == "", "before_template_missing_to_empty");
|
||||
}
|
||||
// multimodal: a well-typed user array must be left UNTOUCHED (role!=tool).
|
||||
{
|
||||
ordered_json parts = ordered_json::array();
|
||||
parts.push_back({{"type", "text"}, {"text", "x"}});
|
||||
ordered_json img; img["type"] = "image_url"; img["image_url"] = {{"url", "data:..."}};
|
||||
parts.push_back(img);
|
||||
ordered_json msg = {{"role", "user"}, {"content", parts}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"].is_array() && msg["content"].size() == 2, "before_template_user_typed_array_preserved",
|
||||
"got " + msg["content"].dump());
|
||||
}
|
||||
// a plain string is left untouched.
|
||||
{
|
||||
ordered_json msg = {{"role", "user"}, {"content", "hello"}};
|
||||
normalize_template_message(msg);
|
||||
check(msg["content"] == "hello", "before_template_string_untouched");
|
||||
}
|
||||
}
|
||||
|
||||
// ---- build_reconstructed_message ----------------------------------------
|
||||
|
||||
static void test_reconstruction() {
|
||||
const std::string ingredients = R"(["1/4 cup brown sugar","1 pound ground beef"])";
|
||||
|
||||
// #10524 end-state - user JSON-array text, no media -> string content.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "user"; in.content = ingredients;
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == ingredients, "recon_user_json_array_string",
|
||||
"got " + m["content"].dump());
|
||||
}
|
||||
// multimodal - user text + one image on last user msg -> typed array, image kept.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "user"; in.content = ingredients; in.is_last_user_msg = true;
|
||||
in.images.push_back("BASE64IMG");
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_array() && m["content"].size() == 2, "recon_multimodal_text_plus_image",
|
||||
"got " + m["content"].dump());
|
||||
check(m["content"][0]["type"] == "text" && m["content"][0]["text"] == ingredients, "recon_multimodal_text_first");
|
||||
check(m["content"][1]["type"] == "image_url", "recon_multimodal_image_kept");
|
||||
}
|
||||
// multimodal media-only - empty text + image on last user msg.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "user"; in.content = ""; in.is_last_user_msg = true;
|
||||
in.images.push_back("BASE64IMG");
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_array() && m["content"].size() == 1 && m["content"][0]["type"] == "image_url",
|
||||
"recon_media_only", "got " + m["content"].dump());
|
||||
}
|
||||
// #7528 - tool array-string content stays a string.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "tool"; in.content = R"(["a","b"])"; in.tool_call_id = "call_1";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == R"(["a","b"])", "recon_tool_array_string",
|
||||
"got " + m["content"].dump());
|
||||
check(m["tool_call_id"] == "call_1", "recon_tool_call_id_set");
|
||||
}
|
||||
// tool empty content -> "".
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "tool"; in.content = "";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == "", "recon_tool_empty_to_string");
|
||||
}
|
||||
// #7324 - assistant + tool_calls + empty content -> " " (single space, not "").
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "";
|
||||
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"].is_string() && m["content"] == " ", "recon_toolcalls_empty_content_space",
|
||||
"got " + m["content"].dump());
|
||||
check(m["tool_calls"].is_array() && m["tool_calls"].size() == 1, "recon_toolcalls_parsed");
|
||||
}
|
||||
// assistant + tool_calls + real content keeps the content.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "I'll call f";
|
||||
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"] == "I'll call f", "recon_toolcalls_with_content_kept");
|
||||
}
|
||||
// assistant null content -> "".
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "null";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"] == "", "recon_assistant_null_to_empty");
|
||||
}
|
||||
// malformed tool_calls JSON must not throw; content preserved.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "assistant"; in.content = "hi"; in.tool_calls = "{not json";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["content"] == "hi" && !m.contains("tool_calls"), "recon_malformed_toolcalls_safe");
|
||||
}
|
||||
// optional fields: name + reasoning carried through.
|
||||
{
|
||||
ReconstructedMessageInput in;
|
||||
in.role = "tool"; in.content = "result"; in.name = "get_weather"; in.reasoning_content = "thinking";
|
||||
auto m = build_reconstructed_message(in);
|
||||
check(m["name"] == "get_weather" && m["reasoning_content"] == "thinking", "recon_optional_fields");
|
||||
}
|
||||
}
|
||||
|
||||
// Runs every suite, then reports: exit status 0 when no expectation failed,
// 1 otherwise (with the failure count on stderr).
int main() {
    test_normalize();
    test_template_sanitizer();
    test_reconstruction();

    if (failures != 0) {
        std::cerr << failures << " test(s) failed\n";
        return 1;
    }

    std::cout << "OK: all message_content tests passed\n";
    return 0;
}
|
||||
@@ -36,6 +36,10 @@ done
|
||||
|
||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||
# Shared message-reconstruction helpers (included by grpc-server.cpp) and their
|
||||
# unit test (compiled only when -DLLAMA_GRPC_BUILD_TESTS=ON).
|
||||
cp -r message_content.h llama.cpp/tools/grpc-server/
|
||||
cp -r message_content_test.cpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
||||
|
||||
|
||||
71
backend/cpp/run-unit-tests.sh
Executable file
71
backend/cpp/run-unit-tests.sh
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/bash
#
# Discovers and runs every standalone C++ unit test under backend/cpp/.
#
# A "standalone" unit test is a *_test.cpp that depends only on the C++ standard
# library and nlohmann/json (single header) - i.e. it exercises pure helpers and
# does not need the full llama.cpp + gRPC backend build. Tests that DO need the
# backend build use the CMake/ctest path (e.g. -DLLAMA_GRPC_BUILD_TESTS=ON)
# instead and are skipped here.
#
# This keeps CI generic: adding a new pure-C++ unit test file named *_test.cpp in
# an active backend source dir is picked up automatically, with no CI edits.
#
# Env:
#   NLOHMANN_INCLUDE  include dir that contains nlohmann/json.hpp. If unset, the
#                     nlohmann/json single header is fetched to a temp dir.
#   CXX               compiler (default: g++).
#   JSON_VERSION      nlohmann/json tag to fetch when NLOHMANN_INCLUDE is unset
#                     (default: v3.11.3).
#
# Exit status: 0 when every discovered test compiled and passed (or none were
# found), 1 otherwise.

# No -e on purpose: compile/test failures are collected in $fail so that every
# test file is attempted before the script reports.
set -uo pipefail

ROOT="$(cd "$(dirname "$0")" && pwd)"
CXX="${CXX:-g++}"
JSON_VERSION="${JSON_VERSION:-v3.11.3}"

# One scratch dir for everything this run creates (fetched header, test
# binaries). It is removed on every exit path so repeated runs do not leave
# temp dirs behind.
WORK_DIR="$(mktemp -d)" || { echo "ERROR: failed to create temp dir" >&2; exit 1; }
trap 'rm -rf "$WORK_DIR"' EXIT

JSON_INC="${NLOHMANN_INCLUDE:-}"
if [ -z "$JSON_INC" ]; then
    JSON_INC="$WORK_DIR/include"
    mkdir -p "$JSON_INC/nlohmann"
    echo "Fetching nlohmann/json ${JSON_VERSION} single header..."
    if ! curl -L -sf \
        "https://raw.githubusercontent.com/nlohmann/json/${JSON_VERSION}/single_include/nlohmann/json.hpp" \
        -o "$JSON_INC/nlohmann/json.hpp"; then
        echo "ERROR: failed to fetch nlohmann/json header" >&2
        exit 1
    fi
fi

# Active source dirs only - exclude per-variant build copies, dev snapshots and
# the vendored upstream llama.cpp tree.
mapfile -t tests < <(find "$ROOT" -name '*_test.cpp' \
    -not -path '*/llama.cpp/*' \
    -not -path '*-build/*' \
    -not -path '*-dev/*' \
    -not -path '*fallback*' | sort)

if [ "${#tests[@]}" -eq 0 ]; then
    echo "No standalone C++ unit tests found under $ROOT"
    exit 0
fi

fail=0
for test_src in "${tests[@]}"; do
    name="$(basename "$test_src" .cpp)"
    # Tests are compiled and run one at a time, so a later test with the same
    # basename may safely overwrite an earlier binary.
    bin="$WORK_DIR/$name"
    echo "==> $test_src"
    if ! "$CXX" -std=c++17 -Wall -Wextra \
        -I"$JSON_INC" -I"$(dirname "$test_src")" \
        "$test_src" -o "$bin"; then
        echo "COMPILE FAILED: $test_src" >&2
        fail=1
        continue
    fi
    if ! "$bin"; then
        echo "TEST FAILED: $test_src" >&2
        fail=1
    fi
done

echo "Ran ${#tests[@]} standalone C++ unit test file(s)"
exit "$fail"
|
||||
@@ -1,6 +1,6 @@
|
||||
# parakeet-cpp backend Makefile.
|
||||
#
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
# Upstream pin lives below as PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||
# (.github/bump_deps.sh) can find and update it - matches the
|
||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||
#
|
||||
@@ -15,7 +15,7 @@
|
||||
# That's what the L0 smoke test uses. The default target below does the
|
||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||
|
||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
||||
PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||
|
||||
GOCMD?=go
|
||||
|
||||
@@ -7,3 +7,7 @@ setuptools
|
||||
six
|
||||
scipy
|
||||
numpy
|
||||
# fish-speech is installed editable with --no-build-isolation, so the build
|
||||
# backends of its transitive deps must already be in the venv. One of them
|
||||
# builds a Rust extension and needs setuptools-rust present at metadata time.
|
||||
setuptools-rust
|
||||
|
||||
@@ -11,14 +11,31 @@ fi
|
||||
EXTRA_PIP_INSTALL_FLAGS+=" --upgrade "
|
||||
installRequirements
|
||||
|
||||
# Fetch convert_hf_to_gguf.py from llama.cpp
|
||||
# Fetch convert_hf_to_gguf.py from llama.cpp.
|
||||
# Upstream split the model-specific logic out of the single file into a
|
||||
# sibling `conversion/` package (convert_hf_to_gguf.py now does
|
||||
# `from conversion import ...`), so a single-file download no longer runs —
|
||||
# it fails with `ModuleNotFoundError: No module named 'conversion'`. We clone
|
||||
# the repo and copy both the script and the package; Python puts the script's
|
||||
# own directory on sys.path[0], so the package resolves when placed beside it.
|
||||
LLAMA_CPP_CONVERT_VERSION="${LLAMA_CPP_CONVERT_VERSION:-master}"
|
||||
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
||||
CONVERT_SCRIPT="${EDIR}/convert_hf_to_gguf.py"
|
||||
if [ ! -f "${CONVERT_SCRIPT}" ]; then
|
||||
echo "Downloading convert_hf_to_gguf.py from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||
curl -L --fail --retry 3 \
|
||||
"https://raw.githubusercontent.com/ggml-org/llama.cpp/${LLAMA_CPP_CONVERT_VERSION}/convert_hf_to_gguf.py" \
|
||||
-o "${CONVERT_SCRIPT}" || echo "Warning: Failed to download convert_hf_to_gguf.py."
|
||||
|
||||
cloneLlamaCpp() {
|
||||
if [ ! -d "${LLAMA_CPP_SRC}/.git" ]; then
|
||||
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
||||
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
||||
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
||||
fi
|
||||
}
|
||||
|
||||
if [ ! -f "${CONVERT_SCRIPT}" ] || [ ! -d "${EDIR}/conversion" ]; then
|
||||
echo "Fetching convert_hf_to_gguf.py + conversion/ from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||
cloneLlamaCpp
|
||||
cp "${LLAMA_CPP_SRC}/convert_hf_to_gguf.py" "${CONVERT_SCRIPT}"
|
||||
rm -rf "${EDIR}/conversion"
|
||||
cp -r "${LLAMA_CPP_SRC}/conversion" "${EDIR}/conversion"
|
||||
fi
|
||||
|
||||
# Install gguf package from the same llama.cpp commit to keep them in sync
|
||||
@@ -41,12 +58,7 @@ QUANTIZE_BIN="${EDIR}/llama-quantize"
|
||||
if [ ! -x "${QUANTIZE_BIN}" ] && ! command -v llama-quantize &>/dev/null; then
|
||||
if command -v cmake &>/dev/null; then
|
||||
echo "Building llama-quantize from llama.cpp (${LLAMA_CPP_CONVERT_VERSION})..."
|
||||
LLAMA_CPP_SRC="${EDIR}/llama.cpp"
|
||||
if [ ! -d "${LLAMA_CPP_SRC}" ]; then
|
||||
git clone --depth 1 --branch "${LLAMA_CPP_CONVERT_VERSION}" \
|
||||
https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}" 2>/dev/null || \
|
||||
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git "${LLAMA_CPP_SRC}"
|
||||
fi
|
||||
cloneLlamaCpp # reuses the clone fetched for convert_hf_to_gguf.py
|
||||
cmake -B "${LLAMA_CPP_SRC}/build" -S "${LLAMA_CPP_SRC}" -DGGML_NATIVE=OFF -DBUILD_SHARED_LIBS=OFF
|
||||
cmake --build "${LLAMA_CPP_SRC}/build" --target llama-quantize -j"$(nproc 2>/dev/null || echo 2)"
|
||||
cp "${LLAMA_CPP_SRC}/build/bin/llama-quantize" "${QUANTIZE_BIN}"
|
||||
|
||||
@@ -85,9 +85,15 @@ if [ "x${BUILD_TYPE}" == "x" ] || [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
|
||||
# The resulting binary still requires an AVX-512 capable CPU at runtime,
|
||||
# same constraint sglang upstream documents in docker/xeon.Dockerfile.
|
||||
|
||||
# Pin the source build to the same release the GPU path floors on
|
||||
# (0.5.11, see requirements-cublas12-after.txt). An unpinned master clone
|
||||
# pulls in newer CPU kernels (e.g. mamba/fla.cpp) that fail to compile
|
||||
# (constexpr non-constant + kineto_LIBRARY-NOTFOUND). Bump deliberately.
|
||||
SGLANG_VERSION="${SGLANG_VERSION:-v0.5.11}"
|
||||
_sgl_src=$(mktemp -d)
|
||||
trap 'rm -rf "${_sgl_src}"' EXIT
|
||||
git clone --depth 1 https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
||||
git clone --depth 1 --branch "${SGLANG_VERSION}" \
|
||||
https://github.com/sgl-project/sglang "${_sgl_src}/sglang"
|
||||
|
||||
# Patch -march=native → -march=sapphirerapids in the CPU kernel CMakeLists
|
||||
sed -i 's/-march=native/-march=sapphirerapids/g' \
|
||||
|
||||
@@ -570,6 +570,43 @@ impl Backend for KokorosService {
|
||||
) -> Result<Response<backend::Result>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn sound_detection(
|
||||
&self,
|
||||
_: Request<backend::SoundDetectionRequest>,
|
||||
) -> Result<Response<backend::SoundDetectionResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn depth(
|
||||
&self,
|
||||
_: Request<backend::DepthRequest>,
|
||||
) -> Result<Response<backend::DepthResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn token_classify(
|
||||
&self,
|
||||
_: Request<backend::TokenClassifyRequest>,
|
||||
) -> Result<Response<backend::TokenClassifyResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
async fn score(
|
||||
&self,
|
||||
_: Request<backend::ScoreRequest>,
|
||||
) -> Result<Response<backend::ScoreResponse>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
|
||||
type ForwardStream = ReceiverStream<Result<backend::ForwardReply, Status>>;
|
||||
|
||||
async fn forward(
|
||||
&self,
|
||||
_: Request<tonic::Streaming<backend::ForwardRequest>>,
|
||||
) -> Result<Response<Self::ForwardStream>, Status> {
|
||||
Err(Status::unimplemented("Not supported"))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -16,6 +16,7 @@ import (
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/jobs"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||
"github.com/mudler/LocalAI/core/services/monitoring"
|
||||
"github.com/mudler/LocalAI/core/services/nodes"
|
||||
"github.com/mudler/LocalAI/core/services/routing/admission"
|
||||
@@ -330,9 +331,14 @@ func New(opts ...config.AppOption) (*Application, error) {
|
||||
gs := application.galleryService
|
||||
sys := options.SystemState
|
||||
cfgLoaderOpts := options.ToConfigLoaderOptions()
|
||||
gs.OnModelsChanged = func(_ messaging.CacheInvalidateEvent) {
|
||||
if err := application.ModelConfigLoader().LoadModelConfigsFromPath(sys.Model.ModelsPath, cfgLoaderOpts...); err != nil {
|
||||
xlog.Warn("Failed to reload model configs after peer invalidation", "error", err)
|
||||
gs.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
|
||||
// ApplyRemoteChange honors the op: a "delete" prunes the element
|
||||
// (a reload-from-path is additive and cannot drop it), anything
|
||||
// else reloads from disk; a named element's running instance is
|
||||
// shut down so the new config takes effect. The originating
|
||||
// replica reloads inline and never depends on this path.
|
||||
if err := modeladmin.ApplyRemoteChange(application.ModelConfigLoader(), application.modelLoader, sys.Model.ModelsPath, evt, cfgLoaderOpts...); err != nil {
|
||||
xlog.Warn("Failed to apply peer model config change", "error", err)
|
||||
}
|
||||
}
|
||||
if err := application.galleryService.SubscribeBroadcasts(); err != nil {
|
||||
|
||||
@@ -155,7 +155,7 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
|
||||
// @Param name path string true "Model name"
|
||||
// @Success 200 {object} map[string]any "success message"
|
||||
// @Router /api/models/config-json/{name} [patch]
|
||||
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||
return func(c echo.Context) error {
|
||||
modelName := c.Param("name")
|
||||
@@ -173,6 +173,14 @@ func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, app
|
||||
if _, err := svc.PatchConfig(c.Request().Context(), modelName, patchMap); err != nil {
|
||||
return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
|
||||
}
|
||||
|
||||
// Patch rewrites the config on disk and reloads only the local loader;
|
||||
// tell peers to refresh so the change is consistent across replicas.
|
||||
// No-op in standalone mode.
|
||||
if gs != nil {
|
||||
gs.BroadcastModelsChanged(modelName, "install")
|
||||
}
|
||||
|
||||
return c.JSON(http.StatusOK, map[string]any{
|
||||
"success": true,
|
||||
"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
|
||||
|
||||
@@ -45,7 +45,7 @@ var _ = Describe("Config Metadata Endpoints", func() {
|
||||
app = echo.New()
|
||||
app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
|
||||
app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
|
||||
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, appConfig))
|
||||
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, nil, appConfig))
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
httpUtils "github.com/mudler/LocalAI/core/http/middleware"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||
"github.com/mudler/LocalAI/internal"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
@@ -55,7 +56,7 @@ func GetEditModelPage(cl *config.ModelConfigLoader, appConfig *config.Applicatio
|
||||
}
|
||||
|
||||
// EditModelEndpoint handles updating existing model configurations
|
||||
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||
return func(c echo.Context) error {
|
||||
modelName := c.Param("name")
|
||||
@@ -70,6 +71,17 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
|
||||
if err != nil {
|
||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||
}
|
||||
|
||||
// Tell peer replicas to refresh their in-memory config: this endpoint
|
||||
// only reloaded the local loader. A rename is a delete of the old name
|
||||
// plus an install of the new one. No-op in standalone mode.
|
||||
if gs != nil {
|
||||
if result.Renamed {
|
||||
gs.BroadcastModelsChanged(result.OldName, "delete")
|
||||
}
|
||||
gs.BroadcastModelsChanged(result.NewName, "install")
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf("Model '%s' updated successfully. Model has been reloaded with new configuration.", result.NewName)
|
||||
if result.Renamed {
|
||||
msg = fmt.Sprintf("Model '%s' renamed to '%s' and updated successfully.", result.OldName, result.NewName)
|
||||
|
||||
@@ -56,7 +56,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
app := echo.New()
|
||||
// Set up a simple renderer for the test
|
||||
app.Renderer = &testRenderer{}
|
||||
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, applicationConfig))
|
||||
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, nil, applicationConfig))
|
||||
app.GET("/edit-model/:name", GetEditModelPage(modelConfigLoader, applicationConfig))
|
||||
|
||||
requestBody := bytes.NewBufferString(`{"name": "foo", "backend": "foo", "model": "foo"}`)
|
||||
@@ -106,7 +106,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
Expect(exists).To(BeTrue())
|
||||
|
||||
app := echo.New()
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||
|
||||
newYAML := "name: newname\nbackend: llama\nmodel: foo\n"
|
||||
req := httptest.NewRequest("POST", "/models/edit/oldname", bytes.NewBufferString(newYAML))
|
||||
@@ -163,7 +163,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||
|
||||
app := echo.New()
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||
|
||||
req := httptest.NewRequest(
|
||||
"POST",
|
||||
@@ -204,7 +204,7 @@ var _ = Describe("Edit Model test", func() {
|
||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||
|
||||
app := echo.New()
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||
|
||||
req := httptest.NewRequest(
|
||||
"POST",
|
||||
|
||||
@@ -125,7 +125,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
|
||||
}
|
||||
|
||||
// ImportModelEndpoint handles creating new model configurations
|
||||
func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func ImportModelEndpoint(cl *config.ModelConfigLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
return func(c echo.Context) error {
|
||||
// Get the raw body
|
||||
body, err := io.ReadAll(c.Request().Body)
|
||||
@@ -245,6 +245,13 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
|
||||
}
|
||||
return c.JSON(http.StatusInternalServerError, response)
|
||||
}
|
||||
// Tell peer replicas to load the newly-created config from the shared
|
||||
// models dir: this endpoint only reloaded the local loader. No-op in
|
||||
// standalone mode.
|
||||
if gs != nil {
|
||||
gs.BroadcastModelsChanged(modelConfig.Name, "install")
|
||||
}
|
||||
|
||||
// Return success response
|
||||
response := ModelResponse{
|
||||
Success: true,
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
|
||||
"github.com/labstack/echo/v4"
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
)
|
||||
@@ -24,7 +25,7 @@ import (
|
||||
// @Failure 404 {object} ModelResponse
|
||||
// @Failure 500 {object} ModelResponse
|
||||
// @Router /api/models/{name}/{action} [put]
|
||||
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||
return func(c echo.Context) error {
|
||||
modelName := c.Param("name")
|
||||
@@ -36,6 +37,14 @@ func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoade
|
||||
if err != nil {
|
||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||
}
|
||||
|
||||
// Enabling/disabling rewrites the config on disk and reloads only the
|
||||
// local loader; tell peers to refresh so the model's availability is
|
||||
// consistent across replicas. No-op in standalone mode.
|
||||
if gs != nil {
|
||||
gs.BroadcastModelsChanged(modelName, "install")
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf("Model '%s' has been %sd successfully.", modelName, action)
|
||||
if action == modeladmin.ActionDisable {
|
||||
msg += " The model will not be loaded on demand until re-enabled."
|
||||
|
||||
@@ -72,19 +72,19 @@ func RegisterLocalAIRoutes(router *echo.Echo,
|
||||
router.POST("/backends/upgrades/check", backendGalleryEndpointService.CheckUpgradesEndpoint(), adminMiddleware)
|
||||
router.POST("/backends/upgrade/:name", backendGalleryEndpointService.UpgradeBackendEndpoint(), adminMiddleware)
|
||||
// Custom model import endpoint
|
||||
router.POST("/models/import", localai.ImportModelEndpoint(cl, appConfig), adminMiddleware)
|
||||
router.POST("/models/import", localai.ImportModelEndpoint(cl, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// URI model import endpoint
|
||||
router.POST("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache), adminMiddleware)
|
||||
|
||||
// Custom model edit endpoint
|
||||
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// List model aliases endpoint
|
||||
router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware)
|
||||
|
||||
// Toggle model enable/disable endpoint
|
||||
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// Toggle model pinned status endpoint
|
||||
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
|
||||
|
||||
@@ -922,7 +922,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
||||
app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
|
||||
// PATCH config endpoint - partial update using nested JSON merge
|
||||
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||
|
||||
// VRAM estimation endpoint
|
||||
app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
|
||||
|
||||
@@ -68,6 +68,32 @@ var _ = Describe("LLM tests", func() {
|
||||
Expect(protoMessages[0].Content).To(Equal("Hello World"))
|
||||
})
|
||||
|
||||
// Regression for mudler/LocalAI#10524: a text part whose inner text is
|
||||
// itself a JSON-array string (mealie sends an ingredient list) must
|
||||
// flatten to that exact string verbatim. ToProto must NOT escape or
|
||||
// restructure it - the C++ backend then treats it as opaque text. This
|
||||
// pins the precise Go-side input that produced the "unsupported
|
||||
// content[].type" gRPC error before the backend stopped re-parsing it.
|
||||
It("flattens a JSON-array-looking text part to the verbatim string (#10524)", func() {
|
||||
ingredients := `["1/4 cup brown sugar, packed","1 pound ground beef"]`
|
||||
messages := Messages{
|
||||
{
|
||||
Role: "user",
|
||||
Content: []any{
|
||||
map[string]any{
|
||||
"type": "text",
|
||||
"text": ingredients,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
protoMessages := messages.ToProto()
|
||||
|
||||
Expect(protoMessages).To(HaveLen(1))
|
||||
Expect(protoMessages[0].Content).To(Equal(ingredients))
|
||||
})
|
||||
|
||||
It("should convert message with tool_calls", func() {
|
||||
messages := Messages{
|
||||
{
|
||||
|
||||
@@ -404,6 +404,36 @@ var _ = Describe("GalleryService cache invalidation broadcasts", func() {
|
||||
Element: "x", Op: "install",
|
||||
})).To(Succeed())
|
||||
})
|
||||
|
||||
It("BroadcastModelsChanged delivers the element and op to a peer's OnModelsChanged", func() {
	var (
		mu   sync.Mutex
		seen []messaging.CacheInvalidateEvent
	)
	// Record every invalidation event replica B's callback is handed.
	svcB.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
		mu.Lock()
		defer mu.Unlock()
		seen = append(seen, evt)
	}
	Expect(svcA.SubscribeBroadcasts()).To(Succeed())
	Expect(svcB.SubscribeBroadcasts()).To(Succeed())

	// An admin edit on replica A must reach replica B over the same subject
	// the gallery path uses, so B refreshes its in-memory config loader.
	svcA.BroadcastModelsChanged("my-alias", "install")

	// received returns a copy of the events seen so far, taken under the
	// lock so the matcher never reads the slice while the callback appends.
	received := func() []messaging.CacheInvalidateEvent {
		mu.Lock()
		defer mu.Unlock()
		return append([]messaging.CacheInvalidateEvent(nil), seen...)
	}

	// NOTE(review): the messaging client is not visible here. If it delivers
	// on another goroutine, an immediate Expect races with the callback.
	// Polling passes in both the synchronous and asynchronous case.
	Eventually(received).Should(ContainElement(messaging.CacheInvalidateEvent{
		Element: "my-alias", Op: "install",
	}))
})
|
||||
|
||||
It("BroadcastModelsChanged is a no-op when NATS is not wired (standalone)", func() {
	// The service is built without SetNATSClient; broadcasting must neither
	// panic nor require any messaging setup.
	standalone := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
	broadcast := func() {
		standalone.BroadcastModelsChanged("x", "delete")
	}
	Expect(broadcast).ToNot(Panic())
})
|
||||
})
|
||||
|
||||
var _ = Describe("GalleryService PostgreSQL hydration", func() {
|
||||
|
||||
@@ -201,6 +201,24 @@ func (g *GalleryService) publishCacheInvalidate(subject string, evt messaging.Ca
|
||||
}
|
||||
}
|
||||
|
||||
// BroadcastModelsChanged notifies peer replicas that a model config was
|
||||
// created, edited, or removed out-of-band of the gallery install/delete
|
||||
// channel (e.g. the admin /models/edit, /models/import and
|
||||
// /models/toggle-state endpoints, which write the YAML and reload only the
|
||||
// local in-memory loader). Peers receive it via OnModelsChanged and refresh
|
||||
// their own ModelConfigLoader so a request load-balanced to any replica sees
|
||||
// the same config. No-op in standalone mode (no NATS client).
|
||||
//
|
||||
// op is "install" for a create/edit (the element must be (re)loaded from
|
||||
// disk) or "delete" for a removal (the element must be pruned from memory,
|
||||
// which a reload-from-path cannot do because the loader is additive).
|
||||
func (g *GalleryService) BroadcastModelsChanged(element, op string) {
|
||||
g.publishCacheInvalidate(messaging.SubjectCacheInvalidateModels, messaging.CacheInvalidateEvent{
|
||||
Element: element,
|
||||
Op: op,
|
||||
})
|
||||
}
|
||||
|
||||
// mergeStatus is the broadcast-side merge: it updates the in-memory map from
|
||||
// a peer's GalleryProgressEvent without re-publishing to NATS or re-writing
|
||||
// to PostgreSQL. UpdateStatus is the local-write entry point and does both;
|
||||
|
||||
53
core/services/modeladmin/remote_sync.go
Normal file
53
core/services/modeladmin/remote_sync.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package modeladmin
|
||||
|
||||
import (
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
"github.com/mudler/LocalAI/pkg/model"
|
||||
|
||||
"github.com/mudler/xlog"
|
||||
)
|
||||
|
||||
// opDelete is the CacheInvalidateEvent.Op value the gallery delete path and the
|
||||
// admin delete endpoint use; a delete must prune (a reload-from-path cannot).
|
||||
const opDelete = "delete"
|
||||
|
||||
// ApplyRemoteChange refreshes this replica's in-memory model state from a peer
|
||||
// replica's model-config change broadcast (messaging.CacheInvalidateEvent on
|
||||
// SubjectCacheInvalidateModels). It is the subscriber-side counterpart to
|
||||
// GalleryService.BroadcastModelsChanged.
|
||||
//
|
||||
// The op matters because LoadModelConfigsFromPath is additive: it loads every
|
||||
// YAML on disk into the loader but never removes an entry whose file is gone.
|
||||
// So a delete cannot be propagated by a plain reload - the deleted element must
|
||||
// be explicitly pruned. Specifically:
|
||||
//
|
||||
// - op == "delete" with a named element: prune that element from the loader.
|
||||
// - otherwise: reload all configs from disk (picks up creates and edits).
|
||||
//
|
||||
// In both cases, when an element is named, any running instance on this replica
|
||||
// is shut down (best-effort) so the next request rebuilds it from the new
|
||||
// config instead of serving the stale one - mirroring what the originating
|
||||
// replica does on a local edit/delete.
|
||||
//
|
||||
// ml may be nil (no running instances to shut down). modelsPath and opts are
|
||||
// forwarded to LoadModelConfigsFromPath.
|
||||
func ApplyRemoteChange(cl *config.ModelConfigLoader, ml *model.ModelLoader, modelsPath string, evt messaging.CacheInvalidateEvent, opts ...config.ConfigLoaderOption) error {
|
||||
if evt.Op == opDelete && evt.Element != "" {
|
||||
cl.RemoveModelConfig(evt.Element)
|
||||
} else if err := cl.LoadModelConfigsFromPath(modelsPath, opts...); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Drop any running instance of the affected model so the next request
|
||||
// rebuilds it from the refreshed config instead of serving the stale one.
|
||||
// Best-effort: the model may not be loaded on this replica, which surfaces
|
||||
// as a benign error here.
|
||||
if ml != nil && evt.Element != "" {
|
||||
if err := ml.ShutdownModel(evt.Element); err != nil {
|
||||
xlog.Debug("ApplyRemoteChange: could not shut down model instance (likely not loaded)",
|
||||
"model", evt.Element, "error", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
80
core/services/modeladmin/remote_sync_test.go
Normal file
80
core/services/modeladmin/remote_sync_test.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package modeladmin
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"github.com/mudler/LocalAI/core/config"
|
||||
"github.com/mudler/LocalAI/core/services/messaging"
|
||||
)
|
||||
|
||||
var _ = Describe("ApplyRemoteChange", func() {
|
||||
var (
|
||||
dir string
|
||||
loader *config.ModelConfigLoader
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
dir = GinkgoT().TempDir()
|
||||
loader = config.NewModelConfigLoader(dir)
|
||||
})
|
||||
|
||||
writeYAML := func(name string, body map[string]any) {
|
||||
body["name"] = name
|
||||
data, err := yaml.Marshal(body)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(os.WriteFile(filepath.Join(dir, name+".yaml"), data, 0644)).To(Succeed())
|
||||
}
|
||||
|
||||
It("loads a peer-created config from disk on an install event", func() {
|
||||
// Peer wrote the YAML to the shared models dir; this replica has not
|
||||
// loaded it yet (empty in-memory loader).
|
||||
writeYAML("peer-alias", map[string]any{"alias": "qwen"})
|
||||
_, ok := loader.GetModelConfig("peer-alias")
|
||||
Expect(ok).To(BeFalse(), "precondition: not yet in memory")
|
||||
|
||||
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{
|
||||
Element: "peer-alias", Op: "install",
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, ok = loader.GetModelConfig("peer-alias")
|
||||
Expect(ok).To(BeTrue(), "install event must reload the new config from disk")
|
||||
})
|
||||
|
||||
It("prunes a peer-deleted config that a reload-from-path cannot drop", func() {
|
||||
// Model is present in memory (loaded earlier) but its file is now gone
|
||||
// from the shared dir. LoadModelConfigsFromPath is additive, so only an
|
||||
// explicit prune can remove it - this is the cross-replica delete bug.
|
||||
writeYAML("doomed", map[string]any{"alias": "qwen"})
|
||||
Expect(loader.LoadModelConfigsFromPath(dir)).To(Succeed())
|
||||
_, ok := loader.GetModelConfig("doomed")
|
||||
Expect(ok).To(BeTrue(), "precondition: in memory")
|
||||
Expect(os.Remove(filepath.Join(dir, "doomed.yaml"))).To(Succeed())
|
||||
|
||||
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{
|
||||
Element: "doomed", Op: "delete",
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, ok = loader.GetModelConfig("doomed")
|
||||
Expect(ok).To(BeFalse(), "delete event must prune the element from memory")
|
||||
})
|
||||
|
||||
It("does a full reload when no element is named", func() {
|
||||
writeYAML("m1", map[string]any{"alias": "qwen"})
|
||||
writeYAML("m2", map[string]any{"alias": "qwen"})
|
||||
|
||||
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
_, ok1 := loader.GetModelConfig("m1")
|
||||
_, ok2 := loader.GetModelConfig("m2")
|
||||
Expect(ok1).To(BeTrue())
|
||||
Expect(ok2).To(BeTrue())
|
||||
})
|
||||
})
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "v4.5.0"
|
||||
"version": "v4.5.2"
|
||||
}
|
||||
|
||||
@@ -221,6 +221,54 @@
|
||||
- filename: llama-cpp/models/Qwen3.6-35B-A3B-NVFP4-MTP-GGUF/Qwen3.6-35B-A3B-NVFP4-MTP-TURBO.gguf
|
||||
sha256: f3d2fdc74e3ef19925ccbf794b04d7f6f11fb12eba7722b7749219d0cc5c36ed
|
||||
uri: https://huggingface.co/michaelw9999/Qwen3.6-35B-A3B-NVFP4-MTP-GGUF/resolve/main/Qwen3.6-35B-A3B-NVFP4-MTP-TURBO.gguf
|
||||
- name: "qwen-agentworld-35b-a3b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
- https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF
|
||||
description: |
|
||||
# Qwen-AgentWorld-35B-A3B
|
||||
|
||||
📑 Technical Report |
|
||||
📖 Blog |
|
||||
🤗 Hugging Face |
|
||||
🤖 ModelScope |
|
||||
💻 GitHub |
|
||||
🖥️ Demo
|
||||
|
||||
> [!Note]
|
||||
> This repository contains the model weights and configuration files for **Qwen-AgentWorld-35B-A3B**, a native language world model trained for agentic environment simulation.
|
||||
>
|
||||
> These artifacts are compatible with Hugging Face Transformers, vLLM, SGLang, etc.
|
||||
|
||||
**Qwen-AgentWorld** is the first language world model to cover seven agent interaction domains within a single model. It simulates agentic environments via long chain-of-thought reasoning, predicting the next environment state given an agent's action and interaction history. Trained through a three-stage pipeline — CPT injects environment knowledge, SFT activates next-state-prediction reasoning, RL sharpens simulation fidelity — Qwen-AgentWorld is a **native world model**: environment modeling is the training objective from the CPT stage onward, not a post-hoc add-on.
|
||||
|
||||
## Highlights
|
||||
|
||||
...
|
||||
license: "apache-2.0"
|
||||
tags:
|
||||
- llm
|
||||
- gguf
|
||||
- qwen
|
||||
icon: https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen-AgentWorld/logo.png
|
||||
overrides:
|
||||
backend: llama-cpp
|
||||
function:
|
||||
automatic_tool_parsing_fallback: true
|
||||
grammar:
|
||||
disable: true
|
||||
known_usecases:
|
||||
- chat
|
||||
options:
|
||||
- use_jinja:true
|
||||
parameters:
|
||||
model: llama-cpp/models/Qwen-AgentWorld-35B-A3B-GGUF/Qwen-AgentWorld-35B-A3B-UD-Q4_K_M.gguf
|
||||
template:
|
||||
use_tokenizer_template: true
|
||||
files:
|
||||
- filename: llama-cpp/models/Qwen-AgentWorld-35B-A3B-GGUF/Qwen-AgentWorld-35B-A3B-UD-Q4_K_M.gguf
|
||||
sha256: e7a8eafdd8013443b6bcc4b6fb47b2d2025f772d359650b9ceb7d75971e22cad
|
||||
uri: https://huggingface.co/unsloth/Qwen-AgentWorld-35B-A3B-GGUF/resolve/main/Qwen-AgentWorld-35B-A3B-UD-Q4_K_M.gguf
|
||||
- name: "ornith-1.0-9b"
|
||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||
urls:
|
||||
|
||||
@@ -17,9 +17,15 @@ rm -rf "${BACKEND_DIR}"/build-*
|
||||
# run.sh's final `exec $CURDIR/<binary>` is the contract for what gets launched;
|
||||
# the binary is not always named after the backend (e.g. parakeet-cpp launches
|
||||
# parakeet-cpp-grpc), so derive it from run.sh and fall back to ${BACKEND}.
|
||||
#
|
||||
# Only scan the `exec` line(s): many run.sh select a runtime CPU variant via
|
||||
# unquoted `LIBRARY=$CURDIR/libgo<x>-avx512.so` lines, and a whole-file grep
|
||||
# would pick the last of those (avx512, which Darwin never builds) instead of
|
||||
# the binary — failing the check below for whisper/sam3-cpp/vibevoice-cpp/...
|
||||
# Also tolerate the exec being quoted (`exec "$CURDIR"/<binary>`).
|
||||
RUN_BINARY=""
|
||||
if [ -f "${BACKEND_DIR}/run.sh" ]; then
|
||||
RUN_BINARY=$(grep -oE '\$CURDIR/[A-Za-z0-9._-]+' "${BACKEND_DIR}/run.sh" | grep -v 'ld\.so' | tail -1 | sed 's|\$CURDIR/||')
|
||||
RUN_BINARY=$(grep -E '^[[:space:]]*exec[[:space:]]' "${BACKEND_DIR}/run.sh" | grep -oE '"?\$CURDIR"?/[A-Za-z0-9._-]+' | grep -v 'ld\.so' | tail -1 | sed -E 's|"?\$CURDIR"?/||')
|
||||
fi
|
||||
RUN_BINARY="${RUN_BINARY:-${BACKEND}}"
|
||||
|
||||
|
||||
@@ -141,6 +141,38 @@ copy_elf_deps() {
|
||||
done < <(ldd "$elf" 2>/dev/null | awk '/=>/ && $3 ~ /^\// {print $3}')
|
||||
}
|
||||
|
||||
# Sweep the transitive shared-library dependencies of everything already
# bundled in a lib dir. The per-vendor packagers copy an explicit allowlist of
# top-level runtime libs, but those libs pull in transitive deps that aren't
# in the list (e.g. ROCm's librocprofiler-register.so.0, libnuma,
# libdrm_amdgpu). Because backends run through the bundled lib/ld.so with
# LD_LIBRARY_PATH=lib (see run.sh), an unbundled transitive dep is a hard load
# failure (issue #10537: "librocprofiler-register.so.0: cannot open shared
# object file"). ldd resolves the full recursive closure, so a single pass over
# the already-bundled libs is enough; core libc-family deps are skipped via
# copy_elf_deps/is_core_lib so we never shadow the loader's own libc/libstdc++.
#
# Arguments:
#   $1 - directory to sweep (defaults to $TARGET_LIB_DIR)
# Returns:
#   0 when ldd is unavailable or the directory holds no *.so* files (nothing
#   to do); otherwise the status of the last loop command.
sweep_transitive_deps() {
    local dir="${1:-$TARGET_LIB_DIR}"
    # No ldd means no way to resolve dependencies: treat the sweep as a no-op.
    command -v ldd >/dev/null 2>&1 || return 0

    # Snapshot the current set first: copy_elf_deps adds files as it runs, and
    # ldd already returns the full recursive closure, so we only need to sweep
    # the libs that were present before the sweep started.
    # `local x=$(...)` keeps set -e from tripping on shopt -p's nonzero exit.
    local old_nullglob=$(shopt -p nullglob)
    shopt -s nullglob
    local libs=("$dir"/*.so*)
    eval "$old_nullglob"

    # Nothing bundled: stop before expanding the array. Under `set -u`,
    # "${libs[@]}" on an empty array is an "unbound variable" error on bash
    # releases older than 4.4, which would abort the whole packaging run.
    [ "${#libs[@]}" -gt 0 ] || return 0

    local lib
    for lib in "${libs[@]}"; do
        [ -e "$lib" ] || continue
        # Skip symlinks: their real target is in the snapshot and gets swept.
        [ -L "$lib" ] && continue
        copy_elf_deps "$lib"
    done
}
|
||||
|
||||
# Package NVIDIA CUDA libraries
|
||||
package_cuda_libs() {
|
||||
echo "Packaging CUDA libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
||||
@@ -185,6 +217,10 @@ package_cuda_libs() {
|
||||
# cp -arfL /usr/local/cuda/targets "$TARGET_LIB_DIR/../cuda/" 2>/dev/null || true
|
||||
# fi
|
||||
|
||||
# Pull in transitive deps the allowlist misses so the backend is
|
||||
# self-contained (same class of failure as #10537).
|
||||
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||
|
||||
echo "CUDA libraries packaged successfully"
|
||||
}
|
||||
|
||||
@@ -261,6 +297,10 @@ package_rocm_libs() {
|
||||
fi
|
||||
done
|
||||
|
||||
# Pull in transitive deps the allowlist misses (librocprofiler-register.so.0,
|
||||
# libnuma, libdrm_amdgpu, ...) so the backend is self-contained. See #10537.
|
||||
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||
|
||||
echo "ROCm libraries packaged successfully"
|
||||
}
|
||||
|
||||
@@ -303,6 +343,10 @@ package_intel_libs() {
|
||||
fi
|
||||
done
|
||||
|
||||
# Pull in transitive deps the allowlist misses so the backend is
|
||||
# self-contained (same class of failure as #10537).
|
||||
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||
|
||||
echo "Intel oneAPI libraries packaged successfully"
|
||||
}
|
||||
|
||||
@@ -432,6 +476,7 @@ export -f copy_lib
|
||||
export -f copy_libs_glob
|
||||
export -f is_core_lib
|
||||
export -f copy_elf_deps
|
||||
export -f sweep_transitive_deps
|
||||
export -f package_cuda_libs
|
||||
export -f package_rocm_libs
|
||||
export -f package_intel_libs
|
||||
|
||||
54
scripts/build/package-gpu-libs_test.sh
Executable file
54
scripts/build/package-gpu-libs_test.sh
Executable file
@@ -0,0 +1,54 @@
|
||||
#!/bin/bash
# Regression test for scripts/build/package-gpu-libs.sh.
#
# Guards issue #10537: the per-vendor packagers copy an explicit allowlist of
# top-level GPU runtime libs but used to miss their transitive dependencies
# (e.g. ROCm's librocprofiler-register.so.0). Since backends run through the
# bundled lib/ld.so with LD_LIBRARY_PATH=lib, an unbundled transitive dep is a
# fatal "cannot open shared object file" at load time.
#
# This test fabricates a primary lib that links a transitive lib, simulates the
# allowlist step (primary copied, transitive not), and asserts the transitive
# sweep pulls the dependency in. Requires gcc + ldd (present in build images).
#
# Exit status: 0 on PASS or SKIP (toolchain missing), 1 on FAIL.
set -euo pipefail

CURDIR=$(dirname "$(realpath "$0")")
SCRIPT="$CURDIR/package-gpu-libs.sh"

# Skip (rather than fail) on hosts without a C toolchain or ldd.
if ! command -v gcc >/dev/null 2>&1 || ! command -v ldd >/dev/null 2>&1; then
    echo "SKIP: gcc/ldd not available"
    exit 0
fi

# Scratch directory for the fabricated libraries; removed on any exit path.
WORK=$(mktemp -d)
trap 'rm -rf "$WORK"' EXIT

# Transitive dependency (stand-in for librocprofiler-register.so.0).
echo 'int transitive_fn(void){return 42;}' > "$WORK/transitive.c"
gcc -shared -fPIC -o "$WORK/libfaketransitive.so.0" "$WORK/transitive.c"

# Primary allowlisted lib (stand-in for libhipblas.so) that links it.
echo 'int transitive_fn(void); int primary_fn(void){return transitive_fn();}' > "$WORK/primary.c"
gcc -shared -fPIC -o "$WORK/libfakeprimary.so.0" "$WORK/primary.c" \
    -L"$WORK" -l:libfaketransitive.so.0 -Wl,-rpath,"$WORK"

# Simulate the allowlist step: primary already bundled, transitive not.
TARGET="$WORK/target"
mkdir -p "$TARGET"
cp "$WORK/libfakeprimary.so.0" "$TARGET/"

# Make the transitive dep resolvable like /opt/rocm libs are in the build image.
# Append the previous value only when it is non-empty: a trailing ':' would add
# an empty entry, which the dynamic loader treats as the current directory and
# would make resolution depend on where the test is invoked from.
export LD_LIBRARY_PATH="$WORK${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Load the packaging helpers into this shell, then run only the sweep under test.
# shellcheck source=/dev/null
source "$SCRIPT" "$TARGET"
sweep_transitive_deps "$TARGET"

if [ -e "$TARGET/libfaketransitive.so.0" ]; then
    echo "PASS: transitive dependency was bundled by sweep_transitive_deps"
    exit 0
fi

# Show what actually landed in the target directory to aid debugging.
echo "FAIL: transitive dependency was NOT bundled (regression of #10537)"
ls -la "$TARGET"
exit 1
|
||||
Reference in New Issue
Block a user