mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-27 01:47:18 -04:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
14b29ebf4e | ||
|
|
f0d0bff232 | ||
|
|
64150ca7ab | ||
|
|
f98b0f1c1e | ||
|
|
2c96c2d08e | ||
|
|
f01a969f7b | ||
|
|
56600eec3e | ||
|
|
c4fa256cdf | ||
|
|
17c1fc74b2 | ||
|
|
068d397acf | ||
|
|
5b3572f8b8 |
21
.github/workflows/release.yaml
vendored
21
.github/workflows/release.yaml
vendored
@@ -24,6 +24,11 @@ jobs:
|
|||||||
args: release --clean
|
args: release --clean
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
MACOS_SIGN_P12: ${{ secrets.MACOS_CERTIFICATE }}
|
||||||
|
MACOS_SIGN_PASSWORD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||||
|
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||||
|
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||||
|
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||||
launcher-build-darwin:
|
launcher-build-darwin:
|
||||||
runs-on: macos-latest
|
runs-on: macos-latest
|
||||||
steps:
|
steps:
|
||||||
@@ -35,9 +40,19 @@ jobs:
|
|||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@v5
|
||||||
with:
|
with:
|
||||||
go-version: 1.23
|
go-version: 1.23
|
||||||
- name: Build launcher for macOS ARM64
|
- name: Import signing certificate
|
||||||
run: |
|
env:
|
||||||
make build-launcher-darwin
|
MACOS_CERTIFICATE: ${{ secrets.MACOS_CERTIFICATE }}
|
||||||
|
MACOS_CERTIFICATE_PWD: ${{ secrets.MACOS_CERTIFICATE_PWD }}
|
||||||
|
MACOS_CI_KEYCHAIN_PWD: ${{ secrets.MACOS_CI_KEYCHAIN_PWD }}
|
||||||
|
run: bash contrib/macos/sign-and-notarize.sh import-cert
|
||||||
|
- name: Build, sign and notarize the DMG
|
||||||
|
env:
|
||||||
|
MACOS_SIGN_IDENTITY: ${{ secrets.MACOS_SIGN_IDENTITY }}
|
||||||
|
MACOS_NOTARY_KEY: ${{ secrets.MACOS_NOTARY_KEY }}
|
||||||
|
MACOS_NOTARY_KEY_ID: ${{ secrets.MACOS_NOTARY_KEY_ID }}
|
||||||
|
MACOS_NOTARY_ISSUER_ID: ${{ secrets.MACOS_NOTARY_ISSUER_ID }}
|
||||||
|
run: make release-launcher-darwin
|
||||||
- name: Upload DMG to Release
|
- name: Upload DMG to Release
|
||||||
uses: softprops/action-gh-release@v3
|
uses: softprops/action-gh-release@v3
|
||||||
with:
|
with:
|
||||||
|
|||||||
16
.github/workflows/test.yml
vendored
16
.github/workflows/test.yml
vendored
@@ -121,3 +121,19 @@ jobs:
|
|||||||
detached: true
|
detached: true
|
||||||
connect-timeout-seconds: 180
|
connect-timeout-seconds: 180
|
||||||
limit-access-to-actor: true
|
limit-access-to-actor: true
|
||||||
|
|
||||||
|
# Fast standalone unit tests for the backends' pure C++ helpers - currently the
|
||||||
|
# llama-cpp message reconstruction (backend/cpp/llama-cpp/message_content.h),
|
||||||
|
# which guards the OpenAI chat content normalization (mudler/LocalAI#10524,
|
||||||
|
# #7324, #7528). The runner discovers every *_test.cpp under backend/cpp/, so
|
||||||
|
# new pure-C++ unit tests are picked up with no CI changes. These need only the
|
||||||
|
# C++ stdlib + nlohmann/json, so they run on every PR without the full
|
||||||
|
# llama.cpp + gRPC backend build. (The same suite is also wired as an opt-in
|
||||||
|
# CMake/ctest target, -DLLAMA_GRPC_BUILD_TESTS=ON, for in-backend-build runs.)
|
||||||
|
tests-backend-cpp:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
uses: actions/checkout@v7
|
||||||
|
- name: Run backend C++ unit tests
|
||||||
|
run: make test-backend-cpp
|
||||||
|
|||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -94,3 +94,6 @@ core/http/react-ui/test-results/
|
|||||||
|
|
||||||
# SDD / brainstorm scratch (agent-driven development)
|
# SDD / brainstorm scratch (agent-driven development)
|
||||||
.superpowers/
|
.superpowers/
|
||||||
|
|
||||||
|
# Local Apple signing material (never commit)
|
||||||
|
.certs/
|
||||||
|
|||||||
@@ -9,7 +9,8 @@ source:
|
|||||||
enabled: true
|
enabled: true
|
||||||
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
name_template: '{{ .ProjectName }}-{{ .Tag }}-source'
|
||||||
builds:
|
builds:
|
||||||
- main: ./cmd/local-ai
|
- id: local-ai
|
||||||
|
main: ./cmd/local-ai
|
||||||
env:
|
env:
|
||||||
- CGO_ENABLED=0
|
- CGO_ENABLED=0
|
||||||
ldflags:
|
ldflags:
|
||||||
@@ -35,3 +36,19 @@ snapshot:
|
|||||||
version_template: "{{ .Tag }}-next"
|
version_template: "{{ .Tag }}-next"
|
||||||
changelog:
|
changelog:
|
||||||
use: github-native
|
use: github-native
|
||||||
|
# Sign + notarize the macOS server binary via the quill backend (runs on Linux,
|
||||||
|
# no macOS runner needed). Disabled automatically when MACOS_SIGN_P12 is unset
|
||||||
|
# (forks / PRs), so those builds stay unsigned and green.
|
||||||
|
notarize:
|
||||||
|
macos:
|
||||||
|
- enabled: '{{ isEnvSet "MACOS_SIGN_P12" }}'
|
||||||
|
ids:
|
||||||
|
- local-ai
|
||||||
|
sign:
|
||||||
|
certificate: "{{.Env.MACOS_SIGN_P12}}"
|
||||||
|
password: "{{.Env.MACOS_SIGN_PASSWORD}}"
|
||||||
|
notarize:
|
||||||
|
issuer_id: "{{.Env.MACOS_NOTARY_ISSUER_ID}}"
|
||||||
|
key_id: "{{.Env.MACOS_NOTARY_KEY_ID}}"
|
||||||
|
key: "{{.Env.MACOS_NOTARY_KEY}}"
|
||||||
|
wait: true
|
||||||
|
|||||||
44
Makefile
44
Makefile
@@ -103,7 +103,7 @@ COVERAGE_E2E_LABELS?=!real-models
|
|||||||
COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go
|
COVERAGE_EXCLUDE_RE?=grpc/proto/.*[.]pb[.]go
|
||||||
|
|
||||||
|
|
||||||
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
.PHONY: all test test-coverage test-coverage-baseline test-coverage-check test-backend-cpp test-ui test-ui-coverage-baseline test-ui-coverage-check install-hooks build vendor lint lint-all
|
||||||
|
|
||||||
all: help
|
all: help
|
||||||
|
|
||||||
@@ -201,6 +201,13 @@ test: prepare-test
|
|||||||
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
OPUS_SHIM_LIBRARY=$(abspath ./pkg/opus/shim/libopusshim.so) \
|
||||||
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
|
||||||
|
|
||||||
|
## Compiles and runs the standalone C++ unit tests for the backends (pure
|
||||||
|
## helpers that depend only on the stdlib + nlohmann/json, no full backend
|
||||||
|
## build). Discovers every *_test.cpp under backend/cpp/ - see
|
||||||
|
## backend/cpp/run-unit-tests.sh. Set NLOHMANN_INCLUDE to skip the header fetch.
|
||||||
|
test-backend-cpp:
|
||||||
|
bash backend/cpp/run-unit-tests.sh
|
||||||
|
|
||||||
## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
|
## Runs the core suite ($(TEST_PATHS)) with statement-coverage instrumentation
|
||||||
## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
|
## and writes a merged profile to $(COVERAGE_PROFILE). Deliberately omits
|
||||||
## --fail-fast so a single failure doesn't truncate the coverage number, and
|
## --fail-fast so a single failure doesn't truncate the coverage number, and
|
||||||
@@ -1453,13 +1460,32 @@ docs: docs/static/gallery.html
|
|||||||
########################################################
|
########################################################
|
||||||
|
|
||||||
## fyne cross-platform build
|
## fyne cross-platform build
|
||||||
build-launcher-darwin: build-launcher
|
# Build LocalAI.app from the launcher via fyne (metadata read from cmd/launcher/FyneApp.toml).
|
||||||
go run github.com/tiagomelo/macos-dmg-creator/cmd/createdmg@latest \
|
# Signing happens via contrib/macos/sign-and-notarize.sh, which is a no-op when the signing
|
||||||
--appName "LocalAI" \
|
# secrets are unset, so unsigned local/fork builds keep working.
|
||||||
--appBinaryPath "$(LAUNCHER_BINARY_NAME)" \
|
build-launcher-darwin:
|
||||||
--bundleIdentifier "com.localai.launcher" \
|
rm -rf dist/LocalAI.app cmd/launcher/LocalAI.app
|
||||||
--iconPath "core/http/static/logo.png" \
|
mkdir -p dist
|
||||||
--outputDir "dist/"
|
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os darwin -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)
|
||||||
|
mv cmd/launcher/LocalAI.app dist/LocalAI.app
|
||||||
|
bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.app
|
||||||
|
|
||||||
|
# Wrap the (signed) app into a drag-to-Applications DMG via hdiutil, then sign the DMG.
|
||||||
|
dmg-launcher-darwin: build-launcher-darwin
|
||||||
|
rm -rf dist/dmg dist/LocalAI.dmg
|
||||||
|
mkdir -p dist/dmg
|
||||||
|
cp -R dist/LocalAI.app dist/dmg/LocalAI.app
|
||||||
|
ln -s /Applications dist/dmg/Applications
|
||||||
|
hdiutil create -volname "LocalAI" -srcfolder dist/dmg -ov -format UDZO dist/LocalAI.dmg
|
||||||
|
bash contrib/macos/sign-and-notarize.sh sign dist/LocalAI.dmg
|
||||||
|
|
||||||
|
# Submit the DMG to Apple notarization and staple the ticket (no-op without notary secrets).
|
||||||
|
notarize-launcher-darwin: dmg-launcher-darwin
|
||||||
|
bash contrib/macos/sign-and-notarize.sh notarize dist/LocalAI.dmg
|
||||||
|
|
||||||
|
# Single entrypoint for CI: build -> sign app -> dmg -> sign dmg -> notarize -> staple.
|
||||||
|
release-launcher-darwin: notarize-launcher-darwin
|
||||||
|
@echo "dist/LocalAI.dmg is ready"
|
||||||
|
|
||||||
build-launcher-linux:
|
build-launcher-linux:
|
||||||
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv launcher.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
cd cmd/launcher && go run fyne.io/tools/cmd/fyne@latest package -os linux -icon ../../core/http/static/logo.png --executable $(LAUNCHER_BINARY_NAME)-linux && mv LocalAI.tar.xz ../../$(LAUNCHER_BINARY_NAME)-linux.tar.xz
|
||||||
|
|||||||
@@ -87,3 +87,18 @@ target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
|||||||
if(TARGET BUILD_INFO)
|
if(TARGET BUILD_INFO)
|
||||||
add_dependencies(${TARGET} BUILD_INFO)
|
add_dependencies(${TARGET} BUILD_INFO)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Unit test for the message-content normalization helper (message_content.h).
|
||||||
|
# Off by default so the normal backend build is untouched; enable with
|
||||||
|
# -DLLAMA_GRPC_BUILD_TESTS=ON and run via ctest. It reuses llama.cpp's vendored
|
||||||
|
# <nlohmann/json.hpp> (propagated by the common helpers library) so it has no
|
||||||
|
# extra dependency beyond what the backend already builds against.
|
||||||
|
option(LLAMA_GRPC_BUILD_TESTS "Build grpc-server unit tests" OFF)
|
||||||
|
if(LLAMA_GRPC_BUILD_TESTS)
|
||||||
|
enable_testing()
|
||||||
|
add_executable(message_content_test message_content_test.cpp message_content.h)
|
||||||
|
target_include_directories(message_content_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_link_libraries(message_content_test PRIVATE ${_LLAMA_COMMON_TARGET})
|
||||||
|
target_compile_features(message_content_test PRIVATE cxx_std_17)
|
||||||
|
add_test(NAME message_content_test COMMAND message_content_test)
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -39,6 +39,7 @@
|
|||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "arg.h"
|
#include "arg.h"
|
||||||
#include "chat-auto-parser.h"
|
#include "chat-auto-parser.h"
|
||||||
|
#include "message_content.h"
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||||
#include <grpcpp/grpcpp.h>
|
#include <grpcpp/grpcpp.h>
|
||||||
@@ -1616,242 +1617,20 @@ public:
|
|||||||
|
|
||||||
for (int i = 0; i < request->messages_size(); i++) {
|
for (int i = 0; i < request->messages_size(); i++) {
|
||||||
const auto& msg = request->messages(i);
|
const auto& msg = request->messages(i);
|
||||||
json msg_json;
|
llama_grpc::ReconstructedMessageInput rin;
|
||||||
msg_json["role"] = msg.role();
|
rin.role = msg.role();
|
||||||
|
rin.content = msg.content();
|
||||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
rin.name = msg.name();
|
||||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
rin.tool_call_id = msg.tool_call_id();
|
||||||
|
rin.reasoning_content = msg.reasoning_content();
|
||||||
// Handle content - can be string, null, or array
|
rin.tool_calls = msg.tool_calls();
|
||||||
// For multimodal content, we'll embed images/audio from separate fields
|
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||||
if (!msg.content().empty()) {
|
if (rin.is_last_user_msg) {
|
||||||
// Try to parse content as JSON to see if it's already an array
|
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||||
json content_val;
|
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||||
try {
|
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
// Handle null values - convert to empty string to avoid template errors
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
content_val = "";
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
content_val = msg.content();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is an object (e.g., from tool call failures), convert to string
|
|
||||||
if (content_val.is_object()) {
|
|
||||||
content_val = content_val.dump();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is a string and this is the last user message with images/audio, combine them
|
|
||||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
|
||||||
json content_array = json::array();
|
|
||||||
// Add text first
|
|
||||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
|
||||||
// Add images
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add audios
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
} else {
|
|
||||||
// Use content as-is (already array or not last user message)
|
|
||||||
// Ensure null values are converted to empty string
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
} else {
|
|
||||||
msg_json["content"] = content_val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (is_last_user_msg && has_images_or_audio) {
|
|
||||||
// If no content but this is the last user message with images/audio, create content array
|
|
||||||
json content_array = json::array();
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
} else if (msg.role() == "tool") {
|
|
||||||
// Tool role messages must have content field set, even if empty
|
|
||||||
// Jinja templates expect content to be a string, not null or object
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
|
||||||
if (msg.content().empty()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): empty content, set to empty string\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): content exists: %s\n",
|
|
||||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
|
||||||
// Content exists, parse and ensure it's a string
|
|
||||||
json content_val;
|
|
||||||
try {
|
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): parsed JSON, type=%s\n",
|
|
||||||
i, content_val.is_null() ? "null" :
|
|
||||||
content_val.is_object() ? "object" :
|
|
||||||
content_val.is_string() ? "string" :
|
|
||||||
content_val.is_array() ? "array" : "other");
|
|
||||||
// Handle null values - Jinja templates expect content to be a string, not null
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): null content, converted to empty string\n", i);
|
|
||||||
} else if (content_val.is_object()) {
|
|
||||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): object content, converted to string: %s\n",
|
|
||||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
|
||||||
} else if (content_val.is_string()) {
|
|
||||||
msg_json["content"] = content_val.get<std::string>();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): string content, using as-is\n", i);
|
|
||||||
} else {
|
|
||||||
// For arrays or other types, convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): %s content, converted to string\n",
|
|
||||||
i, content_val.is_array() ? "array" : "other type");
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
msg_json["content"] = msg.content();
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (tool): not JSON, using as string\n", i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
|
||||||
// Jinja templates expect content to be present, default to empty string if not set
|
|
||||||
if (!msg_json.contains("content")) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d (role=%s): no content field, adding empty string\n",
|
|
||||||
i, msg.role().c_str());
|
|
||||||
msg_json["content"] = "";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||||
// Add optional fields for OpenAI-compatible message format
|
|
||||||
if (!msg.name().empty()) {
|
|
||||||
msg_json["name"] = msg.name();
|
|
||||||
}
|
|
||||||
if (!msg.tool_call_id().empty()) {
|
|
||||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
|
||||||
}
|
|
||||||
if (!msg.reasoning_content().empty()) {
|
|
||||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
|
||||||
}
|
|
||||||
if (!msg.tool_calls().empty()) {
|
|
||||||
// Parse tool_calls JSON string and add to message
|
|
||||||
try {
|
|
||||||
json tool_calls = json::parse(msg.tool_calls());
|
|
||||||
msg_json["tool_calls"] = tool_calls;
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
|
||||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
|
||||||
// set content to space " " instead of empty string "", because llama.cpp's
|
|
||||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
|
||||||
// which causes template errors when accessing message.content[:tool_start_length]
|
|
||||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d has tool_calls but empty content, setting to space\n", i);
|
|
||||||
msg_json["content"] = " ";
|
|
||||||
}
|
|
||||||
// Log each tool call with name and arguments
|
|
||||||
if (tool_calls.is_array()) {
|
|
||||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
|
||||||
const auto& tc = tool_calls[tc_idx];
|
|
||||||
std::string tool_name = "unknown";
|
|
||||||
std::string tool_args = "{}";
|
|
||||||
if (tc.contains("function")) {
|
|
||||||
const auto& func = tc["function"];
|
|
||||||
if (func.contains("name")) {
|
|
||||||
tool_name = func["name"].get<std::string>();
|
|
||||||
}
|
|
||||||
if (func.contains("arguments")) {
|
|
||||||
tool_args = func["arguments"].is_string() ?
|
|
||||||
func["arguments"].get<std::string>() :
|
|
||||||
func["arguments"].dump();
|
|
||||||
}
|
|
||||||
} else if (tc.contains("name")) {
|
|
||||||
tool_name = tc["name"].get<std::string>();
|
|
||||||
if (tc.contains("arguments")) {
|
|
||||||
tool_args = tc["arguments"].is_string() ?
|
|
||||||
tc["arguments"].get<std::string>() :
|
|
||||||
tc["arguments"].dump();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] PredictStream: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
|
||||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error& e) {
|
|
||||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Debug: Log final content state before adding to array
|
|
||||||
if (msg_json.contains("content")) {
|
|
||||||
if (msg_json["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
|
||||||
i, msg_json["content"].is_string() ? "string" :
|
|
||||||
msg_json["content"].is_array() ? "array" :
|
|
||||||
msg_json["content"].is_object() ? "object" : "other",
|
|
||||||
msg_json["content"].is_null() ? 0 : 1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
}
|
|
||||||
|
|
||||||
messages_json.push_back(msg_json);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||||
@@ -2072,36 +1851,7 @@ public:
|
|||||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
SRV_INF("[CONTENT DEBUG] PredictStream: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||||
auto& msg = body_json["messages"][idx];
|
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
|
||||||
if (msg.contains("content")) {
|
|
||||||
if (msg["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Fix null content
|
|
||||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
|
||||||
// Tool messages must have string content, not array
|
|
||||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
|
||||||
// If content is object or other non-string type, convert to string for templates
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
|
||||||
if (msg["content"].is_object()) {
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else {
|
|
||||||
msg["content"] = "";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
|
||||||
idx, role_str.c_str(),
|
|
||||||
msg["content"].is_string() ? "string" :
|
|
||||||
msg["content"].is_array() ? "array" :
|
|
||||||
msg["content"].is_object() ? "object" : "other");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] PredictStream: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Add missing content
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2433,264 +2183,20 @@ public:
|
|||||||
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
|
SRV_INF("[CONTENT DEBUG] Predict: Processing %d messages\n", request->messages_size());
|
||||||
for (int i = 0; i < request->messages_size(); i++) {
|
for (int i = 0; i < request->messages_size(); i++) {
|
||||||
const auto& msg = request->messages(i);
|
const auto& msg = request->messages(i);
|
||||||
json msg_json;
|
llama_grpc::ReconstructedMessageInput rin;
|
||||||
msg_json["role"] = msg.role();
|
rin.role = msg.role();
|
||||||
|
rin.content = msg.content();
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d: role=%s, content_empty=%d, content_length=%zu\n",
|
rin.name = msg.name();
|
||||||
i, msg.role().c_str(), msg.content().empty() ? 1 : 0, msg.content().size());
|
rin.tool_call_id = msg.tool_call_id();
|
||||||
if (!msg.content().empty()) {
|
rin.reasoning_content = msg.reasoning_content();
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content (first 200 chars): %s\n",
|
rin.tool_calls = msg.tool_calls();
|
||||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
rin.is_last_user_msg = (i == last_user_msg_idx);
|
||||||
|
if (rin.is_last_user_msg) {
|
||||||
|
for (int j = 0; j < request->images_size(); j++) rin.images.push_back(request->images(j));
|
||||||
|
for (int j = 0; j < request->audios_size(); j++) rin.audios.push_back(request->audios(j));
|
||||||
|
for (int j = 0; j < request->videos_size(); j++) rin.videos.push_back(request->videos(j));
|
||||||
}
|
}
|
||||||
|
messages_json.push_back(llama_grpc::build_reconstructed_message(rin));
|
||||||
bool is_last_user_msg = (i == last_user_msg_idx);
|
|
||||||
bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0 || request->videos_size() > 0);
|
|
||||||
|
|
||||||
// Handle content - can be string, null, or array
|
|
||||||
// For multimodal content, we'll embed images/audio from separate fields
|
|
||||||
if (!msg.content().empty()) {
|
|
||||||
// Try to parse content as JSON to see if it's already an array
|
|
||||||
json content_val;
|
|
||||||
try {
|
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
// Handle null values - convert to empty string to avoid template errors
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d parsed JSON is null, converting to empty string\n", i);
|
|
||||||
content_val = "";
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
content_val = msg.content();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is an object (e.g., from tool call failures), convert to string
|
|
||||||
if (content_val.is_object()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content is object, converting to string\n", i);
|
|
||||||
content_val = content_val.dump();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If content is a string and this is the last user message with images/audio, combine them
|
|
||||||
if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
|
|
||||||
json content_array = json::array();
|
|
||||||
// Add text first
|
|
||||||
content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
|
|
||||||
// Add images
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add audios
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
} else {
|
|
||||||
// Use content as-is (already array or not last user message)
|
|
||||||
// Ensure null values are converted to empty string
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content_val was null, setting to empty string\n", i);
|
|
||||||
msg_json["content"] = "";
|
|
||||||
} else {
|
|
||||||
msg_json["content"] = content_val;
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d content set, type=%s\n",
|
|
||||||
i, content_val.is_string() ? "string" :
|
|
||||||
content_val.is_array() ? "array" :
|
|
||||||
content_val.is_object() ? "object" : "other");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (is_last_user_msg && has_images_or_audio) {
|
|
||||||
// If no content but this is the last user message with images/audio, create content array
|
|
||||||
json content_array = json::array();
|
|
||||||
if (request->images_size() > 0) {
|
|
||||||
for (int j = 0; j < request->images_size(); j++) {
|
|
||||||
json image_chunk;
|
|
||||||
image_chunk["type"] = "image_url";
|
|
||||||
json image_url;
|
|
||||||
image_url["url"] = "data:image/jpeg;base64," + request->images(j);
|
|
||||||
image_chunk["image_url"] = image_url;
|
|
||||||
content_array.push_back(image_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->audios_size() > 0) {
|
|
||||||
for (int j = 0; j < request->audios_size(); j++) {
|
|
||||||
json audio_chunk;
|
|
||||||
audio_chunk["type"] = "input_audio";
|
|
||||||
json input_audio;
|
|
||||||
input_audio["data"] = request->audios(j);
|
|
||||||
input_audio["format"] = "wav"; // default, could be made configurable
|
|
||||||
audio_chunk["input_audio"] = input_audio;
|
|
||||||
content_array.push_back(audio_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (request->videos_size() > 0) {
|
|
||||||
for (int j = 0; j < request->videos_size(); j++) {
|
|
||||||
json video_chunk;
|
|
||||||
video_chunk["type"] = "input_video";
|
|
||||||
json input_video;
|
|
||||||
input_video["data"] = request->videos(j);
|
|
||||||
video_chunk["input_video"] = input_video;
|
|
||||||
content_array.push_back(video_chunk);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
msg_json["content"] = content_array;
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d created content array with media\n", i);
|
|
||||||
} else if (!msg.tool_calls().empty()) {
|
|
||||||
// Tool call messages may have null content, but templates expect string
|
|
||||||
// IMPORTANT: Set to space " " instead of empty string "", because llama.cpp's
|
|
||||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
|
||||||
// which causes template errors when accessing message.content[:tool_start_length]
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls, setting content to space (not empty string)\n", i);
|
|
||||||
msg_json["content"] = " ";
|
|
||||||
} else if (msg.role() == "tool") {
|
|
||||||
// Tool role messages must have content field set, even if empty
|
|
||||||
// Jinja templates expect content to be a string, not null or object
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d is tool role, content_empty=%d\n", i, msg.content().empty() ? 1 : 0);
|
|
||||||
if (msg.content().empty()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): empty content, set to empty string\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): content exists: %s\n",
|
|
||||||
i, msg.content().substr(0, std::min<size_t>(200, msg.content().size())).c_str());
|
|
||||||
// Content exists, parse and ensure it's a string
|
|
||||||
json content_val;
|
|
||||||
try {
|
|
||||||
content_val = json::parse(msg.content());
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): parsed JSON, type=%s\n",
|
|
||||||
i, content_val.is_null() ? "null" :
|
|
||||||
content_val.is_object() ? "object" :
|
|
||||||
content_val.is_string() ? "string" :
|
|
||||||
content_val.is_array() ? "array" : "other");
|
|
||||||
// Handle null values - Jinja templates expect content to be a string, not null
|
|
||||||
if (content_val.is_null()) {
|
|
||||||
msg_json["content"] = "";
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): null content, converted to empty string\n", i);
|
|
||||||
} else if (content_val.is_object()) {
|
|
||||||
// If content is an object (e.g., from tool call failures/errors), convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): object content, converted to string: %s\n",
|
|
||||||
i, content_val.dump().substr(0, std::min<size_t>(200, content_val.dump().size())).c_str());
|
|
||||||
} else if (content_val.is_string()) {
|
|
||||||
msg_json["content"] = content_val.get<std::string>();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): string content, using as-is\n", i);
|
|
||||||
} else {
|
|
||||||
// For arrays or other types, convert to string
|
|
||||||
msg_json["content"] = content_val.dump();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): %s content, converted to string\n",
|
|
||||||
i, content_val.is_array() ? "array" : "other type");
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error&) {
|
|
||||||
// Not JSON, treat as plain string
|
|
||||||
msg_json["content"] = msg.content();
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (tool): not JSON, using as string\n", i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Ensure all messages have content set (fallback for any unhandled cases)
|
|
||||||
// Jinja templates expect content to be present, default to empty string if not set
|
|
||||||
if (!msg_json.contains("content")) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d (role=%s): no content field, adding empty string\n",
|
|
||||||
i, msg.role().c_str());
|
|
||||||
msg_json["content"] = "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add optional fields for OpenAI-compatible message format
|
|
||||||
if (!msg.name().empty()) {
|
|
||||||
msg_json["name"] = msg.name();
|
|
||||||
}
|
|
||||||
if (!msg.tool_call_id().empty()) {
|
|
||||||
msg_json["tool_call_id"] = msg.tool_call_id();
|
|
||||||
}
|
|
||||||
if (!msg.reasoning_content().empty()) {
|
|
||||||
msg_json["reasoning_content"] = msg.reasoning_content();
|
|
||||||
}
|
|
||||||
if (!msg.tool_calls().empty()) {
|
|
||||||
// Parse tool_calls JSON string and add to message
|
|
||||||
try {
|
|
||||||
json tool_calls = json::parse(msg.tool_calls());
|
|
||||||
msg_json["tool_calls"] = tool_calls;
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d has tool_calls: %s\n", i, tool_calls.dump().c_str());
|
|
||||||
// IMPORTANT: If message has tool_calls but content is empty or not set,
|
|
||||||
// set content to space " " instead of empty string "", because llama.cpp's
|
|
||||||
// common_chat_msgs_to_json_oaicompat converts empty strings to null (line 312),
|
|
||||||
// which causes template errors when accessing message.content[:tool_start_length]
|
|
||||||
if (!msg_json.contains("content") || (msg_json.contains("content") && msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d has tool_calls but empty content, setting to space\n", i);
|
|
||||||
msg_json["content"] = " ";
|
|
||||||
}
|
|
||||||
// Log each tool call with name and arguments
|
|
||||||
if (tool_calls.is_array()) {
|
|
||||||
for (size_t tc_idx = 0; tc_idx < tool_calls.size(); tc_idx++) {
|
|
||||||
const auto& tc = tool_calls[tc_idx];
|
|
||||||
std::string tool_name = "unknown";
|
|
||||||
std::string tool_args = "{}";
|
|
||||||
if (tc.contains("function")) {
|
|
||||||
const auto& func = tc["function"];
|
|
||||||
if (func.contains("name")) {
|
|
||||||
tool_name = func["name"].get<std::string>();
|
|
||||||
}
|
|
||||||
if (func.contains("arguments")) {
|
|
||||||
tool_args = func["arguments"].is_string() ?
|
|
||||||
func["arguments"].get<std::string>() :
|
|
||||||
func["arguments"].dump();
|
|
||||||
}
|
|
||||||
} else if (tc.contains("name")) {
|
|
||||||
tool_name = tc["name"].get<std::string>();
|
|
||||||
if (tc.contains("arguments")) {
|
|
||||||
tool_args = tc["arguments"].is_string() ?
|
|
||||||
tc["arguments"].get<std::string>() :
|
|
||||||
tc["arguments"].dump();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SRV_INF("[TOOL CALLS DEBUG] Predict: Message %d, tool_call %zu: name=%s, arguments=%s\n",
|
|
||||||
i, tc_idx, tool_name.c_str(), tool_args.c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (const json::parse_error& e) {
|
|
||||||
SRV_WRN("Failed to parse tool_calls JSON: %s\n", e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Debug: Log final content state before adding to array
|
|
||||||
if (msg_json.contains("content")) {
|
|
||||||
if (msg_json["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content is NULL - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: content type=%s, has_value=%d\n",
|
|
||||||
i, msg_json["content"].is_string() ? "string" :
|
|
||||||
msg_json["content"].is_array() ? "array" :
|
|
||||||
msg_json["content"].is_object() ? "object" : "other",
|
|
||||||
msg_json["content"].is_null() ? 0 : 1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Message %d FINAL STATE: NO CONTENT FIELD - THIS WILL CAUSE ERROR!\n", i);
|
|
||||||
}
|
|
||||||
|
|
||||||
messages_json.push_back(msg_json);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
// Final safety check: Ensure no message has null content (Jinja templates require strings)
|
||||||
@@ -2911,36 +2417,7 @@ public:
|
|||||||
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
if (body_json.contains("messages") && body_json["messages"].is_array()) {
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
SRV_INF("[CONTENT DEBUG] Predict: Before oaicompat_chat_params_parse - checking %zu messages\n", body_json["messages"].size());
|
||||||
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
for (size_t idx = 0; idx < body_json["messages"].size(); idx++) {
|
||||||
auto& msg = body_json["messages"][idx];
|
llama_grpc::normalize_template_message(body_json["messages"][idx]);
|
||||||
std::string role_str = msg.contains("role") ? msg["role"].get<std::string>() : "unknown";
|
|
||||||
if (msg.contains("content")) {
|
|
||||||
if (msg["content"].is_null()) {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) has NULL content - FIXING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Fix null content
|
|
||||||
} else if (role_str == "tool" && msg["content"].is_array()) {
|
|
||||||
// Tool messages must have string content, not array
|
|
||||||
// oaicompat_chat_params_parse expects tool messages to have string content
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=tool) has array content, converting to string\n", idx);
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else if (!msg["content"].is_string() && !msg["content"].is_array()) {
|
|
||||||
// If content is object or other non-string type, convert to string for templates
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) content is not string/array, converting\n", idx, role_str.c_str());
|
|
||||||
if (msg["content"].is_object()) {
|
|
||||||
msg["content"] = msg["content"].dump();
|
|
||||||
} else {
|
|
||||||
msg["content"] = "";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s): content type=%s\n",
|
|
||||||
idx, role_str.c_str(),
|
|
||||||
msg["content"].is_string() ? "string" :
|
|
||||||
msg["content"].is_array() ? "array" :
|
|
||||||
msg["content"].is_object() ? "object" : "other");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
SRV_INF("[CONTENT DEBUG] Predict: BEFORE TEMPLATE - Message %zu (role=%s) MISSING content field - ADDING!\n", idx, role_str.c_str());
|
|
||||||
msg["content"] = ""; // Add missing content
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
192
backend/cpp/llama-cpp/message_content.h
Normal file
192
backend/cpp/llama-cpp/message_content.h
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <nlohmann/json.hpp>
|
||||||
|
|
||||||
|
namespace llama_grpc {
|
||||||
|
|
||||||
|
// Normalizes a proto message's content string into the JSON value used when
|
||||||
|
// reconstructing OpenAI-format messages for the tokenizer (jinja) template.
|
||||||
|
//
|
||||||
|
// Shared by the streaming (PredictStream) and non-streaming (Predict) message
|
||||||
|
// reconstruction paths so the two cannot drift.
|
||||||
|
//
|
||||||
|
// LocalAI's Go layer (schema.Messages.ToProto) always sends content as a plain
|
||||||
|
// text string; multimodal media travels in separate proto fields, never inside
|
||||||
|
// content. So user/system/developer content is *only ever* opaque text and must
|
||||||
|
// NOT be JSON-sniffed: a prompt that merely looks like JSON (e.g. an ingredient
|
||||||
|
// list ["1/4 cup sugar", ...]) would otherwise be reinterpreted as structured
|
||||||
|
// content parts and rejected by oaicompat_chat_params_parse with
|
||||||
|
// "unsupported content[].type" (https://github.com/mudler/LocalAI/issues/10524).
|
||||||
|
// (developer is OpenAI's modern system alias - same "human-authored text" nature.)
|
||||||
|
//
|
||||||
|
// For assistant/tool messages we still collapse a literal JSON null/object
|
||||||
|
// (tool-call bookkeeping) to a string, but we never turn a plain string into an
|
||||||
|
// array/scalar. The array defense is therefore role-independent (arrays/scalars
|
||||||
|
// fall through for every role); the role gate only governs the null/object case.
|
||||||
|
inline nlohmann::ordered_json normalize_message_content(const std::string& role,
|
||||||
|
const std::string& content) {
|
||||||
|
nlohmann::ordered_json content_val = content;
|
||||||
|
if (role != "user" && role != "system" && role != "developer") {
|
||||||
|
try {
|
||||||
|
nlohmann::ordered_json parsed = nlohmann::ordered_json::parse(content);
|
||||||
|
if (parsed.is_null()) {
|
||||||
|
content_val = "";
|
||||||
|
} else if (parsed.is_object()) {
|
||||||
|
content_val = parsed.dump();
|
||||||
|
}
|
||||||
|
// arrays / scalars: keep the original plain-text string as-is
|
||||||
|
} catch (const nlohmann::ordered_json::parse_error&) {
|
||||||
|
// Not JSON, already the plain string
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return content_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final safety pass applied to each reconstructed OpenAI message right before it
|
||||||
|
// is handed to oaicompat_chat_params_parse (jinja templating). Jinja templates
|
||||||
|
// assume content is a string: a literal null breaks slicing such as
|
||||||
|
// message.content[:N] (#7324), and a tool message with array content is rejected
|
||||||
|
// (#7528). A multimodal user message legitimately carries a typed-part array
|
||||||
|
// ({type:text}, {type:image_url}, ...), which must be left intact. Shared by the
|
||||||
|
// streaming and non-streaming paths so this invariant cannot drift between them.
|
||||||
|
inline void normalize_template_message(nlohmann::ordered_json& msg) {
|
||||||
|
if (!msg.contains("content")) {
|
||||||
|
msg["content"] = ""; // templates expect the field to exist
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
nlohmann::ordered_json& content = msg["content"];
|
||||||
|
const std::string role = (msg.contains("role") && msg["role"].is_string())
|
||||||
|
? msg["role"].get<std::string>()
|
||||||
|
: std::string();
|
||||||
|
if (content.is_null()) {
|
||||||
|
content = ""; // #7324: null would crash content[:N] slicing
|
||||||
|
} else if (role == "tool" && content.is_array()) {
|
||||||
|
content = content.dump(); // #7528: tool messages must have string content
|
||||||
|
} else if (!content.is_string() && !content.is_array()) {
|
||||||
|
if (content.is_object()) {
|
||||||
|
content = content.dump(); // tool-call bookkeeping object -> string
|
||||||
|
} else {
|
||||||
|
content = ""; // other scalar (number/bool) -> empty
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// string, or a non-tool (multimodal) typed-part array: leave untouched
|
||||||
|
}
|
||||||
|
|
||||||
|
// Plain-data view of a single proto message. Everything is flattened to
// standard types so the reconstruction logic can be shared and unit-tested
// without protobuf; both the streaming and non-streaming predict paths fill
// this in from proto::Message plus the request's media.
struct ReconstructedMessageInput {
    // Message role.
    std::string role;
    // proto.Message.content (always a plain string).
    std::string content;
    // Optional "name" field; left out of the output when empty.
    std::string name;
    // Optional "tool_call_id" field; left out of the output when empty.
    std::string tool_call_id;
    // Optional "reasoning_content" field; left out of the output when empty.
    std::string reasoning_content;
    // tool_calls serialized as a JSON string, or empty when there are none.
    std::string tool_calls;
    // When true, the request media below is attached to this message.
    bool is_last_user_msg{false};
    // Base64 image payloads (jpeg).
    std::vector<std::string> images;
    // Base64 audio payloads (wav).
    std::vector<std::string> audios;
    // Base64 video payloads.
    std::vector<std::string> videos;
};
|
||||||
|
|
||||||
|
// Appends the request's media as OpenAI typed content parts. Imperative (not
|
||||||
|
// brace-init) to avoid nlohmann's object-vs-array initializer-list ambiguity.
|
||||||
|
inline void append_media_parts(nlohmann::ordered_json& content_array,
|
||||||
|
const std::vector<std::string>& images,
|
||||||
|
const std::vector<std::string>& audios,
|
||||||
|
const std::vector<std::string>& videos) {
|
||||||
|
for (const auto& img : images) {
|
||||||
|
nlohmann::ordered_json image_chunk;
|
||||||
|
image_chunk["type"] = "image_url";
|
||||||
|
nlohmann::ordered_json image_url;
|
||||||
|
image_url["url"] = "data:image/jpeg;base64," + img;
|
||||||
|
image_chunk["image_url"] = image_url;
|
||||||
|
content_array.push_back(image_chunk);
|
||||||
|
}
|
||||||
|
for (const auto& aud : audios) {
|
||||||
|
nlohmann::ordered_json audio_chunk;
|
||||||
|
audio_chunk["type"] = "input_audio";
|
||||||
|
nlohmann::ordered_json input_audio;
|
||||||
|
input_audio["data"] = aud;
|
||||||
|
input_audio["format"] = "wav"; // default; could be made configurable
|
||||||
|
audio_chunk["input_audio"] = input_audio;
|
||||||
|
content_array.push_back(audio_chunk);
|
||||||
|
}
|
||||||
|
for (const auto& vid : videos) {
|
||||||
|
nlohmann::ordered_json video_chunk;
|
||||||
|
video_chunk["type"] = "input_video";
|
||||||
|
nlohmann::ordered_json input_video;
|
||||||
|
input_video["data"] = vid;
|
||||||
|
video_chunk["input_video"] = input_video;
|
||||||
|
content_array.push_back(video_chunk);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reconstructs a single OpenAI-format message (the object fed to
|
||||||
|
// oaicompat_chat_params_parse) from a proto message. Shared by PredictStream and
|
||||||
|
// Predict so the content/multimodal/tool_calls handling cannot drift between the
|
||||||
|
// two stream modes (it previously lived as two ~150-line copies with a redundant
|
||||||
|
// Predict-only tool_calls->" " branch). Guarantees content is always a string or
|
||||||
|
// a typed-part array, never null/missing.
|
||||||
|
inline nlohmann::ordered_json build_reconstructed_message(const ReconstructedMessageInput& in) {
|
||||||
|
nlohmann::ordered_json msg_json;
|
||||||
|
msg_json["role"] = in.role;
|
||||||
|
const bool has_media = !in.images.empty() || !in.audios.empty() || !in.videos.empty();
|
||||||
|
|
||||||
|
if (!in.content.empty()) {
|
||||||
|
nlohmann::ordered_json content_val = normalize_message_content(in.role, in.content);
|
||||||
|
if (content_val.is_string() && in.is_last_user_msg && has_media) {
|
||||||
|
// Last user message + media: build a typed-part array (text first).
|
||||||
|
nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
|
||||||
|
nlohmann::ordered_json text_part;
|
||||||
|
text_part["type"] = "text";
|
||||||
|
text_part["text"] = content_val.get<std::string>();
|
||||||
|
content_array.push_back(text_part);
|
||||||
|
append_media_parts(content_array, in.images, in.audios, in.videos);
|
||||||
|
msg_json["content"] = content_array;
|
||||||
|
} else if (content_val.is_null()) {
|
||||||
|
msg_json["content"] = "";
|
||||||
|
} else {
|
||||||
|
msg_json["content"] = content_val;
|
||||||
|
}
|
||||||
|
} else if (in.is_last_user_msg && has_media) {
|
||||||
|
// No text but media on the last user message: media-only typed array.
|
||||||
|
nlohmann::ordered_json content_array = nlohmann::ordered_json::array();
|
||||||
|
append_media_parts(content_array, in.images, in.audios, in.videos);
|
||||||
|
msg_json["content"] = content_array;
|
||||||
|
} else {
|
||||||
|
// Empty content (any role, incl. tool/assistant): templates need a string.
|
||||||
|
msg_json["content"] = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!in.name.empty()) {
|
||||||
|
msg_json["name"] = in.name;
|
||||||
|
}
|
||||||
|
if (!in.tool_call_id.empty()) {
|
||||||
|
msg_json["tool_call_id"] = in.tool_call_id;
|
||||||
|
}
|
||||||
|
if (!in.reasoning_content.empty()) {
|
||||||
|
msg_json["reasoning_content"] = in.reasoning_content;
|
||||||
|
}
|
||||||
|
if (!in.tool_calls.empty()) {
|
||||||
|
try {
|
||||||
|
nlohmann::ordered_json tool_calls = nlohmann::ordered_json::parse(in.tool_calls);
|
||||||
|
msg_json["tool_calls"] = tool_calls;
|
||||||
|
// tool_calls + empty/blank content: use " " not "", because llama.cpp's
|
||||||
|
// common_chat_msgs_to_json_oaicompat turns "" into null, which breaks
|
||||||
|
// templates that slice message.content[:tool_start_length] (#7324).
|
||||||
|
if (!msg_json.contains("content") ||
|
||||||
|
(msg_json["content"].is_string() && msg_json["content"].get<std::string>().empty())) {
|
||||||
|
msg_json["content"] = " ";
|
||||||
|
}
|
||||||
|
} catch (const nlohmann::ordered_json::parse_error&) {
|
||||||
|
// Malformed tool_calls JSON: leave content as-is (prior behavior).
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return msg_json;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace llama_grpc
|
||||||
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
234
backend/cpp/llama-cpp/message_content_test.cpp
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
// Unit tests for the shared message-reconstruction helpers (message_content.h).
|
||||||
|
//
|
||||||
|
// Build & run standalone (nlohmann/json single header on the include path):
|
||||||
|
// g++ -std=c++17 -I<dir-with-nlohmann> message_content_test.cpp -o t && ./t
|
||||||
|
// or via CMake: -DLLAMA_GRPC_BUILD_TESTS=ON then ctest.
|
||||||
|
//
|
||||||
|
// Regression coverage for:
|
||||||
|
// #10524 - a user/system prompt that is itself a JSON-array string must stay
|
||||||
|
// plain text, never be reinterpreted as OpenAI structured parts.
|
||||||
|
// #7324 - assistant/tool null content -> "" (templates slice content[:N]);
|
||||||
|
// assistant+tool_calls+empty content -> " " (not "", which becomes null).
|
||||||
|
// #7528 - tool message array content must reach the template as a string.
|
||||||
|
// multimodal - last user message text + media -> typed-part array, media kept.
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "message_content.h"
|
||||||
|
|
||||||
|
using nlohmann::ordered_json;
|
||||||
|
using llama_grpc::normalize_message_content;
|
||||||
|
using llama_grpc::normalize_template_message;
|
||||||
|
using llama_grpc::build_reconstructed_message;
|
||||||
|
using llama_grpc::ReconstructedMessageInput;
|
||||||
|
|
||||||
|
static int failures = 0;
|
||||||
|
|
||||||
|
static void check(bool ok, const std::string& name, const std::string& detail = "") {
|
||||||
|
if (!ok) {
|
||||||
|
std::cerr << "FAIL " << name << (detail.empty() ? "" : ": " + detail) << "\n";
|
||||||
|
failures++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- normalize_message_content -------------------------------------------
|
||||||
|
|
||||||
|
static void expect_norm_string(const char* name, const std::string& role,
|
||||||
|
const std::string& content, const std::string& want) {
|
||||||
|
auto got = normalize_message_content(role, content);
|
||||||
|
if (!got.is_string()) {
|
||||||
|
check(false, name, "expected a JSON string, got " +
|
||||||
|
std::string(got.is_array() ? "array" : got.is_object() ? "object" : "other") +
|
||||||
|
" (" + got.dump() + ")");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
check(got.get<std::string>() == want, name, "expected \"" + want + "\", got \"" + got.get<std::string>() + "\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_normalize() {
|
||||||
|
const std::string ingredients = R"(["1/4 cup brown sugar, packed","1 pound ground beef"])";
|
||||||
|
|
||||||
|
// #10524 - JSON-array text must stay a string. Role-INDEPENDENT array defense.
|
||||||
|
for (const char* role : {"user", "system", "developer", "function", "assistant", "tool"}) {
|
||||||
|
expect_norm_string((std::string("json_array_stays_text:") + role).c_str(), role, ingredients, ingredients);
|
||||||
|
}
|
||||||
|
|
||||||
|
// #10524 - user/system/developer JSON-object text stays verbatim (NOT re-dumped).
|
||||||
|
expect_norm_string("user_json_object_verbatim", "user", R"({"a":1})", R"({"a":1})");
|
||||||
|
expect_norm_string("system_json_object_verbatim", "system", R"({"a":1})", R"({"a":1})");
|
||||||
|
expect_norm_string("developer_json_object_verbatim", "developer", R"({"a":1})", R"({"a":1})");
|
||||||
|
|
||||||
|
// Plain text unchanged for all roles.
|
||||||
|
expect_norm_string("user_plain_text", "user", "hello world", "hello world");
|
||||||
|
expect_norm_string("assistant_non_json_text_kept", "assistant", "hi [unclosed", "hi [unclosed");
|
||||||
|
|
||||||
|
// #7324 boundary - user/system/developer literal "null" preserved (never parsed).
|
||||||
|
expect_norm_string("user_literal_null_stays", "user", "null", "null");
|
||||||
|
expect_norm_string("system_literal_null_stays", "system", "null", "null");
|
||||||
|
expect_norm_string("developer_literal_null_stays", "developer", "null", "null");
|
||||||
|
|
||||||
|
// #7324 - assistant/tool literal null collapses to empty string.
|
||||||
|
expect_norm_string("assistant_null_to_empty", "assistant", "null", "");
|
||||||
|
expect_norm_string("tool_null_to_empty", "tool", "null", "");
|
||||||
|
|
||||||
|
// #7324/#7528 - assistant/tool object bookkeeping stringified (stays a string).
|
||||||
|
check(normalize_message_content("assistant", R"({"tool":"x"})").is_string(), "assistant_object_stringified");
|
||||||
|
check(normalize_message_content("tool", R"({"error":"boom"})").is_string(), "tool_object_stringified");
|
||||||
|
|
||||||
|
// #10524-family - a bare scalar that parses as a JSON number stays the string.
|
||||||
|
expect_norm_string("assistant_scalar_number_stays_string", "assistant", "42", "42");
|
||||||
|
|
||||||
|
// baseline - empty content stays empty.
|
||||||
|
expect_norm_string("user_empty_stays_empty", "user", "", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- normalize_template_message (BEFORE TEMPLATE sanitizer) ---------------
|
||||||
|
|
||||||
|
static void test_template_sanitizer() {
|
||||||
|
// #7528 - a tool message with an ACTUAL array becomes a string.
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "tool"}, {"content", ordered_json::array({{{"type", "text"}, {"text", "r"}}})}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_string(), "before_template_tool_array_to_string", "got " + msg["content"].dump());
|
||||||
|
}
|
||||||
|
// #7324 - null content -> "" for any role.
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "assistant"}, {"content", nullptr}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_string() && msg["content"] == "", "before_template_null_to_empty");
|
||||||
|
}
|
||||||
|
// object content -> dumped string (would otherwise throw at the template).
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "assistant"}, {"content", {{"x", 1}}}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_string(), "before_template_object_to_string", "got " + msg["content"].dump());
|
||||||
|
}
|
||||||
|
// missing content field -> "".
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "user"}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg.contains("content") && msg["content"] == "", "before_template_missing_to_empty");
|
||||||
|
}
|
||||||
|
// multimodal: a well-typed user array must be left UNTOUCHED (role!=tool).
|
||||||
|
{
|
||||||
|
ordered_json parts = ordered_json::array();
|
||||||
|
parts.push_back({{"type", "text"}, {"text", "x"}});
|
||||||
|
ordered_json img; img["type"] = "image_url"; img["image_url"] = {{"url", "data:..."}};
|
||||||
|
parts.push_back(img);
|
||||||
|
ordered_json msg = {{"role", "user"}, {"content", parts}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"].is_array() && msg["content"].size() == 2, "before_template_user_typed_array_preserved",
|
||||||
|
"got " + msg["content"].dump());
|
||||||
|
}
|
||||||
|
// a plain string is left untouched.
|
||||||
|
{
|
||||||
|
ordered_json msg = {{"role", "user"}, {"content", "hello"}};
|
||||||
|
normalize_template_message(msg);
|
||||||
|
check(msg["content"] == "hello", "before_template_string_untouched");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- build_reconstructed_message ----------------------------------------
|
||||||
|
|
||||||
|
static void test_reconstruction() {
|
||||||
|
const std::string ingredients = R"(["1/4 cup brown sugar","1 pound ground beef"])";
|
||||||
|
|
||||||
|
// #10524 end-state - user JSON-array text, no media -> string content.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "user"; in.content = ingredients;
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == ingredients, "recon_user_json_array_string",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
}
|
||||||
|
// multimodal - user text + one image on last user msg -> typed array, image kept.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "user"; in.content = ingredients; in.is_last_user_msg = true;
|
||||||
|
in.images.push_back("BASE64IMG");
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_array() && m["content"].size() == 2, "recon_multimodal_text_plus_image",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
check(m["content"][0]["type"] == "text" && m["content"][0]["text"] == ingredients, "recon_multimodal_text_first");
|
||||||
|
check(m["content"][1]["type"] == "image_url", "recon_multimodal_image_kept");
|
||||||
|
}
|
||||||
|
// multimodal media-only - empty text + image on last user msg.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "user"; in.content = ""; in.is_last_user_msg = true;
|
||||||
|
in.images.push_back("BASE64IMG");
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_array() && m["content"].size() == 1 && m["content"][0]["type"] == "image_url",
|
||||||
|
"recon_media_only", "got " + m["content"].dump());
|
||||||
|
}
|
||||||
|
// #7528 - tool array-string content stays a string.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "tool"; in.content = R"(["a","b"])"; in.tool_call_id = "call_1";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == R"(["a","b"])", "recon_tool_array_string",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
check(m["tool_call_id"] == "call_1", "recon_tool_call_id_set");
|
||||||
|
}
|
||||||
|
// tool empty content -> "".
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "tool"; in.content = "";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == "", "recon_tool_empty_to_string");
|
||||||
|
}
|
||||||
|
// #7324 - assistant + tool_calls + empty content -> " " (single space, not "").
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "";
|
||||||
|
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"].is_string() && m["content"] == " ", "recon_toolcalls_empty_content_space",
|
||||||
|
"got " + m["content"].dump());
|
||||||
|
check(m["tool_calls"].is_array() && m["tool_calls"].size() == 1, "recon_toolcalls_parsed");
|
||||||
|
}
|
||||||
|
// assistant + tool_calls + real content keeps the content.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "I'll call f";
|
||||||
|
in.tool_calls = R"([{"id":"c1","type":"function","function":{"name":"f","arguments":"{}"}}])";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"] == "I'll call f", "recon_toolcalls_with_content_kept");
|
||||||
|
}
|
||||||
|
// assistant null content -> "".
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "null";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"] == "", "recon_assistant_null_to_empty");
|
||||||
|
}
|
||||||
|
// malformed tool_calls JSON must not throw; content preserved.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "assistant"; in.content = "hi"; in.tool_calls = "{not json";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["content"] == "hi" && !m.contains("tool_calls"), "recon_malformed_toolcalls_safe");
|
||||||
|
}
|
||||||
|
// optional fields: name + reasoning carried through.
|
||||||
|
{
|
||||||
|
ReconstructedMessageInput in;
|
||||||
|
in.role = "tool"; in.content = "result"; in.name = "get_weather"; in.reasoning_content = "thinking";
|
||||||
|
auto m = build_reconstructed_message(in);
|
||||||
|
check(m["name"] == "get_weather" && m["reasoning_content"] == "thinking", "recon_optional_fields");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test driver: runs every suite in this file, then reports the aggregate
// result. Exit status is 0 when no check failed and 1 otherwise.
int main() {
    test_normalize();
    test_template_sanitizer();
    test_reconstruction();

    // Failure path first, so the success message is the fall-through.
    if (failures != 0) {
        std::cerr << failures << " test(s) failed\n";
        return 1;
    }

    std::cout << "OK: all message_content tests passed\n";
    return 0;
}
|
||||||
@@ -18,6 +18,10 @@ done
|
|||||||
|
|
||||||
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
|
||||||
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
|
||||||
|
# Shared message-reconstruction helpers (included by grpc-server.cpp) and their
|
||||||
|
# unit test (compiled only when -DLLAMA_GRPC_BUILD_TESTS=ON).
|
||||||
|
cp -r message_content.h llama.cpp/tools/grpc-server/
|
||||||
|
cp -r message_content_test.cpp llama.cpp/tools/grpc-server/
|
||||||
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
|
||||||
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
|
||||||
|
|
||||||
|
|||||||
71
backend/cpp/run-unit-tests.sh
Executable file
71
backend/cpp/run-unit-tests.sh
Executable file
@@ -0,0 +1,71 @@
|
|||||||
|
#!/bin/bash
#
# Discovers and runs every standalone C++ unit test under backend/cpp/.
#
# A "standalone" unit test is a *_test.cpp that depends only on the C++ standard
# library and nlohmann/json (single header) - i.e. it exercises pure helpers and
# does not need the full llama.cpp + gRPC backend build. Tests that DO need the
# backend build use the CMake/ctest path (e.g. -DLLAMA_GRPC_BUILD_TESTS=ON)
# instead and are skipped here.
#
# This keeps CI generic: adding a new pure-C++ unit test file named *_test.cpp in
# an active backend source dir is picked up automatically, with no CI edits.
#
# Env:
#   NLOHMANN_INCLUDE  include dir that contains nlohmann/json.hpp. If unset, the
#                     nlohmann/json single header is fetched to a temp dir.
#   CXX               compiler (default: g++).
#   JSON_VERSION      nlohmann/json tag to fetch when NLOHMANN_INCLUDE is unset
#                     (default: v3.11.3).
#
# Exit status: 0 when every test compiled and passed (or none were found),
# 1 when the header fetch, a compile, or a test run failed.
#
# Note: -e is deliberately not set; compile/test failures are collected in
# $fail so that one broken test does not hide the results of the others.
set -uo pipefail

ROOT="$(cd "$(dirname "$0")" && pwd)"
CXX="${CXX:-g++}"
JSON_VERSION="${JSON_VERSION:-v3.11.3}"

# All temporary artifacts (fetched header, compiled test binaries) live under a
# single scratch directory that is removed on exit, whatever the outcome, so
# repeated runs do not accumulate stale temp dirs.
SCRATCH="$(mktemp -d)" || {
    echo "ERROR: failed to create scratch directory" >&2
    exit 1
}
trap 'rm -rf "$SCRATCH"' EXIT

JSON_INC="${NLOHMANN_INCLUDE:-}"
if [ -z "$JSON_INC" ]; then
    JSON_INC="$SCRATCH/include"
    mkdir -p "$JSON_INC/nlohmann"
    echo "Fetching nlohmann/json ${JSON_VERSION} single header..."
    if ! curl -L -sf \
        "https://raw.githubusercontent.com/nlohmann/json/${JSON_VERSION}/single_include/nlohmann/json.hpp" \
        -o "$JSON_INC/nlohmann/json.hpp"; then
        echo "ERROR: failed to fetch nlohmann/json header" >&2
        exit 1
    fi
fi

# Active source dirs only - exclude per-variant build copies, dev snapshots and
# the vendored upstream llama.cpp tree.
mapfile -t tests < <(find "$ROOT" -name '*_test.cpp' \
    -not -path '*/llama.cpp/*' \
    -not -path '*-build/*' \
    -not -path '*-dev/*' \
    -not -path '*fallback*' | sort)

if [ "${#tests[@]}" -eq 0 ]; then
    echo "No standalone C++ unit tests found under $ROOT"
    exit 0
fi

fail=0
index=0
for test_src in "${tests[@]}"; do
    name="$(basename "$test_src" .cpp)"
    # One sub-directory per test so equally named tests from different source
    # dirs cannot overwrite each other's binary.
    index=$((index + 1))
    bin_dir="$SCRATCH/bin-$index"
    mkdir -p "$bin_dir"
    bin="$bin_dir/$name"
    echo "==> $test_src"
    if ! "$CXX" -std=c++17 -Wall -Wextra \
        -I"$JSON_INC" -I"$(dirname "$test_src")" \
        "$test_src" -o "$bin"; then
        echo "COMPILE FAILED: $test_src" >&2
        fail=1
        continue
    fi
    if ! "$bin"; then
        echo "TEST FAILED: $test_src" >&2
        fail=1
    fi
done

echo "Ran ${#tests[@]} standalone C++ unit test file(s)"
exit "$fail"
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
# parakeet-cpp backend Makefile.
|
# parakeet-cpp backend Makefile.
|
||||||
#
|
#
|
||||||
# Upstream pin lives below as PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
# Upstream pin lives below as PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||||
# (.github/bump_deps.sh) can find and update it - matches the
|
# (.github/bump_deps.sh) can find and update it - matches the
|
||||||
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
# whisper.cpp / ds4 / vibevoice-cpp convention.
|
||||||
#
|
#
|
||||||
@@ -15,7 +15,7 @@
|
|||||||
# That's what the L0 smoke test uses. The default target below does the
|
# That's what the L0 smoke test uses. The default target below does the
|
||||||
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
# proper clone-at-pin + cmake build so CI doesn't need a side-checkout.
|
||||||
|
|
||||||
PARAKEET_VERSION?=89f5e2977b4d8bccd45e7bcc6f2ef7c4ed49e89a
|
PARAKEET_VERSION?=f469a57270a1cc4554acb15febf60e56619673b9
|
||||||
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
PARAKEET_REPO?=https://github.com/mudler/parakeet.cpp
|
||||||
|
|
||||||
GOCMD?=go
|
GOCMD?=go
|
||||||
|
|||||||
@@ -16,7 +16,15 @@ cp -rfv $CURDIR/run.sh $CURDIR/package/
|
|||||||
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
|
cp -rfLv $CURDIR/sources/go-piper/piper-phonemize/pi/lib/* $CURDIR/package/lib/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS has no glibc loader to bundle. The piper binary links its bundled
|
||||||
|
# libs (libucd, libespeak-ng, libpiper_phonemize, libonnxruntime) via
|
||||||
|
# @rpath but ships with no LC_RPATH, so dyld aborts at launch with
|
||||||
|
# "Library not loaded: @rpath/libucd.dylib ... no LC_RPATH's found".
|
||||||
|
# Add an @loader_path/lib rpath so @rpath resolves to package/lib/.
|
||||||
|
echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
|
||||||
|
install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/piper"
|
||||||
|
elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||||
# x86_64 architecture
|
# x86_64 architecture
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||||
|
|||||||
@@ -4,7 +4,12 @@ set -ex
|
|||||||
CURDIR=$(dirname "$(realpath "$0")")
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
|
export ESPEAK_NG_DATA="$CURDIR"/espeak-ng-data
|
||||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
|
||||||
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||||
|
|||||||
@@ -15,7 +15,14 @@ cp -avf $CURDIR/run.sh $CURDIR/package/
|
|||||||
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
|
cp -rfLv $CURDIR/backend-assets/lib/* $CURDIR/package/lib/
|
||||||
|
|
||||||
# Detect architecture and copy appropriate libraries
|
# Detect architecture and copy appropriate libraries
|
||||||
if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
# macOS has no glibc loader to bundle. silero-vad links its bundled
|
||||||
|
# libonnxruntime via @rpath but ships with no LC_RPATH, so dyld can't find
|
||||||
|
# it at runtime. Add an @loader_path/lib rpath so @rpath resolves to
|
||||||
|
# package/lib/ (matching the piper darwin fix, #10525).
|
||||||
|
echo "Detected macOS; adding @loader_path/lib rpath so bundled libs resolve via @rpath..."
|
||||||
|
install_name_tool -add_rpath @loader_path/lib "$CURDIR/package/silero-vad"
|
||||||
|
elif [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
|
||||||
# x86_64 architecture
|
# x86_64 architecture
|
||||||
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
echo "Detected x86_64 architecture, copying x86_64 libraries..."
|
||||||
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
cp -arfLv /lib64/ld-linux-x86-64.so.2 $CURDIR/package/lib/ld.so
|
||||||
|
|||||||
@@ -3,7 +3,11 @@ set -ex
|
|||||||
|
|
||||||
CURDIR=$(dirname "$(realpath "$0")")
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
|
||||||
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
if [ "$(uname)" = "Darwin" ]; then
|
||||||
|
export DYLD_LIBRARY_PATH="$CURDIR"/lib:$DYLD_LIBRARY_PATH
|
||||||
|
else
|
||||||
|
export LD_LIBRARY_PATH="$CURDIR"/lib:$LD_LIBRARY_PATH
|
||||||
|
fi
|
||||||
|
|
||||||
# If there is a lib/ld.so, use it
|
# If there is a lib/ld.so, use it
|
||||||
if [ -f "$CURDIR"/lib/ld.so ]; then
|
if [ -f "$CURDIR"/lib/ld.so ]; then
|
||||||
|
|||||||
8
cmd/launcher/FyneApp.toml
Normal file
8
cmd/launcher/FyneApp.toml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
Website = "https://localai.io"
|
||||||
|
|
||||||
|
[Details]
|
||||||
|
Icon = "../../core/http/static/logo.png"
|
||||||
|
Name = "LocalAI"
|
||||||
|
ID = "com.localai.launcher"
|
||||||
|
Version = "0.0.0"
|
||||||
|
Build = 1
|
||||||
14
contrib/macos/Launcher.entitlements
Normal file
14
contrib/macos/Launcher.entitlements
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||||
|
<plist version="1.0">
|
||||||
|
<dict>
|
||||||
|
<key>com.apple.security.network.client</key>
|
||||||
|
<true/>
|
||||||
|
<key>com.apple.security.network.server</key>
|
||||||
|
<true/>
|
||||||
|
<key>com.apple.security.cs.allow-jit</key>
|
||||||
|
<true/>
|
||||||
|
<key>com.apple.security.cs.allow-unsigned-executable-memory</key>
|
||||||
|
<true/>
|
||||||
|
</dict>
|
||||||
|
</plist>
|
||||||
84
contrib/macos/sign-and-notarize.sh
Executable file
84
contrib/macos/sign-and-notarize.sh
Executable file
@@ -0,0 +1,84 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Code-sign and notarize macOS artifacts for LocalAI.
|
||||||
|
# Every sub-command is a no-op (exit 0) when its required secret is unset,
|
||||||
|
# so unsigned builds (forks, local dev, PRs) keep working.
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
ENTITLEMENTS="contrib/macos/Launcher.entitlements"
|
||||||
|
KEYCHAIN="localai-ci.keychain-db"
|
||||||
|
|
||||||
|
# import-cert: decode the base64 .p12 in MACOS_CERTIFICATE and import it into a
# dedicated CI keychain ($KEYCHAIN) that codesign is allowed to use.
#
# Env:
#   MACOS_CERTIFICATE      base64-encoded .p12; when unset/empty this is a no-op.
#   MACOS_CI_KEYCHAIN_PWD  password for the throwaway keychain (required).
#   MACOS_CERTIFICATE_PWD  password protecting the .p12 (required).
#
# Returns 0 on success or skip; a failing step propagates its status (and, under
# `set -e`, aborts the script).
cmd_import_cert() {
    if [ -z "${MACOS_CERTIFICATE:-}" ]; then
        echo "[sign] MACOS_CERTIFICATE unset: skipping cert import (unsigned build)"
        return 0
    fi

    local keychain_pwd cert_pwd certdir certfile default_keychain status=0
    # Validate every required secret up front, before anything touches disk
    # or the keychain list.
    keychain_pwd="${MACOS_CI_KEYCHAIN_PWD:?MACOS_CI_KEYCHAIN_PWD required when signing}"
    cert_pwd="${MACOS_CERTIFICATE_PWD:?}"

    # mktemp -d yields a private directory, so the decoded certificate is never
    # written to a predictable or group/world-readable path, and no stray empty
    # temp file is left behind.
    certdir="$(mktemp -d)"
    certfile="$certdir/certificate.p12"

    # Run the steps that need the on-disk certificate as one guarded list so the
    # file is removed even when a step fails (set -e would otherwise exit
    # before any cleanup could run).
    {
        echo "$MACOS_CERTIFICATE" | base64 --decode > "$certfile" &&
        security create-keychain -p "$keychain_pwd" "$KEYCHAIN" &&
        security set-keychain-settings -lut 21600 "$KEYCHAIN" &&
        security unlock-keychain -p "$keychain_pwd" "$KEYCHAIN" &&
        security import "$certfile" -k "$KEYCHAIN" -P "$cert_pwd" \
            -T /usr/bin/codesign -T /usr/bin/security
    } || status=$?
    rm -rf "$certdir"
    if [ "$status" -ne 0 ]; then
        return "$status"
    fi

    # Let Apple tooling and codesign use the imported key without a UI prompt.
    security set-key-partition-list -S apple-tool:,apple:,codesign: \
        -s -k "$keychain_pwd" "$KEYCHAIN" >/dev/null
    # Put the CI keychain first in the user search list, keeping the default one.
    default_keychain="$(security default-keychain | tr -d ' "')"
    security list-keychains -d user -s "$KEYCHAIN" "$default_keychain"
    echo "[sign] certificate imported into $KEYCHAIN"
}
|
||||||
|
|
||||||
|
# sign <path>: code-sign <path> with MACOS_SIGN_IDENTITY and verify the result.
# A no-op (returns 0) when MACOS_SIGN_IDENTITY is unset or empty.
cmd_sign() {
    local target="$1"
    if [ -z "${MACOS_SIGN_IDENTITY:-}" ]; then
        echo "[sign] MACOS_SIGN_IDENTITY unset: skipping codesign of $target"
        return 0
    fi

    local -a sign_opts
    if [[ "$target" == *.app ]]; then
        # Hardened runtime + entitlements are required for notarizing the app bundle.
        sign_opts=(--deep --force --options runtime --timestamp
                   --entitlements "$ENTITLEMENTS")
    else
        # A disk image carries no entitlements/runtime; just sign the container.
        sign_opts=(--force --timestamp)
    fi

    codesign "${sign_opts[@]}" --sign "$MACOS_SIGN_IDENTITY" "$target"
    codesign --verify --strict --verbose=2 "$target"
    echo "[sign] signed $target"
}
|
||||||
|
|
||||||
|
# notarize <dmg>: submit <dmg> to Apple's notary service with an App Store
# Connect API key, wait for the verdict, then staple and validate the ticket.
#
# Env:
#   MACOS_NOTARY_KEY        base64-encoded .p8 API key; when unset/empty this
#                           is a no-op.
#   MACOS_NOTARY_KEY_ID     key ID for the API key (required).
#   MACOS_NOTARY_ISSUER_ID  issuer ID for the API key (required).
#
# Returns 0 on success or skip; a failed submission propagates notarytool's
# status (and, under `set -e`, aborts the script).
cmd_notarize() {
    local dmg="${1:?usage: notarize <dmg>}"
    if [ -z "${MACOS_NOTARY_KEY:-}" ]; then
        echo "[notarize] MACOS_NOTARY_KEY unset: skipping notarization of $dmg"
        return 0
    fi

    local key_id issuer_id keydir keyfile status=0
    # Validate the companion secrets before the private key is written to disk.
    key_id="${MACOS_NOTARY_KEY_ID:?}"
    issuer_id="${MACOS_NOTARY_ISSUER_ID:?}"

    # mktemp -d yields a private directory, so the decoded API key never sits
    # at a predictable or group/world-readable path, and no stray empty temp
    # file is left behind.
    keydir="$(mktemp -d)"
    keyfile="$keydir/AuthKey.p8"

    # Guarded list: the key must be deleted even when decoding or the
    # submission fails (set -e would otherwise exit before any cleanup).
    {
        echo "$MACOS_NOTARY_KEY" | base64 --decode > "$keyfile" &&
        xcrun notarytool submit "$dmg" \
            --key "$keyfile" \
            --key-id "$key_id" \
            --issuer "$issuer_id" \
            --wait
    } || status=$?
    rm -rf "$keydir"
    if [ "$status" -ne 0 ]; then
        return "$status"
    fi

    xcrun stapler staple "$dmg"
    xcrun stapler validate "$dmg"
    echo "[notarize] notarized and stapled $dmg"
}
|
||||||
|
|
||||||
|
# Entry point: dispatch the first argument to the matching sub-command and pass
# the remaining arguments through. Unknown or missing sub-commands print usage
# to stderr and exit with status 2.
main() {
    local subcommand="${1:-}"
    # Drop the sub-command name, if one was given, so "$@" holds only its args.
    if [ "$#" -gt 0 ]; then
        shift
    fi

    if [ "$subcommand" = "import-cert" ]; then
        cmd_import_cert
    elif [ "$subcommand" = "sign" ]; then
        cmd_sign "$@"
    elif [ "$subcommand" = "notarize" ]; then
        cmd_notarize "$@"
    else
        echo "usage: $0 {import-cert|sign <path>|notarize <dmg>}" >&2
        exit 2
    fi
}
|
||||||
|
|
||||||
|
main "$@"
|
||||||
@@ -16,6 +16,7 @@ import (
|
|||||||
"github.com/mudler/LocalAI/core/services/galleryop"
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
"github.com/mudler/LocalAI/core/services/jobs"
|
"github.com/mudler/LocalAI/core/services/jobs"
|
||||||
"github.com/mudler/LocalAI/core/services/messaging"
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||||
"github.com/mudler/LocalAI/core/services/monitoring"
|
"github.com/mudler/LocalAI/core/services/monitoring"
|
||||||
"github.com/mudler/LocalAI/core/services/nodes"
|
"github.com/mudler/LocalAI/core/services/nodes"
|
||||||
"github.com/mudler/LocalAI/core/services/routing/admission"
|
"github.com/mudler/LocalAI/core/services/routing/admission"
|
||||||
@@ -330,9 +331,14 @@ func New(opts ...config.AppOption) (*Application, error) {
|
|||||||
gs := application.galleryService
|
gs := application.galleryService
|
||||||
sys := options.SystemState
|
sys := options.SystemState
|
||||||
cfgLoaderOpts := options.ToConfigLoaderOptions()
|
cfgLoaderOpts := options.ToConfigLoaderOptions()
|
||||||
gs.OnModelsChanged = func(_ messaging.CacheInvalidateEvent) {
|
gs.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
|
||||||
if err := application.ModelConfigLoader().LoadModelConfigsFromPath(sys.Model.ModelsPath, cfgLoaderOpts...); err != nil {
|
// ApplyRemoteChange honors the op: a "delete" prunes the element
|
||||||
xlog.Warn("Failed to reload model configs after peer invalidation", "error", err)
|
// (a reload-from-path is additive and cannot drop it), anything
|
||||||
|
// else reloads from disk; a named element's running instance is
|
||||||
|
// shut down so the new config takes effect. The originating
|
||||||
|
// replica reloads inline and never depends on this path.
|
||||||
|
if err := modeladmin.ApplyRemoteChange(application.ModelConfigLoader(), application.modelLoader, sys.Model.ModelsPath, evt, cfgLoaderOpts...); err != nil {
|
||||||
|
xlog.Warn("Failed to apply peer model config change", "error", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := application.galleryService.SubscribeBroadcasts(); err != nil {
|
if err := application.galleryService.SubscribeBroadcasts(); err != nil {
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ func AutocompleteEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, a
|
|||||||
// @Param name path string true "Model name"
|
// @Param name path string true "Model name"
|
||||||
// @Success 200 {object} map[string]any "success message"
|
// @Success 200 {object} map[string]any "success message"
|
||||||
// @Router /api/models/config-json/{name} [patch]
|
// @Router /api/models/config-json/{name} [patch]
|
||||||
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
modelName := c.Param("name")
|
modelName := c.Param("name")
|
||||||
@@ -173,6 +173,14 @@ func PatchConfigEndpoint(cl *config.ModelConfigLoader, _ *model.ModelLoader, app
|
|||||||
if _, err := svc.PatchConfig(c.Request().Context(), modelName, patchMap); err != nil {
|
if _, err := svc.PatchConfig(c.Request().Context(), modelName, patchMap); err != nil {
|
||||||
return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
|
return c.JSON(httpStatusForModelAdminError(err), map[string]any{"error": err.Error()})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Patch rewrites the config on disk and reloads only the local loader;
|
||||||
|
// tell peers to refresh so the change is consistent across replicas.
|
||||||
|
// No-op in standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
gs.BroadcastModelsChanged(modelName, "install")
|
||||||
|
}
|
||||||
|
|
||||||
return c.JSON(http.StatusOK, map[string]any{
|
return c.JSON(http.StatusOK, map[string]any{
|
||||||
"success": true,
|
"success": true,
|
||||||
"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
|
"message": fmt.Sprintf("Model '%s' updated successfully", modelName),
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ var _ = Describe("Config Metadata Endpoints", func() {
|
|||||||
app = echo.New()
|
app = echo.New()
|
||||||
app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
|
app.GET("/api/models/config-metadata", ConfigMetadataEndpoint())
|
||||||
app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
|
app.GET("/api/models/config-metadata/autocomplete/:provider", AutocompleteEndpoint(configLoader, modelLoader, appConfig))
|
||||||
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, appConfig))
|
app.PATCH("/api/models/config-json/:name", PatchConfigEndpoint(configLoader, modelLoader, nil, appConfig))
|
||||||
})
|
})
|
||||||
|
|
||||||
AfterEach(func() {
|
AfterEach(func() {
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
httpUtils "github.com/mudler/LocalAI/core/http/middleware"
|
httpUtils "github.com/mudler/LocalAI/core/http/middleware"
|
||||||
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||||
"github.com/mudler/LocalAI/internal"
|
"github.com/mudler/LocalAI/internal"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
@@ -55,7 +56,7 @@ func GetEditModelPage(cl *config.ModelConfigLoader, appConfig *config.Applicatio
|
|||||||
}
|
}
|
||||||
|
|
||||||
// EditModelEndpoint handles updating existing model configurations
|
// EditModelEndpoint handles updating existing model configurations
|
||||||
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
modelName := c.Param("name")
|
modelName := c.Param("name")
|
||||||
@@ -70,6 +71,17 @@ func EditModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appC
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tell peer replicas to refresh their in-memory config: this endpoint
|
||||||
|
// only reloaded the local loader. A rename is a delete of the old name
|
||||||
|
// plus an install of the new one. No-op in standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
if result.Renamed {
|
||||||
|
gs.BroadcastModelsChanged(result.OldName, "delete")
|
||||||
|
}
|
||||||
|
gs.BroadcastModelsChanged(result.NewName, "install")
|
||||||
|
}
|
||||||
|
|
||||||
msg := fmt.Sprintf("Model '%s' updated successfully. Model has been reloaded with new configuration.", result.NewName)
|
msg := fmt.Sprintf("Model '%s' updated successfully. Model has been reloaded with new configuration.", result.NewName)
|
||||||
if result.Renamed {
|
if result.Renamed {
|
||||||
msg = fmt.Sprintf("Model '%s' renamed to '%s' and updated successfully.", result.OldName, result.NewName)
|
msg = fmt.Sprintf("Model '%s' renamed to '%s' and updated successfully.", result.OldName, result.NewName)
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
app := echo.New()
|
app := echo.New()
|
||||||
// Set up a simple renderer for the test
|
// Set up a simple renderer for the test
|
||||||
app.Renderer = &testRenderer{}
|
app.Renderer = &testRenderer{}
|
||||||
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, applicationConfig))
|
app.POST("/import-model", ImportModelEndpoint(modelConfigLoader, nil, applicationConfig))
|
||||||
app.GET("/edit-model/:name", GetEditModelPage(modelConfigLoader, applicationConfig))
|
app.GET("/edit-model/:name", GetEditModelPage(modelConfigLoader, applicationConfig))
|
||||||
|
|
||||||
requestBody := bytes.NewBufferString(`{"name": "foo", "backend": "foo", "model": "foo"}`)
|
requestBody := bytes.NewBufferString(`{"name": "foo", "backend": "foo", "model": "foo"}`)
|
||||||
@@ -106,7 +106,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
Expect(exists).To(BeTrue())
|
Expect(exists).To(BeTrue())
|
||||||
|
|
||||||
app := echo.New()
|
app := echo.New()
|
||||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||||
|
|
||||||
newYAML := "name: newname\nbackend: llama\nmodel: foo\n"
|
newYAML := "name: newname\nbackend: llama\nmodel: foo\n"
|
||||||
req := httptest.NewRequest("POST", "/models/edit/oldname", bytes.NewBufferString(newYAML))
|
req := httptest.NewRequest("POST", "/models/edit/oldname", bytes.NewBufferString(newYAML))
|
||||||
@@ -163,7 +163,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||||
|
|
||||||
app := echo.New()
|
app := echo.New()
|
||||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||||
|
|
||||||
req := httptest.NewRequest(
|
req := httptest.NewRequest(
|
||||||
"POST",
|
"POST",
|
||||||
@@ -204,7 +204,7 @@ var _ = Describe("Edit Model test", func() {
|
|||||||
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
Expect(modelConfigLoader.LoadModelConfigsFromPath(tempDir)).To(Succeed())
|
||||||
|
|
||||||
app := echo.New()
|
app := echo.New()
|
||||||
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, applicationConfig))
|
app.POST("/models/edit/:name", EditModelEndpoint(modelConfigLoader, modelLoader, nil, applicationConfig))
|
||||||
|
|
||||||
req := httptest.NewRequest(
|
req := httptest.NewRequest(
|
||||||
"POST",
|
"POST",
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ImportModelEndpoint handles creating new model configurations
|
// ImportModelEndpoint handles creating new model configurations
|
||||||
func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func ImportModelEndpoint(cl *config.ModelConfigLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
// Get the raw body
|
// Get the raw body
|
||||||
body, err := io.ReadAll(c.Request().Body)
|
body, err := io.ReadAll(c.Request().Body)
|
||||||
@@ -245,6 +245,13 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica
|
|||||||
}
|
}
|
||||||
return c.JSON(http.StatusInternalServerError, response)
|
return c.JSON(http.StatusInternalServerError, response)
|
||||||
}
|
}
|
||||||
|
// Tell peer replicas to load the newly-created config from the shared
|
||||||
|
// models dir: this endpoint only reloaded the local loader. No-op in
|
||||||
|
// standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
gs.BroadcastModelsChanged(modelConfig.Name, "install")
|
||||||
|
}
|
||||||
|
|
||||||
// Return success response
|
// Return success response
|
||||||
response := ModelResponse{
|
response := ModelResponse{
|
||||||
Success: true,
|
Success: true,
|
||||||
|
|||||||
@@ -60,7 +60,10 @@ func GetNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc {
|
|||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
ctx := c.Request().Context()
|
ctx := c.Request().Context()
|
||||||
id := c.Param("id")
|
id := c.Param("id")
|
||||||
node, err := registry.Get(ctx, id)
|
// GetWithExtras (not Get) so the response carries the node's labels,
|
||||||
|
// loaded-model count, and in-flight total — the bare BackendNode keeps
|
||||||
|
// labels in a separate table, leaving the detail view's label list empty.
|
||||||
|
node, err := registry.GetWithExtras(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(http.StatusNotFound, nodeError(http.StatusNotFound, "node not found"))
|
return c.JSON(http.StatusNotFound, nodeError(http.StatusNotFound, "node not found"))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
|
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
"github.com/mudler/LocalAI/core/config"
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/services/galleryop"
|
||||||
"github.com/mudler/LocalAI/core/services/modeladmin"
|
"github.com/mudler/LocalAI/core/services/modeladmin"
|
||||||
"github.com/mudler/LocalAI/pkg/model"
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
)
|
)
|
||||||
@@ -24,7 +25,7 @@ import (
|
|||||||
// @Failure 404 {object} ModelResponse
|
// @Failure 404 {object} ModelResponse
|
||||||
// @Failure 500 {object} ModelResponse
|
// @Failure 500 {object} ModelResponse
|
||||||
// @Router /api/models/{name}/{action} [put]
|
// @Router /api/models/{name}/{action} [put]
|
||||||
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, gs *galleryop.GalleryService, appConfig *config.ApplicationConfig) echo.HandlerFunc {
|
||||||
svc := modeladmin.NewConfigService(cl, appConfig)
|
svc := modeladmin.NewConfigService(cl, appConfig)
|
||||||
return func(c echo.Context) error {
|
return func(c echo.Context) error {
|
||||||
modelName := c.Param("name")
|
modelName := c.Param("name")
|
||||||
@@ -36,6 +37,14 @@ func ToggleStateModelEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoade
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
return c.JSON(httpStatusForModelAdminError(err), ModelResponse{Success: false, Error: err.Error()})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enabling/disabling rewrites the config on disk and reloads only the
|
||||||
|
// local loader; tell peers to refresh so the model's availability is
|
||||||
|
// consistent across replicas. No-op in standalone mode.
|
||||||
|
if gs != nil {
|
||||||
|
gs.BroadcastModelsChanged(modelName, "install")
|
||||||
|
}
|
||||||
|
|
||||||
msg := fmt.Sprintf("Model '%s' has been %sd successfully.", modelName, action)
|
msg := fmt.Sprintf("Model '%s' has been %sd successfully.", modelName, action)
|
||||||
if action == modeladmin.ActionDisable {
|
if action == modeladmin.ActionDisable {
|
||||||
msg += " The model will not be loaded on demand until re-enabled."
|
msg += " The model will not be loaded on demand until re-enabled."
|
||||||
|
|||||||
@@ -72,19 +72,19 @@ func RegisterLocalAIRoutes(router *echo.Echo,
|
|||||||
router.POST("/backends/upgrades/check", backendGalleryEndpointService.CheckUpgradesEndpoint(), adminMiddleware)
|
router.POST("/backends/upgrades/check", backendGalleryEndpointService.CheckUpgradesEndpoint(), adminMiddleware)
|
||||||
router.POST("/backends/upgrade/:name", backendGalleryEndpointService.UpgradeBackendEndpoint(), adminMiddleware)
|
router.POST("/backends/upgrade/:name", backendGalleryEndpointService.UpgradeBackendEndpoint(), adminMiddleware)
|
||||||
// Custom model import endpoint
|
// Custom model import endpoint
|
||||||
router.POST("/models/import", localai.ImportModelEndpoint(cl, appConfig), adminMiddleware)
|
router.POST("/models/import", localai.ImportModelEndpoint(cl, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// URI model import endpoint
|
// URI model import endpoint
|
||||||
router.POST("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache), adminMiddleware)
|
router.POST("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache), adminMiddleware)
|
||||||
|
|
||||||
// Custom model edit endpoint
|
// Custom model edit endpoint
|
||||||
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
router.POST("/models/edit/:name", localai.EditModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// List model aliases endpoint
|
// List model aliases endpoint
|
||||||
router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware)
|
router.GET("/api/aliases", localai.ListAliasesEndpoint(cl), adminMiddleware)
|
||||||
|
|
||||||
// Toggle model enable/disable endpoint
|
// Toggle model enable/disable endpoint
|
||||||
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, appConfig), adminMiddleware)
|
router.PUT("/models/toggle-state/:name/:action", localai.ToggleStateModelEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// Toggle model pinned status endpoint
|
// Toggle model pinned status endpoint
|
||||||
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
|
router.PUT("/models/toggle-pinned/:name/:action", localai.TogglePinnedModelEndpoint(cl, appConfig, func() {
|
||||||
|
|||||||
@@ -922,7 +922,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
|
|||||||
app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
|
app.GET("/api/models/config-metadata/autocomplete/:provider", localai.AutocompleteEndpoint(cl, ml, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// PATCH config endpoint - partial update using nested JSON merge
|
// PATCH config endpoint - partial update using nested JSON merge
|
||||||
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, appConfig), adminMiddleware)
|
app.PATCH("/api/models/config-json/:name", localai.PatchConfigEndpoint(cl, ml, galleryService, appConfig), adminMiddleware)
|
||||||
|
|
||||||
// VRAM estimation endpoint
|
// VRAM estimation endpoint
|
||||||
app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
|
app.POST("/api/models/vram-estimate", localai.VRAMEstimateEndpoint(cl, appConfig), adminMiddleware)
|
||||||
|
|||||||
@@ -68,6 +68,32 @@ var _ = Describe("LLM tests", func() {
|
|||||||
Expect(protoMessages[0].Content).To(Equal("Hello World"))
|
Expect(protoMessages[0].Content).To(Equal("Hello World"))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Regression for mudler/LocalAI#10524: a text part whose inner text is
|
||||||
|
// itself a JSON-array string (mealie sends an ingredient list) must
|
||||||
|
// flatten to that exact string verbatim. ToProto must NOT escape or
|
||||||
|
// restructure it - the C++ backend then treats it as opaque text. This
|
||||||
|
// pins the precise Go-side input that produced the "unsupported
|
||||||
|
// content[].type" gRPC error before the backend stopped re-parsing it.
|
||||||
|
It("flattens a JSON-array-looking text part to the verbatim string (#10524)", func() {
|
||||||
|
ingredients := `["1/4 cup brown sugar, packed","1 pound ground beef"]`
|
||||||
|
messages := Messages{
|
||||||
|
{
|
||||||
|
Role: "user",
|
||||||
|
Content: []any{
|
||||||
|
map[string]any{
|
||||||
|
"type": "text",
|
||||||
|
"text": ingredients,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
protoMessages := messages.ToProto()
|
||||||
|
|
||||||
|
Expect(protoMessages).To(HaveLen(1))
|
||||||
|
Expect(protoMessages[0].Content).To(Equal(ingredients))
|
||||||
|
})
|
||||||
|
|
||||||
It("should convert message with tool_calls", func() {
|
It("should convert message with tool_calls", func() {
|
||||||
messages := Messages{
|
messages := Messages{
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -404,6 +404,36 @@ var _ = Describe("GalleryService cache invalidation broadcasts", func() {
|
|||||||
Element: "x", Op: "install",
|
Element: "x", Op: "install",
|
||||||
})).To(Succeed())
|
})).To(Succeed())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("BroadcastModelsChanged delivers the element and op to a peer's OnModelsChanged", func() {
|
||||||
|
var (
|
||||||
|
mu sync.Mutex
|
||||||
|
seen []messaging.CacheInvalidateEvent
|
||||||
|
)
|
||||||
|
svcB.OnModelsChanged = func(evt messaging.CacheInvalidateEvent) {
|
||||||
|
mu.Lock()
|
||||||
|
seen = append(seen, evt)
|
||||||
|
mu.Unlock()
|
||||||
|
}
|
||||||
|
Expect(svcA.SubscribeBroadcasts()).To(Succeed())
|
||||||
|
Expect(svcB.SubscribeBroadcasts()).To(Succeed())
|
||||||
|
|
||||||
|
// An admin edit on replica A must reach replica B over the same subject
|
||||||
|
// the gallery path uses, so B refreshes its in-memory config loader.
|
||||||
|
svcA.BroadcastModelsChanged("my-alias", "install")
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
Expect(seen).To(ContainElement(messaging.CacheInvalidateEvent{
|
||||||
|
Element: "my-alias", Op: "install",
|
||||||
|
}))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("BroadcastModelsChanged is a no-op when NATS is not wired (standalone)", func() {
|
||||||
|
standalone := galleryop.NewGalleryService(&config.ApplicationConfig{}, nil)
|
||||||
|
// No SetNATSClient: must not panic and must simply do nothing.
|
||||||
|
Expect(func() { standalone.BroadcastModelsChanged("x", "delete") }).ToNot(Panic())
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
var _ = Describe("GalleryService PostgreSQL hydration", func() {
|
var _ = Describe("GalleryService PostgreSQL hydration", func() {
|
||||||
|
|||||||
@@ -201,6 +201,24 @@ func (g *GalleryService) publishCacheInvalidate(subject string, evt messaging.Ca
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BroadcastModelsChanged notifies peer replicas that a model config was
|
||||||
|
// created, edited, or removed out-of-band of the gallery install/delete
|
||||||
|
// channel (e.g. the admin /models/edit, /models/import and
|
||||||
|
// /models/toggle-state endpoints, which write the YAML and reload only the
|
||||||
|
// local in-memory loader). Peers receive it via OnModelsChanged and refresh
|
||||||
|
// their own ModelConfigLoader so a request load-balanced to any replica sees
|
||||||
|
// the same config. No-op in standalone mode (no NATS client).
|
||||||
|
//
|
||||||
|
// op is "install" for a create/edit (the element must be (re)loaded from
|
||||||
|
// disk) or "delete" for a removal (the element must be pruned from memory,
|
||||||
|
// which a reload-from-path cannot do because the loader is additive).
|
||||||
|
func (g *GalleryService) BroadcastModelsChanged(element, op string) {
|
||||||
|
g.publishCacheInvalidate(messaging.SubjectCacheInvalidateModels, messaging.CacheInvalidateEvent{
|
||||||
|
Element: element,
|
||||||
|
Op: op,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// mergeStatus is the broadcast-side merge: it updates the in-memory map from
|
// mergeStatus is the broadcast-side merge: it updates the in-memory map from
|
||||||
// a peer's GalleryProgressEvent without re-publishing to NATS or re-writing
|
// a peer's GalleryProgressEvent without re-publishing to NATS or re-writing
|
||||||
// to PostgreSQL. UpdateStatus is the local-write entry point and does both;
|
// to PostgreSQL. UpdateStatus is the local-write entry point and does both;
|
||||||
|
|||||||
53
core/services/modeladmin/remote_sync.go
Normal file
53
core/services/modeladmin/remote_sync.go
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
package modeladmin
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
"github.com/mudler/LocalAI/pkg/model"
|
||||||
|
|
||||||
|
"github.com/mudler/xlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// opDelete is the CacheInvalidateEvent.Op value the gallery delete path and the
|
||||||
|
// admin delete endpoint use; a delete must prune (a reload-from-path cannot).
|
||||||
|
const opDelete = "delete"
|
||||||
|
|
||||||
|
// ApplyRemoteChange refreshes this replica's in-memory model state from a peer
|
||||||
|
// replica's model-config change broadcast (messaging.CacheInvalidateEvent on
|
||||||
|
// SubjectCacheInvalidateModels). It is the subscriber-side counterpart to
|
||||||
|
// GalleryService.BroadcastModelsChanged.
|
||||||
|
//
|
||||||
|
// The op matters because LoadModelConfigsFromPath is additive: it loads every
|
||||||
|
// YAML on disk into the loader but never removes an entry whose file is gone.
|
||||||
|
// So a delete cannot be propagated by a plain reload - the deleted element must
|
||||||
|
// be explicitly pruned. Specifically:
|
||||||
|
//
|
||||||
|
// - op == "delete" with a named element: prune that element from the loader.
|
||||||
|
// - otherwise: reload all configs from disk (picks up creates and edits).
|
||||||
|
//
|
||||||
|
// In both cases, when an element is named, any running instance on this replica
|
||||||
|
// is shut down (best-effort) so the next request rebuilds it from the new
|
||||||
|
// config instead of serving the stale one - mirroring what the originating
|
||||||
|
// replica does on a local edit/delete.
|
||||||
|
//
|
||||||
|
// ml may be nil (no running instances to shut down). modelsPath and opts are
|
||||||
|
// forwarded to LoadModelConfigsFromPath.
|
||||||
|
func ApplyRemoteChange(cl *config.ModelConfigLoader, ml *model.ModelLoader, modelsPath string, evt messaging.CacheInvalidateEvent, opts ...config.ConfigLoaderOption) error {
|
||||||
|
if evt.Op == opDelete && evt.Element != "" {
|
||||||
|
cl.RemoveModelConfig(evt.Element)
|
||||||
|
} else if err := cl.LoadModelConfigsFromPath(modelsPath, opts...); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop any running instance of the affected model so the next request
|
||||||
|
// rebuilds it from the refreshed config instead of serving the stale one.
|
||||||
|
// Best-effort: the model may not be loaded on this replica, which surfaces
|
||||||
|
// as a benign error here.
|
||||||
|
if ml != nil && evt.Element != "" {
|
||||||
|
if err := ml.ShutdownModel(evt.Element); err != nil {
|
||||||
|
xlog.Debug("ApplyRemoteChange: could not shut down model instance (likely not loaded)",
|
||||||
|
"model", evt.Element, "error", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
80
core/services/modeladmin/remote_sync_test.go
Normal file
80
core/services/modeladmin/remote_sync_test.go
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
package modeladmin
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo/v2"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
|
|
||||||
|
"github.com/mudler/LocalAI/core/config"
|
||||||
|
"github.com/mudler/LocalAI/core/services/messaging"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("ApplyRemoteChange", func() {
|
||||||
|
var (
|
||||||
|
dir string
|
||||||
|
loader *config.ModelConfigLoader
|
||||||
|
)
|
||||||
|
|
||||||
|
BeforeEach(func() {
|
||||||
|
dir = GinkgoT().TempDir()
|
||||||
|
loader = config.NewModelConfigLoader(dir)
|
||||||
|
})
|
||||||
|
|
||||||
|
writeYAML := func(name string, body map[string]any) {
|
||||||
|
body["name"] = name
|
||||||
|
data, err := yaml.Marshal(body)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(os.WriteFile(filepath.Join(dir, name+".yaml"), data, 0644)).To(Succeed())
|
||||||
|
}
|
||||||
|
|
||||||
|
It("loads a peer-created config from disk on an install event", func() {
|
||||||
|
// Peer wrote the YAML to the shared models dir; this replica has not
|
||||||
|
// loaded it yet (empty in-memory loader).
|
||||||
|
writeYAML("peer-alias", map[string]any{"alias": "qwen"})
|
||||||
|
_, ok := loader.GetModelConfig("peer-alias")
|
||||||
|
Expect(ok).To(BeFalse(), "precondition: not yet in memory")
|
||||||
|
|
||||||
|
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{
|
||||||
|
Element: "peer-alias", Op: "install",
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
_, ok = loader.GetModelConfig("peer-alias")
|
||||||
|
Expect(ok).To(BeTrue(), "install event must reload the new config from disk")
|
||||||
|
})
|
||||||
|
|
||||||
|
It("prunes a peer-deleted config that a reload-from-path cannot drop", func() {
|
||||||
|
// Model is present in memory (loaded earlier) but its file is now gone
|
||||||
|
// from the shared dir. LoadModelConfigsFromPath is additive, so only an
|
||||||
|
// explicit prune can remove it - this is the cross-replica delete bug.
|
||||||
|
writeYAML("doomed", map[string]any{"alias": "qwen"})
|
||||||
|
Expect(loader.LoadModelConfigsFromPath(dir)).To(Succeed())
|
||||||
|
_, ok := loader.GetModelConfig("doomed")
|
||||||
|
Expect(ok).To(BeTrue(), "precondition: in memory")
|
||||||
|
Expect(os.Remove(filepath.Join(dir, "doomed.yaml"))).To(Succeed())
|
||||||
|
|
||||||
|
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{
|
||||||
|
Element: "doomed", Op: "delete",
|
||||||
|
})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
_, ok = loader.GetModelConfig("doomed")
|
||||||
|
Expect(ok).To(BeFalse(), "delete event must prune the element from memory")
|
||||||
|
})
|
||||||
|
|
||||||
|
It("does a full reload when no element is named", func() {
|
||||||
|
writeYAML("m1", map[string]any{"alias": "qwen"})
|
||||||
|
writeYAML("m2", map[string]any{"alias": "qwen"})
|
||||||
|
|
||||||
|
err := ApplyRemoteChange(loader, nil, dir, messaging.CacheInvalidateEvent{})
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
|
||||||
|
_, ok1 := loader.GetModelConfig("m1")
|
||||||
|
_, ok2 := loader.GetModelConfig("m2")
|
||||||
|
Expect(ok1).To(BeTrue())
|
||||||
|
Expect(ok2).To(BeTrue())
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -673,6 +673,49 @@ func (r *NodeRegistry) Get(ctx context.Context, nodeID string) (*BackendNode, er
|
|||||||
return &node, nil
|
return &node, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetWithExtras returns a single node enriched with the same computed fields as
|
||||||
|
// ListWithExtras (labels, loaded-model count, in-flight total). The plain Get
|
||||||
|
// returns a bare BackendNode whose Labels live in a separate table, so the node
|
||||||
|
// detail view needs this to show a node's existing labels and live counts.
|
||||||
|
func (r *NodeRegistry) GetWithExtras(ctx context.Context, nodeID string) (*NodeWithExtras, error) {
|
||||||
|
node, err := r.Get(ctx, nodeID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
labels := make(map[string]string)
|
||||||
|
nodeLabels, err := r.GetNodeLabels(ctx, nodeID)
|
||||||
|
if err != nil {
|
||||||
|
xlog.Warn("GetWithExtras: failed to get labels", "node", nodeID, "error", err)
|
||||||
|
} else {
|
||||||
|
for _, l := range nodeLabels {
|
||||||
|
labels[l.Key] = l.Value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var modelCount int64
|
||||||
|
if err := r.db.WithContext(ctx).Model(&NodeModel{}).
|
||||||
|
Where("node_id = ? AND state = ?", nodeID, "loaded").
|
||||||
|
Count(&modelCount).Error; err != nil {
|
||||||
|
xlog.Warn("GetWithExtras: failed to get model count", "node", nodeID, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var inFlight struct{ Total int }
|
||||||
|
if err := r.db.WithContext(ctx).Model(&NodeModel{}).
|
||||||
|
Select("COALESCE(SUM(in_flight), 0) as total").
|
||||||
|
Where("node_id = ? AND state IN ?", nodeID, []string{"loaded", "unloading"}).
|
||||||
|
Scan(&inFlight).Error; err != nil {
|
||||||
|
xlog.Warn("GetWithExtras: failed to get in-flight count", "node", nodeID, "error", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &NodeWithExtras{
|
||||||
|
BackendNode: *node,
|
||||||
|
ModelCount: int(modelCount),
|
||||||
|
InFlightCount: inFlight.Total,
|
||||||
|
Labels: labels,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
// GetByName returns a single node by name.
|
// GetByName returns a single node by name.
|
||||||
func (r *NodeRegistry) GetByName(ctx context.Context, name string) (*BackendNode, error) {
|
func (r *NodeRegistry) GetByName(ctx context.Context, name string) (*BackendNode, error) {
|
||||||
var node BackendNode
|
var node BackendNode
|
||||||
|
|||||||
@@ -646,6 +646,38 @@ var _ = Describe("NodeRegistry", func() {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Describe("GetWithExtras", func() {
|
||||||
|
It("returns the node enriched with its labels map", func() {
|
||||||
|
node := makeNode("extras-node", "10.0.0.80:50051", 8_000_000_000)
|
||||||
|
Expect(registry.Register(context.Background(), node, true)).To(Succeed())
|
||||||
|
Expect(registry.SetNodeLabel(context.Background(), node.ID, "env", "prod")).To(Succeed())
|
||||||
|
Expect(registry.SetNodeLabel(context.Background(), node.ID, "region", "us-east")).To(Succeed())
|
||||||
|
|
||||||
|
got, err := registry.GetWithExtras(context.Background(), node.ID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).ToNot(BeNil())
|
||||||
|
Expect(got.ID).To(Equal(node.ID))
|
||||||
|
Expect(got.Name).To(Equal("extras-node"))
|
||||||
|
Expect(got.Labels).To(Equal(map[string]string{"env": "prod", "region": "us-east"}))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an empty (non-nil) labels map when the node has none", func() {
|
||||||
|
node := makeNode("extras-no-labels", "10.0.0.81:50051", 8_000_000_000)
|
||||||
|
Expect(registry.Register(context.Background(), node, true)).To(Succeed())
|
||||||
|
|
||||||
|
got, err := registry.GetWithExtras(context.Background(), node.ID)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(got).ToNot(BeNil())
|
||||||
|
Expect(got.Labels).ToNot(BeNil())
|
||||||
|
Expect(got.Labels).To(BeEmpty())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("returns an error for an unknown node", func() {
|
||||||
|
_, err := registry.GetWithExtras(context.Background(), "does-not-exist")
|
||||||
|
Expect(err).To(HaveOccurred())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
Describe("FindNodesBySelector", func() {
|
Describe("FindNodesBySelector", func() {
|
||||||
It("returns nodes matching all labels in selector", func() {
|
It("returns nodes matching all labels in selector", func() {
|
||||||
n1 := makeNode("sel-match", "10.0.0.80:50051", 8_000_000_000)
|
n1 := makeNode("sel-match", "10.0.0.80:50051", 8_000_000_000)
|
||||||
|
|||||||
@@ -22,13 +22,16 @@ Download the latest DMG from GitHub releases:
|
|||||||
3. Drag the LocalAI application to your Applications folder
|
3. Drag the LocalAI application to your Applications folder
|
||||||
4. Launch LocalAI from your Applications folder
|
4. Launch LocalAI from your Applications folder
|
||||||
|
|
||||||
## Known Issues
|
## Verification
|
||||||
|
|
||||||
> **Note**: The DMGs are not signed by Apple and may show as quarantined.
|
The `LocalAI.dmg` (and the app inside it) and the `local-ai` server binary are
|
||||||
>
|
signed with an Apple Developer ID and notarized by Apple, so they launch with no
|
||||||
> **Workaround**: See [this issue](https://github.com/mudler/LocalAI/issues/6268) for details on how to bypass the quarantine.
|
quarantine prompt or workaround. To inspect the signature yourself:
|
||||||
>
|
|
||||||
> **Fix tracking**: The signing issue is being tracked in [this issue](https://github.com/mudler/LocalAI/issues/6244).
|
```bash
|
||||||
|
spctl --assess --type open --context context:primary-signature -v /Applications/LocalAI.app
|
||||||
|
codesign --verify --deep --strict --verbose=2 /Applications/LocalAI.app
|
||||||
|
```
|
||||||
|
|
||||||
## Next Steps
|
## Next Steps
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"version": "v4.5.0"
|
"version": "v4.5.2"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,58 @@
|
|||||||
---
|
---
|
||||||
|
- name: "ornith-1.0-9b"
|
||||||
|
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||||
|
urls:
|
||||||
|
- https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF
|
||||||
|
description: |
|
||||||
|
[](https://deep-reinforce.com/ornith.html)
|
||||||
|
|
||||||
|
# Ornith-1.0-9B-GGUF
|
||||||
|
|
||||||
|
Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
|
||||||
|
|
||||||
|
Highlights:
|
||||||
|
|
||||||
|
- **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
|
||||||
|
- **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scaffold that drives those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
|
||||||
|
- **Licence**: MIT licensed, globally accessible, and free from regional limitations.
|
||||||
|
|
||||||
|
## Ornith 1.0 9B
|
||||||
|
|
||||||
|
This model card documents **Ornith-1.0-9B**, the most lightweight member of the Ornith family, designed for efficient single-GPU deployment.
|
||||||
|
|
||||||
|
### Benchmarks
|
||||||
|
|
||||||
|
Ornith-1.0-9B
|
||||||
|
Qwen3.5-9B
|
||||||
|
Qwen3.5-35B
|
||||||
|
Gemma4-12B
|
||||||
|
Gemma4-31B
|
||||||
|
|
||||||
|
Agentic Coding
|
||||||
|
|
||||||
|
...
|
||||||
|
license: "mit"
|
||||||
|
tags:
|
||||||
|
- llm
|
||||||
|
- gguf
|
||||||
|
overrides:
|
||||||
|
backend: llama-cpp
|
||||||
|
function:
|
||||||
|
automatic_tool_parsing_fallback: true
|
||||||
|
grammar:
|
||||||
|
disable: true
|
||||||
|
known_usecases:
|
||||||
|
- chat
|
||||||
|
options:
|
||||||
|
- use_jinja:true
|
||||||
|
parameters:
|
||||||
|
model: llama-cpp/models/Ornith-1.0-9B-GGUF/ornith-1.0-9b-Q4_K_M.gguf
|
||||||
|
template:
|
||||||
|
use_tokenizer_template: true
|
||||||
|
files:
|
||||||
|
- filename: llama-cpp/models/Ornith-1.0-9B-GGUF/ornith-1.0-9b-Q4_K_M.gguf
|
||||||
|
sha256: 5720d1f671b4996481274fffe01868c3c36e87c135cc8538471cc7bd6087b106
|
||||||
|
uri: https://huggingface.co/deepreinforce-ai/Ornith-1.0-9B-GGUF/resolve/main/ornith-1.0-9b-Q4_K_M.gguf
|
||||||
- name: "ornith-1.0-35b"
|
- name: "ornith-1.0-35b"
|
||||||
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
|
||||||
urls:
|
urls:
|
||||||
|
|||||||
@@ -17,9 +17,15 @@ rm -rf "${BACKEND_DIR}"/build-*
|
|||||||
# run.sh's final `exec $CURDIR/<binary>` is the contract for what gets launched;
|
# run.sh's final `exec $CURDIR/<binary>` is the contract for what gets launched;
|
||||||
# the binary is not always named after the backend (e.g. parakeet-cpp launches
|
# the binary is not always named after the backend (e.g. parakeet-cpp launches
|
||||||
# parakeet-cpp-grpc), so derive it from run.sh and fall back to ${BACKEND}.
|
# parakeet-cpp-grpc), so derive it from run.sh and fall back to ${BACKEND}.
|
||||||
|
#
|
||||||
|
# Only scan the `exec` line(s): many run.sh select a runtime CPU variant via
|
||||||
|
# unquoted `LIBRARY=$CURDIR/libgo<x>-avx512.so` lines, and a whole-file grep
|
||||||
|
# would pick the last of those (avx512, which Darwin never builds) instead of
|
||||||
|
# the binary — failing the check below for whisper/sam3-cpp/vibevoice-cpp/...
|
||||||
|
# Also tolerate the exec being quoted (`exec "$CURDIR"/<binary>`).
|
||||||
RUN_BINARY=""
|
RUN_BINARY=""
|
||||||
if [ -f "${BACKEND_DIR}/run.sh" ]; then
|
if [ -f "${BACKEND_DIR}/run.sh" ]; then
|
||||||
RUN_BINARY=$(grep -oE '\$CURDIR/[A-Za-z0-9._-]+' "${BACKEND_DIR}/run.sh" | grep -v 'ld\.so' | tail -1 | sed 's|\$CURDIR/||')
|
RUN_BINARY=$(grep -E '^[[:space:]]*exec[[:space:]]' "${BACKEND_DIR}/run.sh" | grep -oE '"?\$CURDIR"?/[A-Za-z0-9._-]+' | grep -v 'ld\.so' | tail -1 | sed -E 's|"?\$CURDIR"?/||')
|
||||||
fi
|
fi
|
||||||
RUN_BINARY="${RUN_BINARY:-${BACKEND}}"
|
RUN_BINARY="${RUN_BINARY:-${BACKEND}}"
|
||||||
|
|
||||||
|
|||||||
@@ -141,6 +141,38 @@ copy_elf_deps() {
|
|||||||
done < <(ldd "$elf" 2>/dev/null | awk '/=>/ && $3 ~ /^\// {print $3}')
|
done < <(ldd "$elf" 2>/dev/null | awk '/=>/ && $3 ~ /^\// {print $3}')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Sweep the transitive shared-library dependencies of everything already
|
||||||
|
# bundled in a lib dir. The per-vendor packagers below copy an explicit
|
||||||
|
# allowlist of top-level runtime libs, but those libs pull in transitive deps
|
||||||
|
# that aren't in the list (e.g. ROCm's librocprofiler-register.so.0, libnuma,
|
||||||
|
# libdrm_amdgpu). Because backends run through the bundled lib/ld.so with
|
||||||
|
# LD_LIBRARY_PATH=lib (see run.sh), an unbundled transitive dep is a hard load
|
||||||
|
# failure (issue #10537: "librocprofiler-register.so.0: cannot open shared
|
||||||
|
# object file"). ldd resolves the full recursive closure, so a single pass over
|
||||||
|
# the already-bundled libs is enough; core libc-family deps are skipped via
|
||||||
|
# copy_elf_deps/is_core_lib so we never shadow the loader's own libc/libstdc++.
|
||||||
|
sweep_transitive_deps() {
|
||||||
|
local dir="${1:-$TARGET_LIB_DIR}"
|
||||||
|
command -v ldd >/dev/null 2>&1 || return 0
|
||||||
|
|
||||||
|
# Snapshot the current set first: copy_elf_deps adds files as it runs, and
|
||||||
|
# ldd already returns the full recursive closure, so we only need to sweep
|
||||||
|
# the libs that were present before the sweep started.
|
||||||
|
# `local x=$(...)` keeps set -e from tripping on shopt -p's nonzero exit.
|
||||||
|
local old_nullglob=$(shopt -p nullglob)
|
||||||
|
shopt -s nullglob
|
||||||
|
local libs=("$dir"/*.so*)
|
||||||
|
eval "$old_nullglob"
|
||||||
|
|
||||||
|
local lib
|
||||||
|
for lib in "${libs[@]}"; do
|
||||||
|
[ -e "$lib" ] || continue
|
||||||
|
# Skip symlinks: their real target is in the snapshot and gets swept.
|
||||||
|
[ -L "$lib" ] && continue
|
||||||
|
copy_elf_deps "$lib"
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
# Package NVIDIA CUDA libraries
|
# Package NVIDIA CUDA libraries
|
||||||
package_cuda_libs() {
|
package_cuda_libs() {
|
||||||
echo "Packaging CUDA libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
echo "Packaging CUDA libraries for BUILD_TYPE=${BUILD_TYPE}..."
|
||||||
@@ -185,6 +217,10 @@ package_cuda_libs() {
|
|||||||
# cp -arfL /usr/local/cuda/targets "$TARGET_LIB_DIR/../cuda/" 2>/dev/null || true
|
# cp -arfL /usr/local/cuda/targets "$TARGET_LIB_DIR/../cuda/" 2>/dev/null || true
|
||||||
# fi
|
# fi
|
||||||
|
|
||||||
|
# Pull in transitive deps the allowlist misses so the backend is
|
||||||
|
# self-contained (same class of failure as #10537).
|
||||||
|
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||||
|
|
||||||
echo "CUDA libraries packaged successfully"
|
echo "CUDA libraries packaged successfully"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -261,6 +297,10 @@ package_rocm_libs() {
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Pull in transitive deps the allowlist misses (librocprofiler-register.so.0,
|
||||||
|
# libnuma, libdrm_amdgpu, ...) so the backend is self-contained. See #10537.
|
||||||
|
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||||
|
|
||||||
echo "ROCm libraries packaged successfully"
|
echo "ROCm libraries packaged successfully"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -303,6 +343,10 @@ package_intel_libs() {
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Pull in transitive deps the allowlist misses so the backend is
|
||||||
|
# self-contained (same class of failure as #10537).
|
||||||
|
sweep_transitive_deps "$TARGET_LIB_DIR"
|
||||||
|
|
||||||
echo "Intel oneAPI libraries packaged successfully"
|
echo "Intel oneAPI libraries packaged successfully"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -432,6 +476,7 @@ export -f copy_lib
|
|||||||
export -f copy_libs_glob
|
export -f copy_libs_glob
|
||||||
export -f is_core_lib
|
export -f is_core_lib
|
||||||
export -f copy_elf_deps
|
export -f copy_elf_deps
|
||||||
|
export -f sweep_transitive_deps
|
||||||
export -f package_cuda_libs
|
export -f package_cuda_libs
|
||||||
export -f package_rocm_libs
|
export -f package_rocm_libs
|
||||||
export -f package_intel_libs
|
export -f package_intel_libs
|
||||||
|
|||||||
54
scripts/build/package-gpu-libs_test.sh
Executable file
54
scripts/build/package-gpu-libs_test.sh
Executable file
@@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Regression test for scripts/build/package-gpu-libs.sh.
|
||||||
|
#
|
||||||
|
# Guards issue #10537: the per-vendor packagers copy an explicit allowlist of
|
||||||
|
# top-level GPU runtime libs but used to miss their transitive dependencies
|
||||||
|
# (e.g. ROCm's librocprofiler-register.so.0). Since backends run through the
|
||||||
|
# bundled lib/ld.so with LD_LIBRARY_PATH=lib, an unbundled transitive dep is a
|
||||||
|
# fatal "cannot open shared object file" at load time.
|
||||||
|
#
|
||||||
|
# This test fabricates a primary lib that links a transitive lib, simulates the
|
||||||
|
# allowlist step (primary copied, transitive not), and asserts the transitive
|
||||||
|
# sweep pulls the dependency in. Requires gcc + ldd (present in build images).
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
CURDIR=$(dirname "$(realpath "$0")")
|
||||||
|
SCRIPT="$CURDIR/package-gpu-libs.sh"
|
||||||
|
|
||||||
|
if ! command -v gcc >/dev/null 2>&1 || ! command -v ldd >/dev/null 2>&1; then
|
||||||
|
echo "SKIP: gcc/ldd not available"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
WORK=$(mktemp -d)
|
||||||
|
trap 'rm -rf "$WORK"' EXIT
|
||||||
|
|
||||||
|
# Transitive dependency (stand-in for librocprofiler-register.so.0).
|
||||||
|
echo 'int transitive_fn(void){return 42;}' > "$WORK/transitive.c"
|
||||||
|
gcc -shared -fPIC -o "$WORK/libfaketransitive.so.0" "$WORK/transitive.c"
|
||||||
|
|
||||||
|
# Primary allowlisted lib (stand-in for libhipblas.so) that links it.
|
||||||
|
echo 'int transitive_fn(void); int primary_fn(void){return transitive_fn();}' > "$WORK/primary.c"
|
||||||
|
gcc -shared -fPIC -o "$WORK/libfakeprimary.so.0" "$WORK/primary.c" \
|
||||||
|
-L"$WORK" -l:libfaketransitive.so.0 -Wl,-rpath,"$WORK"
|
||||||
|
|
||||||
|
# Simulate the allowlist step: primary already bundled, transitive not.
|
||||||
|
TARGET="$WORK/target"
|
||||||
|
mkdir -p "$TARGET"
|
||||||
|
cp "$WORK/libfakeprimary.so.0" "$TARGET/"
|
||||||
|
|
||||||
|
# Make the transitive dep resolvable like /opt/rocm libs are in the build image.
|
||||||
|
export LD_LIBRARY_PATH="$WORK:${LD_LIBRARY_PATH:-}"
|
||||||
|
|
||||||
|
# shellcheck source=/dev/null
|
||||||
|
source "$SCRIPT" "$TARGET"
|
||||||
|
sweep_transitive_deps "$TARGET"
|
||||||
|
|
||||||
|
if [ -e "$TARGET/libfaketransitive.so.0" ]; then
|
||||||
|
echo "PASS: transitive dependency was bundled by sweep_transitive_deps"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "FAIL: transitive dependency was NOT bundled (regression of #10537)"
|
||||||
|
ls -la "$TARGET"
|
||||||
|
exit 1
|
||||||
Reference in New Issue
Block a user