mirror of
https://github.com/mudler/LocalAI.git
synced 2026-06-27 01:47:18 -04:00
The llama-cpp gRPC backend reconstructs OpenAI messages from proto for the tokenizer-template path and blindly json::parse'd each message's content string. LocalAI's Go layer always flattens content to a plain string, so a user prompt that merely looks like JSON (e.g. mealie's ingredient array ["1/4 cup brown sugar", ...]) was reinterpreted as structured content parts and rejected by oaicompat_chat_params_parse with "unsupported content[].type". Normalize content per role instead: user/system/developer content is opaque text and is never JSON-sniffed; assistant/tool content still collapses a literal JSON null/object (tool-call bookkeeping) to a string, but a plain string is never turned into an array/scalar. The array defense is role-independent, so the role gate only governs the benign null/object case. While here, extract the duplicated per-message reconstruction and the pre-template content sanitization into shared, unit-tested helpers (message_content.h) so the streaming (PredictStream) and non-streaming (Predict) paths cannot drift. This removes ~490 lines of copy-pasted defensive code, the dead tool-role parse branches, and the redundant Predict-only tool_calls branch, while preserving the prior #7324 (null content -> "") and #7528 (tool array content -> string) fixes. Tests: - backend/cpp/llama-cpp/message_content_test.cpp: standalone C++ unit tests for all three helpers (#10524, #7324, #7528, multimodal), discovered and run by `make test-backend-cpp` and a new generic tests-backend-cpp CI job. Also wired as an opt-in CMake/ctest target (-DLLAMA_GRPC_BUILD_TESTS=ON). - core/schema/message_test.go: Go regression pinning that ToProto flattens a JSON-array-looking text part to the verbatim string. - prepare.sh now copies message_content.h into the build tree. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
105 lines
4.4 KiB
CMake
105 lines
4.4 KiB
CMake
# Establish the policy baseline first so every command below is evaluated
# under the same policy set.
cmake_minimum_required(VERSION 3.15)

# Default C++ standard for targets created after this point.
set(CMAKE_CXX_STANDARD 17)

# Name of the backend executable target (was set twice; once is enough).
set(TARGET grpc-server)

# Unqualified imported-target names; the protobuf:: / gRPC:: namespace prefix
# is added at the point where they are linked.
set(_PROTOBUF_LIBPROTOBUF libprotobuf)
set(_REFLECTION grpc++_reflection)

# macOS only: add Homebrew's install prefix to the search paths. The prefix
# differs between Apple Silicon and Intel hosts.
#
# The variable name is passed bare so if() dereferences it exactly once. The
# `${CMAKE_SYSTEM_NAME}` form expands before if() runs, which leaves a bare
# word that if() may dereference again and is a syntax error when the variable
# is empty.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
  # Set correct Homebrew install folder for Apple Silicon and Intel Macs
  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
    set(HOMEBREW_DEFAULT_PREFIX "/opt/homebrew")
  else()
    set(HOMEBREW_DEFAULT_PREFIX "/usr/local")
  endif()

  # Directory-scoped: affects every target defined after this point in this
  # directory.
  link_directories("${HOMEBREW_DEFAULT_PREFIX}/lib")
  include_directories("${HOMEBREW_DEFAULT_PREFIX}/include")
endif()

# gRPC stack, resolved through each package's own CMake config files.
find_package(absl CONFIG REQUIRED)
find_package(Protobuf CONFIG REQUIRED)
find_package(gRPC CONFIG REQUIRED)

# Code generators invoked by the protoc rule further down, plus the
# unqualified name of the gRPC C++ library target.
find_program(_PROTOBUF_PROTOC protoc)
set(_GRPC_GRPCPP grpc++)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)

# find_program() stores <VAR>-NOTFOUND on failure instead of stopping. Fail at
# configure time with a clear message rather than letting the build later try
# to execute a "-NOTFOUND" command.
if(NOT _PROTOBUF_PROTOC)
  message(FATAL_ERROR "protoc was not found; it is required to generate the backend protobuf sources")
endif()
if(NOT _GRPC_CPP_PLUGIN_EXECUTABLE)
  message(FATAL_ERROR "grpc_cpp_plugin was not found; it is required to generate the backend gRPC sources")
endif()

# Header search paths for the targets declared in this directory: the binary
# dir (where the generated backend.*.h files are written) followed by
# protobuf's include directories.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${Protobuf_INCLUDE_DIRS}
)

# Configure-time diagnostic showing which protobuf was picked up.
message(STATUS "Using protobuf version ${Protobuf_VERSION} | Protobuf_INCLUDE_DIRS: ${Protobuf_INCLUDE_DIRS} | CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")

# Proto file: resolve backend.proto (six directory levels above this one) to
# an absolute path, then take its directory, which is passed to protoc as the
# import root (-I).
get_filename_component(hw_proto
  "../../../../../../backend/backend.proto"
  ABSOLUTE
  BASE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
get_filename_component(hw_proto_path "${hw_proto}" DIRECTORY)

# Generated sources: paths of the four files protoc writes into the build tree.

# gRPC service code (backend.grpc.pb.*)
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.grpc.pb.cc")

# Protobuf message code (backend.pb.*)
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.h")
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/backend.pb.cc")

# Generate the C++ protobuf message code and gRPC service code from
# backend.proto. The rule re-runs whenever the .proto file changes.
#
# VERBATIM makes argument escaping identical across generators/platforms.
# Because arguments are then passed through literally, the plugin option is a
# single quoted CMake argument rather than the legacy `--plugin=...="path"`
# form, whose embedded quotes were only stripped by shell re-parsing. The
# legacy ARGS keyword (ignored by CMake) is dropped.
add_custom_command(
  OUTPUT
    "${hw_proto_srcs}"
    "${hw_proto_hdrs}"
    "${hw_grpc_srcs}"
    "${hw_grpc_hdrs}"
  COMMAND ${_PROTOBUF_PROTOC}
    --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
    --cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
    -I "${hw_proto_path}"
    "--plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE}"
    "${hw_proto}"
  DEPENDS "${hw_proto}"
  COMMENT "Generating protobuf/gRPC C++ sources from backend.proto"
  VERBATIM)

# hw_grpc_proto: glue library built from the generated protobuf/gRPC files.
#
# STATIC is forced. Under the CPU_ALL_VARIANTS build BUILD_SHARED_LIBS=ON
# (ggml/llama become shared), which would otherwise turn this library into a
# DSO. A DSO would reference the hidden-visibility symbols of the static
# libprotobuf.a, and the linker cannot satisfy those ("hidden symbol ... in
# libprotobuf.a is referenced by DSO"). Staying STATIC links protobuf/gRPC
# directly into the grpc-server executable while only ggml/llama stay shared.
# The static variants are unaffected (they already use BUILD_SHARED_LIBS=OFF).
add_library(hw_grpc_proto STATIC)
target_sources(hw_grpc_proto PRIVATE
  ${hw_grpc_srcs}
  ${hw_grpc_hdrs}
  ${hw_proto_srcs}
  ${hw_proto_hdrs})

# The backend executable: one translation unit, with the json.hpp and
# httplib.h headers listed alongside it.
add_executable(${TARGET}
  grpc-server.cpp
  json.hpp
  httplib.h)

# Private header search paths: the sibling llava directory, then the root of
# the enclosing source tree.
target_include_directories(${TARGET} PRIVATE
  ../llava
  ${CMAKE_SOURCE_DIR})

# Pick the name of llama.cpp's helper library.
#
# Upstream llama.cpp renamed the `common` helpers library to `llama-common`,
# while forks that branched before the rename (e.g. llama-cpp-turboquant)
# still expose it as `common`. Probing for the target lets one CMakeLists
# drive both builds. Linking a name that is not a target would silently
# degrade to a plain `-l` flag and lose the PUBLIC include dir where common.h
# lives.
set(_LLAMA_COMMON_TARGET common)
if(TARGET llama-common)
  set(_LLAMA_COMMON_TARGET llama-common)
endif()

# Link line for the backend executable; the order matches the original list.
target_link_libraries(${TARGET} PRIVATE
  # llama.cpp libraries and threading
  ${_LLAMA_COMMON_TARGET}
  llama
  mtmd
  ${CMAKE_THREAD_LIBS_INIT}
  # Abseil flags and the generated proto/gRPC glue library
  absl::flags
  hw_grpc_proto
  absl::flags_parse
  # gRPC and protobuf runtimes
  gRPC::${_REFLECTION}
  gRPC::${_GRPC_GRPCPP}
  protobuf::${_PROTOBUF_LIBPROTOBUF})

# Require C++17 on the target itself. This agrees with the CMAKE_CXX_STANDARD
# 17 default set at the top of this file and with the cxx_std_17 requirement
# of message_content_test; the previous cxx_std_11 understated what the
# backend is actually built with.
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# Build after BUILD_INFO when the enclosing project defines that target.
if(TARGET BUILD_INFO)
  add_dependencies(${TARGET} BUILD_INFO)
endif()

# Optional unit test for the message-content normalization helper
# (message_content.h).
#
# Disabled by default so the regular backend build is untouched. Turn it on
# with -DLLAMA_GRPC_BUILD_TESTS=ON and run it through ctest. The test reuses
# llama.cpp's vendored <nlohmann/json.hpp> (propagated by the common helpers
# library), so it needs nothing beyond what the backend already builds against.
option(LLAMA_GRPC_BUILD_TESTS "Build grpc-server unit tests" OFF)

if(LLAMA_GRPC_BUILD_TESTS)
  enable_testing()

  add_executable(message_content_test
    message_content_test.cpp
    message_content.h)
  target_compile_features(message_content_test PRIVATE cxx_std_17)
  target_include_directories(message_content_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
  target_link_libraries(message_content_test PRIVATE ${_LLAMA_COMMON_TARGET})

  add_test(NAME message_content_test COMMAND message_content_test)
endif()