Mirror of https://github.com/mudler/LocalAI.git
Latest commit:

* Build llama.cpp separately
* WIP
* WIP
* WIP
* Start to try to attach some tests
* Add git and small fixups
* fix: correctly autoload external backends
* Try to run AIO tests
* Slightly update the Makefile helps
* Adapt auto-bumper
* Try to run linux test
* Add llama-cpp into build pipelines
* Add default capability (for cpu)
* Drop llama-cpp specific logic from the backend loader
* drop grpc install in ci for tests
* fixups
* Pass by backends path for tests
* Build protogen at start
* fix(tests): set backends path consistently
* Correctly configure the backends path
* Try to build for darwin
* WIP
* Compile for metal on arm64/darwin
* Try to run build off from cross-arch
* Add to the backend index nvidia-l4t and cpu's llama-cpp backends
* Build also darwin-x86 for llama-cpp
* Disable arm64 builds temporary
* Test backend build on PR
* Fixup build backend reusable workflow
* pass by skip drivers
* Use crane
* Skip drivers
* Fixups
* x86 darwin
* Add packaging step for llama.cpp
* fixups
* Fix leftover from bark-cpp extraction
* Try to fix hipblas build

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Bash · 52 lines · 2.0 KiB
#!/bin/bash

## Patches
## Apply patches from the `patches` directory
for patch in $(ls patches); do
    echo "Applying patch $patch"
    patch -d llama.cpp/ -p1 < patches/$patch
done
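# Note: -p1 strips one leading path component, so the files in patches/ are
# expected to be git-style unified diffs with a/ and b/ prefixes (an assumption
# about how patches/ is maintained, not something enforced here).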
set -e
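# The patch loop above runs before `set -e`, so a patch that fails to apply does
# not abort the script; from this point on, any failing command is fatal.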
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/tools/server/utils.hpp llama.cpp/tools/grpc-server/
cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
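# grpc-server.cpp reuses the upstream server's helpers, so the headers it pulls
# in (utils.hpp, json.hpp, httplib.h) are copied next to it and resolve without
# extra include paths (an assumption about how the copied CMakeLists.txt is set up).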
set +e
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
    echo "grpc-server already added"
else
    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
fi
set -e
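# The grep guard above keeps this step idempotent: re-running the script will not
# append add_subdirectory(grpc-server) to tools/CMakeLists.txt a second time.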
# Now to keep maximum compatibility with the original server.cpp, we need to remove the index.html.gz.hpp and loading.html.hpp includes
# and remove the main function
# TODO: upstream this to the original server.cpp by extracting the upstream main function to a separate file
awk '
/int[ \t]+main[ \t]*\(/ {            # If the line starts the main function
    in_main=1;                       # Set a flag
    open_braces=0;                   # Track number of open braces
}
in_main {
    open_braces += gsub(/\{/, "{");  # Count opening braces
    open_braces -= gsub(/\}/, "}");  # Count closing braces
    if (open_braces == 0) {          # If all braces are closed
        in_main=0;                   # End skipping
    }
    next;                            # Skip lines inside main
}
!in_main                             # Print lines not inside main
' "llama.cpp/tools/server/server.cpp" > llama.cpp/tools/grpc-server/server.cpp
# remove index.html.gz.hpp and loading.html.hpp includes
if [[ "$OSTYPE" == "darwin"* ]]; then
    # macOS
    sed -i '' '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
else
    # Linux and others
    sed -i '/#include "index\.html\.gz\.hpp"/d; /#include "loading\.html\.hpp"/d' llama.cpp/tools/grpc-server/server.cpp
fi
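# BSD sed on macOS requires an explicit (here empty) backup-suffix argument after
# -i, while GNU sed accepts -i with no argument, hence the two branches above.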