diff --git a/backend/cpp/ik-llama-cpp/Makefile b/backend/cpp/ik-llama-cpp/Makefile index f001decf7..8254cd8c9 100644 --- a/backend/cpp/ik-llama-cpp/Makefile +++ b/backend/cpp/ik-llama-cpp/Makefile @@ -1,5 +1,5 @@ -IK_LLAMA_VERSION?=286ce324baed17c95faec77792eaa6bdb1c7a5f5 +IK_LLAMA_VERSION?=16996aeab772c69b6473597038b2ef0b85297e8b LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp CMAKE_ARGS?= diff --git a/backend/cpp/ik-llama-cpp/patches/0002-clip-ggml-quantize-chunk-user-data.patch b/backend/cpp/ik-llama-cpp/patches/0002-clip-ggml-quantize-chunk-user-data.patch new file mode 100644 index 000000000..5724f4d06 --- /dev/null +++ b/backend/cpp/ik-llama-cpp/patches/0002-clip-ggml-quantize-chunk-user-data.patch @@ -0,0 +1,11 @@ +--- a/examples/llava/clip.cpp ++++ b/examples/llava/clip.cpp +@@ -2494,7 +2494,7 @@ + } + new_data = work.data(); + +- new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr); ++ new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr, nullptr); + } else { + new_type = cur->type; + new_data = cur->data;