diff --git a/backend/cpp/ik-llama-cpp/Makefile b/backend/cpp/ik-llama-cpp/Makefile
index f001decf7..8254cd8c9 100644
--- a/backend/cpp/ik-llama-cpp/Makefile
+++ b/backend/cpp/ik-llama-cpp/Makefile
@@ -1,5 +1,5 @@
 
-IK_LLAMA_VERSION?=286ce324baed17c95faec77792eaa6bdb1c7a5f5
+IK_LLAMA_VERSION?=16996aeab772c69b6473597038b2ef0b85297e8b
 LLAMA_REPO?=https://github.com/ikawrakow/ik_llama.cpp
 
 CMAKE_ARGS?=
diff --git a/backend/cpp/ik-llama-cpp/patches/0002-clip-ggml-quantize-chunk-user-data.patch b/backend/cpp/ik-llama-cpp/patches/0002-clip-ggml-quantize-chunk-user-data.patch
new file mode 100644
index 000000000..5724f4d06
--- /dev/null
+++ b/backend/cpp/ik-llama-cpp/patches/0002-clip-ggml-quantize-chunk-user-data.patch
@@ -0,0 +1,11 @@
+--- a/examples/llava/clip.cpp
++++ b/examples/llava/clip.cpp
+@@ -2494,7 +2494,7 @@
+             }
+             new_data = work.data();
+
+-            new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
++            new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr, nullptr);
+         } else {
+             new_type = cur->type;
+             new_data = cur->data;