diff --git a/llama/compat/llama-ollama-compat.cpp b/llama/compat/llama-ollama-compat.cpp
index 432dbdfdb..84866b24f 100644
--- a/llama/compat/llama-ollama-compat.cpp
+++ b/llama/compat/llama-ollama-compat.cpp
@@ -823,6 +823,48 @@ void handle_deepseekocr_clip(gguf_context * meta, ggml_context * ctx) {
     promote_tensor_to_f32(meta, ctx, "v.position_embd.weight");
 }
 
+// =========================================================================
+// gemma4 (clip side — gemma4v projector)
+// =========================================================================
+//
+// Ollama's monolithic gemma4 GGUF embeds a SigLIP-style ViT plus the
+// gemma4v projector (a single `mm.input_projection`). All v.* / mm.*
+// tensor names already match upstream's PROJECTOR_TYPE_GEMMA4V — this
+// handler only needs KV translation and an F32 promote of the patch
+// embedding (Metal IM2COL).
+//
+// gemma4 vision uses image normalization mean=[0,0,0] / std=[1,1,1]
+// (the LM does its own per-image normalization via v.std_bias /
+// v.std_scale tensors) — different from the [0.5,0.5,0.5] used by
+// most other arches.
+
+void handle_gemma4_clip(gguf_context * meta, ggml_context * ctx) {
+    LLAMA_LOG_INFO("%s: detected Ollama-format gemma4 GGUF used as mmproj; translating\n", __func__);
+
+    copy_u32_kv(meta, "gemma4.vision.block_count",                   "clip.vision.block_count");
+    copy_u32_kv(meta, "gemma4.vision.embedding_length",              "clip.vision.embedding_length");
+    copy_u32_kv(meta, "gemma4.vision.feed_forward_length",           "clip.vision.feed_forward_length");
+    copy_u32_kv(meta, "gemma4.vision.attention.head_count",          "clip.vision.attention.head_count");
+    copy_f32_kv(meta, "gemma4.vision.attention.layer_norm_epsilon",  "clip.vision.attention.layer_norm_epsilon");
+    copy_u32_kv(meta, "gemma4.vision.patch_size",                    "clip.vision.patch_size");
+    // gemma4 vision is fixed at 224x224 patches.
+    inject_u32_if_missing(meta, "clip.vision.image_size", 224);
+    // projection_dim = LM embedding length.
+    copy_u32_kv(meta, "gemma4.embedding_length",                     "clip.vision.projection_dim");
+
+    static const float kZeros[3] = {0.0f, 0.0f, 0.0f};
+    static const float kOnes [3] = {1.0f, 1.0f, 1.0f};
+    inject_f32_arr_if_missing(meta, "clip.vision.image_mean", kZeros, 3);
+    inject_f32_arr_if_missing(meta, "clip.vision.image_std",  kOnes,  3);
+
+    inject_bool_if_missing(meta, "clip.has_vision_encoder", true);
+    gguf_set_val_str(meta, "clip.vision.projector_type", "gemma4v");
+    gguf_set_val_str(meta, "general.architecture",       "clip");
+
+    // Metal IM2COL needs F32 patch_embd weights (same as other arches).
+    promote_tensor_to_f32(meta, ctx, "v.patch_embd.weight");
+}
+
 // =========================================================================
 // llama4 (clip side)
 // =========================================================================
@@ -1104,6 +1146,10 @@ void translate_clip_metadata(gguf_context * meta, ggml_context * ctx) {
         handle_llama4_clip(meta, ctx);
         return;
     }
+    if (detect_ollama_gemma4(meta, ctx)) {
+        handle_gemma4_clip(meta, ctx);
+        return;
+    }
 }
 
 bool should_skip_tensor(const llama_model_loader * ml, const char * tensor_name) {
diff --git a/llm/llama_server.go b/llm/llama_server.go
index 1ffaf3d53..cbfdf8a5e 100644
--- a/llm/llama_server.go
+++ b/llm/llama_server.go
@@ -434,6 +434,7 @@ func NewLlamaServerRunner(
 	// the compat layer's clip-side coverage in llama/compat/.
 	compatClipArches := map[string]bool{
 		"gemma3":      true,
+		"gemma4":      true,
 		"qwen35moe":   true,
 		"mistral3":    true,
 		"deepseekocr": true,