From 8ab56e2ad3664039030134d609db77ec9af47240 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@users.noreply.github.com>
Date: Mon, 20 Apr 2026 23:34:11 +0200
Subject: [PATCH] feat(gallery): add wan i2v 720p (#9457)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

feat(gallery): add Wan 2.1 I2V 14B 720P + pin main wan diffusion ggufs by sha256

Adds a new entry for the native-720p image-to-video sibling of the
480p I2V model (wan-2.1-i2v-14b-480p-ggml). The 720p I2V model is
trained purely as image-to-video — no first-last-frame interpolation
path — so motion is freer than repurposing the FLF2V 720P variant as
an i2v. Shares the same VAE, umt5_xxl text encoder, and clip_vision_h
auxiliary files as the existing 480p I2V and 720p FLF2V entries, so
no new aux downloads are introduced.

Also pins the main diffusion gguf by sha256 for the new entry and for
the three existing wan entries that were previously missing a hash
(wan-2.1-t2v-1.3b-ggml, wan-2.1-i2v-14b-480p-ggml,
wan-2.1-flf2v-14b-720p-ggml). Hashes were fetched from HuggingFace's
x-linked-etag header per .agents/adding-gallery-models.md.

Assisted-by: Claude:claude-opus-4-7
---
 gallery/index.yaml | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index cd8e51a10..da85b17e6 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -15295,6 +15295,7 @@
       model: wan2.1_t2v_1.3b-q8_0.gguf
   files:
     - filename: "wan2.1_t2v_1.3b-q8_0.gguf"
+      sha256: "8f10260cc26498fee303851ee1c2047918934125731b9b78d4babfce4ec27458"
       uri: "huggingface://calcuis/wan-gguf/wan2.1_t2v_1.3b-q8_0.gguf"
     - filename: "wan_2.1_vae.safetensors"
       uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors"
@@ -15328,6 +15329,7 @@
       - "vae_path:wan_2.1_vae.safetensors"
   files:
     - filename: "wan2.1-i2v-14b-480p-Q4_K_M.gguf"
+      sha256: "d91f7139acadb42ea05cdf97b311e5099f714f11fbe4d90916500e2f53cbba82"
       uri: "huggingface://city96/Wan2.1-I2V-14B-480P-gguf/wan2.1-i2v-14b-480p-Q4_K_M.gguf"
     - filename: "wan_2.1_vae.safetensors"
       uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors"
@@ -15369,6 +15371,7 @@
       - "vae_path:wan_2.1_vae.safetensors"
   files:
     - filename: "wan2.1-flf2v-14b-720p-Q4_K_M.gguf"
+      sha256: "7652d7d8b0795009ff21ed83d806af762aae8a8faa8640dd07b3a67e4dfab445"
       uri: "huggingface://city96/Wan2.1-FLF2V-14B-720P-gguf/wan2.1-flf2v-14b-720p-Q4_K_M.gguf"
     - filename: "wan_2.1_vae.safetensors"
       uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors"
@@ -15376,6 +15379,47 @@
       uri: "huggingface://city96/umt5-xxl-encoder-gguf/umt5-xxl-encoder-Q8_0.gguf"
     - filename: "clip_vision_h.safetensors"
       uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/clip_vision/clip_vision_h.safetensors"
+- name: wan-2.1-i2v-14b-720p-ggml  # gallery entry: Wan 2.1 I2V 14B, 720P, Q4_K_M GGUF
+  license: apache-2.0
+  url: "github:mudler/LocalAI/gallery/wan-ggml.yaml@master"  # config file this entry references; `overrides` below is presumably applied on top of it — confirm
+  description: |
+    Wan 2.1 I2V 14B 720P — image-to-video diffusion, GGUF Q4_K_M.
+    Native 720p sibling of the 480p I2V model: animates a single
+    reference image into a 33-frame clip at up to 1280x720. Trained
+    purely as image-to-video (no first-last-frame interpolation path),
+    so motion is freer and better-suited to single-anchor animation
+    than repurposing the FLF2V 720P variant for i2v. Shares the same
+    VAE, umt5_xxl text encoder, and clip_vision_h as the I2V 14B 480P
+    and FLF2V 14B 720P entries.
+  urls:
+    - https://huggingface.co/city96/Wan2.1-I2V-14B-720P-gguf  # same HF repo the main gguf under `files` is fetched from
+  tags:
+    - image-to-video
+    - wan
+    - video-generation
+    - cpu
+    - gpu
+  overrides:
+    parameters:
+      model: wan2.1-i2v-14b-720p-Q4_K_M.gguf  # same name as the first `files` entry's filename
+    options:
+      - "clip_vision_path:clip_vision_h.safetensors"  # names the clip_vision_h file listed under `files`
+      - "diffusion_model"  # bare flag with no value; NOTE(review): meaning is backend-defined — confirm
+      - "vae_decode_only:false"
+      - "sampler:euler"
+      - "flow_shift:3.0"  # NOTE(review): upstream Wan 2.1 reference script appears to default to a higher shift outside 480p i2v — confirm 3.0 is intended for 720p
+      - "t5xxl_path:umt5-xxl-encoder-Q8_0.gguf"  # names the umt5 text-encoder file listed under `files`
+      - "vae_path:wan_2.1_vae.safetensors"  # names the VAE file listed under `files`
+  files:  # only the main diffusion gguf carries a sha256; the three auxiliary files below are listed without one
+    - filename: "wan2.1-i2v-14b-720p-Q4_K_M.gguf"
+      sha256: "ffecd91e4b636d8e3e43f3fa388218158ba447109547bde777c6d67ef4fe42a4"  # pin for the main gguf
+      uri: "huggingface://city96/Wan2.1-I2V-14B-720P-gguf/wan2.1-i2v-14b-720p-Q4_K_M.gguf"
+    - filename: "wan_2.1_vae.safetensors"
+      uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors"
+    - filename: "umt5-xxl-encoder-Q8_0.gguf"
+      uri: "huggingface://city96/umt5-xxl-encoder-gguf/umt5-xxl-encoder-Q8_0.gguf"
+    - filename: "clip_vision_h.safetensors"
+      uri: "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/clip_vision/clip_vision_h.safetensors"
 - name: sd-1.5-ggml
   icon: https://avatars.githubusercontent.com/u/37351293
   license: creativeml-openrail-m