ggml: update qwen25vl vision size estimate (#10711)

fix crash in old clients with quantization progress (#10710)
Older clients assumed the digest was at least 19 characters long, so increase the size of the dummy digest to avoid array out-of-bounds crashes.
2026-01-19 04:51:17 -05:00 · 2025-05-14 16:42:30 -07:00 · 2025-05-14 14:54:18 -07:00
2 changed files with 5 additions and 15 deletions
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -6,7 +6,6 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
-	"math"
 	"slices"
 	"strings"

@@ -653,24 +652,15 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			numPatches*numPatches*headCount)
 	case "qwen25vl":
 		maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
-		mergeSize := uint64(llm.KV().Uint("vision.spatial_merge_size", 2))
-		temporalPatchSize := uint64(2)

-		// Calculate max possible patches based on max_pixels
-		maxHeight := uint64(math.Sqrt(float64(maxPixels)))
-		maxWidth := maxPixels / maxHeight
-		maxGridHeight := maxHeight / patchSize
-		maxGridWidth := maxWidth / patchSize
-		// Account for merged patches (2x2 grid)
-		numPatches := (maxGridHeight * maxGridWidth) / (mergeSize * mergeSize)
+		numPatches := maxPixels / (patchSize * patchSize)

-		// Calculate graph size based on typical operations in ProcessImage and createPatches
 		graphSize = 4 * (maxPixels*numChannels + // Original image storage
 			// Normalized pixels
 			maxPixels*numChannels +
-			// Patches storage (numPatches * channels * temporalPatchSize * patchSize^2)
-			numPatches*numChannels*temporalPatchSize*patchSize*patchSize +
-			// Self-attention calculations (similar to other architectures)
+			// Patches storage (numPatches * channels * patchSize^2)
+			numPatches*numChannels*patchSize*patchSize +
+			// Self-attention calculations
 			numPatches*numPatches*headCount +
 			// Additional buffer for processing
 			embeddingLength*numPatches)
--- a/server/create.go
+++ b/server/create.go
@@ -430,7 +430,7 @@ func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.Progr
 	fnWrap := func(n uint64) {
 		done := doneBytes.Add(n)
 		progress := float32(done) / float32(totalBytes)
-		fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantizeType), Digest: "0", Total: layer.Size, Completed: int64(progress * float32(layer.Size))})
+		fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantizeType), Digest: "0000000000000000000", Total: layer.Size, Completed: int64(progress * float32(layer.Size))})
 	}
 	ftype, err := ggml.ParseFileType(quantizeType)
 	if err != nil {
Author	SHA1	Message	Date
Bruce MacDonald	bd68d3ae50	ggml: update qwen25vl vision size estimate (#10711)	2025-05-14 16:42:30 -07:00
Daniel Hiltgen	ff80718e9c	fix crash in old clients with quantization progress (#10710) Older clients assumed the digest was at least 19 characters long, so increase the size of the dummy digest to avoid array out-of-bounds crashes.	2025-05-14 14:54:18 -07:00