chore(size): display size of HF models and allow to specify it from the gallery (#8907)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto
2026-03-09 17:38:14 +01:00
committed by GitHub
parent 74db732873
commit 05a3d00924
8 changed files with 349 additions and 6 deletions

View File

@@ -12,6 +12,9 @@ type Metadata struct {
Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"`
// AdditionalFiles are used to add additional files to the model
AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"`
// Size is an optional hardcoded model size string (e.g. "500MB", "14.5GB").
// Used when the size cannot be estimated automatically.
Size string `json:"size,omitempty" yaml:"size,omitempty"`
// Gallery is a reference to the gallery which contains the model
Gallery config.Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"`
// Installed is used to indicate if the model is installed or not

View File

@@ -151,7 +151,7 @@ export default function Models() {
const term = searchVal || filterVal || ''
const queryParams = {
page: params.page || page,
items: 21,
items: 9,
}
if (term) queryParams.term = term
if (sortVal) {

View File

@@ -197,7 +197,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
items := c.QueryParam("items")
if items == "" {
items = "21"
items = "9"
}
models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState)
@@ -253,7 +253,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
itemsNum, err := strconv.Atoi(items)
if err != nil || itemsNum < 1 {
itemsNum = 21
itemsNum = 9
}
totalPages := int(math.Ceil(float64(len(models)) / float64(itemsNum)))
@@ -268,6 +268,25 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
seenIDs := make(map[string]bool)
weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true}
extractHFRepo := func(overrides map[string]interface{}, urls []string) string {
	// Preferred source: the model reference stored at overrides.parameters.model.
	// Indexing a nil map yields the zero value, so a nil overrides map simply
	// fails the first type assertion and falls through to the URL scan.
	if parameters, isMap := overrides["parameters"].(map[string]interface{}); isMap {
		if ref, isString := parameters["model"].(string); isString {
			if id, found := vram.ExtractHFRepoID(ref); found {
				return id
			}
		}
	}

	// Otherwise take the first entry of the metadata urls list that
	// vram.ExtractHFRepoID accepts.
	for i := range urls {
		id, found := vram.ExtractHFRepoID(urls[i])
		if found {
			return id
		}
	}

	// Nothing usable: callers treat the empty string as "no repo".
	return ""
}
hasWeightFiles := func(files []gallery.File) bool {
for _, f := range files {
ext := strings.ToLower(path.Ext(path.Base(f.URI)))
@@ -279,6 +298,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
const estimateTimeout = 3 * time.Second
const hfEstimateTimeout = 10 * time.Second
const estimateConcurrency = 3
sem := make(chan struct{}, estimateConcurrency)
var wg sync.WaitGroup
@@ -356,6 +376,34 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
}
}(files, obj)
} else if m.Size != "" {
if sizeBytes, err := vram.ParseSizeString(m.Size); err == nil && sizeBytes > 0 {
result := vram.EstimateFromSize(sizeBytes)
obj["estimated_size_bytes"] = result.SizeBytes
obj["estimated_size_display"] = result.SizeDisplay
obj["estimated_vram_bytes"] = result.VRAMBytes
obj["estimated_vram_display"] = result.VRAMDisplay
}
} else if hfRepoID := extractHFRepo(m.Overrides, m.URLs); hfRepoID != "" {
wg.Add(1)
go func(repoID string, out map[string]interface{}) {
defer wg.Done()
sem <- struct{}{}
defer func() { <-sem }()
ctx, cancel := context.WithTimeout(context.Background(), hfEstimateTimeout)
defer cancel()
result, err := vram.EstimateFromHFRepo(ctx, repoID)
if err == nil {
if result.SizeBytes > 0 {
out["estimated_size_bytes"] = result.SizeBytes
out["estimated_size_display"] = result.SizeDisplay
}
if result.VRAMBytes > 0 {
out["estimated_vram_bytes"] = result.VRAMBytes
out["estimated_vram_display"] = result.VRAMDisplay
}
}
}(hfRepoID, obj)
}
modelsJSON = append(modelsJSON, obj)
@@ -678,7 +726,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
}
items := c.QueryParam("items")
if items == "" {
items = "21"
items = "9"
}
backends, err := gallery.AvailableBackends(appConfig.BackendGalleries, appConfig.SystemState)
@@ -734,7 +782,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
itemsNum, err := strconv.Atoi(items)
if err != nil || itemsNum < 1 {
itemsNum = 21
itemsNum = 9
}
totalPages := int(math.Ceil(float64(len(backends)) / float64(itemsNum)))

View File

@@ -480,6 +480,7 @@
sha256: 8434af1d39eba99f0ef46cf1450bf1a63fa941a26933a1ef5dbbf4adf0d00e44
- name: moonshine-tiny
license: apache-2.0
size: "108MB"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
description: |
Moonshine Tiny is a lightweight speech-to-text model optimized for fast transcription. It is designed for efficient on-device ASR with high accuracy relative to its size.
@@ -500,6 +501,7 @@
model: moonshine/tiny
- name: whisperx-tiny
license: mit
size: "151MB"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
description: |
WhisperX Tiny is a fast and accurate speech recognition model with speaker diarization capabilities. Built on OpenAI's Whisper with additional features for alignment and speaker segmentation.
@@ -540,6 +542,7 @@
model: openbmb/VoxCPM1.5
- name: neutts-air
license: apache-2.0
size: "1.5GB"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
description: |
NeuTTS Air is the world's first super-realistic, on-device TTS speech language model with instant voice cloning. Built on a 0.5B LLM backbone, it brings natural-sounding speech, real-time performance, and speaker cloning to local devices.
@@ -1532,6 +1535,7 @@
sha256: 67561d63bfa2153616e4c02fd967007c182593fc53738a6ad94bf5f84e8832ac
- &pocket-tts
url: "github:mudler/LocalAI/gallery/pocket-tts.yaml@master"
size: "236MB"
icon: https://avatars.githubusercontent.com/u/6154722?s=200&v=4
license: mit
tags:
@@ -2244,6 +2248,7 @@
uri: huggingface://bartowski/LiquidAI_LFM2-8B-A1B-GGUF/LiquidAI_LFM2-8B-A1B-Q4_K_M.gguf
- name: "kokoro"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
size: "327MB"
urls:
- https://github.com/hexgrad/kokoro
license: apache-2.0
@@ -2533,6 +2538,7 @@
uri: huggingface://DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf/OpenAI-20B-NEOPlus-Uncensored-IQ4_NL.gguf
- name: "chatterbox"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
size: "3.2GB"
icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png
license: "mit"
urls:
@@ -2619,6 +2625,7 @@
- &rfdetr
name: "rfdetr-base"
url: "github:mudler/LocalAI/gallery/virtual.yaml@master"
size: "116MB"
icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4
license: apache-2.0
description: |

View File

@@ -309,7 +309,7 @@ func (u URI) ContentLength(ctx context.Context) (int64, error) {
}
return info.Size(), nil
}
if !u.LooksLikeHTTPURL() {
if !strings.HasPrefix(urlStr, HTTPPrefix) && !strings.HasPrefix(urlStr, HTTPSPrefix) {
return 0, fmt.Errorf("unsupported URI scheme for ContentLength: %s", string(u))
}
req, err := http.NewRequestWithContext(ctx, "HEAD", urlStr, nil)

View File

@@ -124,6 +124,69 @@ func sizeOnlyVRAM(sizeOnDisk uint64, ctxLen uint32) uint64 {
return vram
}
// ParseSizeString parses a human-readable size string (e.g. "500MB", "14.5 GB", "2tb")
// into bytes. Supports B, KB, MB, GB, TB, PB and the short forms K, M, G, T, P
// (case-insensitive, space optional). A missing suffix means bytes.
// Uses SI units (1 KB = 1000 B). Fractional byte counts are truncated.
//
// An error is returned when the string is empty, does not start with a number,
// contains a malformed number (for example "1.2.3GB"), carries an unknown
// suffix, or describes a value that does not fit in a uint64.
func ParseSizeString(s string) (uint64, error) {
	s = strings.ToUpper(strings.TrimSpace(s))
	if s == "" {
		return 0, fmt.Errorf("empty size string")
	}

	// Find where the numeric part (digits and decimal points) ends. A leading
	// sign is not part of it, so negative inputs are rejected right here.
	i := 0
	for i < len(s) && (s[i] == '.' || (s[i] >= '0' && s[i] <= '9')) {
		i++
	}
	if i == 0 {
		return 0, fmt.Errorf("no numeric value in size string: %q", s)
	}
	numStr := s[:i]
	suffix := strings.TrimSpace(s[i:])

	// Sscanf stops at the first character that cannot extend the number and
	// ignores whatever follows, so "1.2.3" would be accepted as 1.2.
	if strings.Count(numStr, ".") > 1 {
		return 0, fmt.Errorf("invalid numeric value %q: multiple decimal points", numStr)
	}
	var num float64
	if _, err := fmt.Sscanf(numStr, "%f", &num); err != nil {
		return 0, fmt.Errorf("invalid numeric value %q: %w", numStr, err)
	}

	var multiplier uint64
	switch suffix {
	case "", "B":
		multiplier = 1
	case "K", "KB":
		multiplier = 1000
	case "M", "MB":
		multiplier = 1000 * 1000
	case "G", "GB":
		multiplier = 1000 * 1000 * 1000
	case "T", "TB":
		multiplier = 1000 * 1000 * 1000 * 1000
	case "P", "PB":
		multiplier = 1000 * 1000 * 1000 * 1000 * 1000
	default:
		return 0, fmt.Errorf("unknown size suffix: %q", suffix)
	}

	// 2^64 is the first value that no longer fits in a uint64; converting a
	// float at or above it would produce an implementation-defined result.
	const uint64Limit = float64(1 << 64)
	total := num * float64(multiplier)
	if total >= uint64Limit {
		return 0, fmt.Errorf("size out of range: %q", s)
	}
	return uint64(total), nil
}
// EstimateFromSize builds an EstimateResult from a raw byte count.
// The VRAM figure is obtained from sizeOnlyVRAM with a context length of
// 8192, and both numbers are also rendered as display strings via FormatBytes.
func EstimateFromSize(sizeBytes uint64) EstimateResult {
	var est EstimateResult
	est.VRAMBytes = sizeOnlyVRAM(sizeBytes, 8192)
	est.SizeBytes = sizeBytes
	est.SizeDisplay = FormatBytes(est.SizeBytes)
	est.VRAMDisplay = FormatBytes(est.VRAMBytes)
	return est
}
func FormatBytes(n uint64) string {
const unit = 1000
if n < unit {

147
pkg/vram/hf_estimate.go Normal file
View File

@@ -0,0 +1,147 @@
package vram
import (
"context"
"strings"
"sync"
"time"
hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
var (
hfSizeCacheMu sync.Mutex
hfSizeCacheData = make(map[string]hfSizeCacheEntry)
)
type hfSizeCacheEntry struct {
result EstimateResult
err error
expiresAt time.Time
}
const hfSizeCacheTTL = 15 * time.Minute
// ExtractHFRepoID extracts a HuggingFace repo ID from a string.
// It handles both short form ("org/model") and full URL form
// ("https://huggingface.co/org/model", "huggingface.co/org/model").
// In URL form the host match is case-insensitive, and anything after the
// second path segment (extra path, trailing slashes, a "?query" or a
// "#fragment") is discarded.
// Returns the repo ID and true if found, or empty string and false otherwise.
func ExtractHFRepoID(s string) (string, bool) {
	s = strings.TrimSpace(s)
	if s == "" {
		return "", false
	}

	// Handle full URL form: https://huggingface.co/org/model or huggingface.co/org/model.
	// The lowered copy is used for matching only; the ID is cut from the
	// original string so the repo's own casing is preserved.
	lowered := strings.ToLower(s)
	for _, prefix := range []string{
		"https://huggingface.co/",
		"http://huggingface.co/",
		"huggingface.co/",
	} {
		if !strings.HasPrefix(lowered, prefix) {
			continue
		}
		rest := s[len(prefix):]
		// A query string or fragment is never part of the repo ID.
		if cut := strings.IndexAny(rest, "?#"); cut >= 0 {
			rest = rest[:cut]
		}
		// Strip trailing slashes and path fragments beyond org/model.
		rest = strings.TrimRight(rest, "/")
		parts := strings.SplitN(rest, "/", 3)
		if len(parts) >= 2 && parts[0] != "" && parts[1] != "" {
			return parts[0] + "/" + parts[1], true
		}
		return "", false
	}

	// Handle short form: org/model. Anything that looks like another URL
	// scheme or contains a space is rejected.
	if strings.Contains(s, "://") || strings.Contains(s, " ") {
		return "", false
	}
	parts := strings.Split(s, "/")
	if len(parts) == 2 && parts[0] != "" && parts[1] != "" {
		return s, true
	}
	return "", false
}
// EstimateFromHFRepo estimates model size by querying the HuggingFace API for file listings.
// Results, including lookup errors, are cached per repo ID for hfSizeCacheTTL
// (15 minutes). A failure that coincides with ctx being cancelled or timing
// out is not cached: it describes this caller's context rather than the
// repository, and must not be replayed to later callers with fresh contexts.
func EstimateFromHFRepo(ctx context.Context, repoID string) (EstimateResult, error) {
	hfSizeCacheMu.Lock()
	entry, cached := hfSizeCacheData[repoID]
	hfSizeCacheMu.Unlock()
	if cached && time.Now().Before(entry.expiresAt) {
		return entry.result, entry.err
	}

	// The lock is released during the lookup so a slow request for one repo
	// does not block cache access for the others.
	result, err := estimateFromHFRepoUncached(ctx, repoID)
	if err != nil && ctx.Err() != nil {
		return result, err
	}

	hfSizeCacheMu.Lock()
	hfSizeCacheData[repoID] = hfSizeCacheEntry{
		result:    result,
		err:       err,
		expiresAt: time.Now().Add(hfSizeCacheTTL),
	}
	hfSizeCacheMu.Unlock()
	return result, err
}
// estimateFromHFRepoUncached lists the files of repoID with a fresh hfapi
// client and converts the listing into an EstimateResult, bypassing the cache.
// ListFiles is called without a context, so it runs in its own goroutine and
// this function returns ctx.Err() if ctx finishes before the listing arrives.
func estimateFromHFRepoUncached(ctx context.Context, repoID string) (EstimateResult, error) {
	type listing struct {
		files []hfapi.FileInfo
		err   error
	}

	hf := hfapi.NewClient()

	// Capacity 1 lets the worker deliver its outcome and exit even when the
	// select below has already returned because the context finished first.
	results := make(chan listing, 1)
	go func() {
		var out listing
		out.files, out.err = hf.ListFiles(repoID)
		results <- out
	}()

	select {
	case out := <-results:
		if out.err == nil {
			return estimateFromFileInfos(out.files), nil
		}
		return EstimateResult{}, out.err
	case <-ctx.Done():
		return EstimateResult{}, ctx.Err()
	}
}
// estimateFromFileInfos adds up the sizes of all entries of type "file" whose
// lower-cased extension is listed in weightExts and derives an EstimateResult
// from the total. For an entry with LFS metadata reporting a positive size,
// that size is used instead of the entry's own Size. A total of zero or less
// yields the zero EstimateResult.
func estimateFromFileInfos(files []hfapi.FileInfo) EstimateResult {
	var total int64
	for _, info := range files {
		if info.Type != "file" {
			continue
		}

		// The extension is everything from the last '.' of the lower-cased
		// path; entries without a '.' are never counted.
		lowered := strings.ToLower(info.Path)
		dot := strings.LastIndex(lowered, ".")
		if dot < 0 || !weightExts[lowered[dot:]] {
			continue
		}

		if info.LFS != nil && info.LFS.Size > 0 {
			total += info.LFS.Size
		} else {
			total += info.Size
		}
	}

	if total <= 0 {
		return EstimateResult{}
	}

	var est EstimateResult
	est.SizeBytes = uint64(total)
	est.VRAMBytes = sizeOnlyVRAM(est.SizeBytes, 8192)
	est.SizeDisplay = FormatBytes(est.SizeBytes)
	est.VRAMDisplay = FormatBytes(est.VRAMBytes)
	return est
}

View File

@@ -0,0 +1,75 @@
package vram_test
import (
. "github.com/mudler/LocalAI/pkg/vram"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Table-driven specs for ParseSizeString: inputs that must parse to a given
// byte count, followed by inputs that must be rejected with an error.
var _ = Describe("ParseSizeString", func() {
DescribeTable("valid sizes",
func(input string, expected uint64) {
got, err := ParseSizeString(input)
Expect(err).ToNot(HaveOccurred())
// The result is compared with a tolerance of 0.01% of the expected value plus one byte.
Expect(got).To(BeNumerically("~", expected, float64(expected)*0.0001+1))
},
// Expected values use SI multipliers (1 KB = 1000 B).
Entry("500MB", "500MB", uint64(500_000_000)),
Entry("14.5GB", "14.5GB", uint64(14_500_000_000)),
Entry("2TB", "2TB", uint64(2_000_000_000_000)),
Entry("1024KB", "1024KB", uint64(1_024_000)),
Entry("100B", "100B", uint64(100)),
Entry("75 MB with space", "75 MB", uint64(75_000_000)),
Entry("1.5 gb lowercase", "1.5 gb", uint64(1_500_000_000)),
Entry("0.5GB", "0.5GB", uint64(500_000_000)),
Entry("3PB", "3PB", uint64(3_000_000_000_000_000)),
Entry("short suffix 100M", "100M", uint64(100_000_000)),
Entry("short suffix 2G", "2G", uint64(2_000_000_000)),
Entry("short suffix 1K", "1K", uint64(1_000)),
)
// Every entry below must produce an error; the returned value is ignored.
DescribeTable("invalid sizes",
func(input string) {
_, err := ParseSizeString(input)
Expect(err).To(HaveOccurred())
},
Entry("empty", ""),
Entry("suffix only", "MB"),
Entry("letters only", "abc"),
Entry("negative", "-5GB"),
Entry("unknown suffix", "5XB"),
)
})
// Table-driven specs for ExtractHFRepoID: inputs that must yield a repo ID,
// followed by inputs for which the function must report "not found".
var _ = Describe("ExtractHFRepoID", func() {
DescribeTable("valid repo IDs",
func(input, expectedID string) {
gotID, gotOK := ExtractHFRepoID(input)
Expect(gotOK).To(BeTrue())
Expect(gotID).To(Equal(expectedID))
},
// Short "org/model" form is returned unchanged.
Entry("short form", "Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"),
Entry("short form 2", "meta-llama/Llama-3-8B", "meta-llama/Llama-3-8B"),
// URL forms are reduced to the first two path segments after the host.
Entry("https URL", "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"),
Entry("http URL", "http://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"),
Entry("no scheme", "huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"),
Entry("trailing slash", "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers/", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"),
Entry("extra path", "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers/tree/main", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"),
// Scheme and host are matched case-insensitively; the ID keeps its own casing.
Entry("uppercase URL", "HTTPS://HUGGINGFACE.CO/org/model", "org/model"),
)
// Only the boolean is checked for these inputs; the returned ID is ignored.
DescribeTable("invalid inputs",
func(input string) {
_, gotOK := ExtractHFRepoID(input)
Expect(gotOK).To(BeFalse())
},
Entry("empty", ""),
Entry("single word", "single-word"),
Entry("three parts", "llama-cpp/models/file.gguf"),
Entry("non-HF URL", "https://example.com/org/model"),
Entry("wrong scheme", "ftp://huggingface.co/org/model"),
Entry("has space", "has spaces/model"),
Entry("incomplete URL", "huggingface.co/"),
Entry("org only", "huggingface.co/org"),
Entry("empty org", "huggingface.co//model"),
)
})