diff --git a/core/gallery/metadata_type.go b/core/gallery/metadata_type.go index f0059eab6..066cf83a6 100644 --- a/core/gallery/metadata_type.go +++ b/core/gallery/metadata_type.go @@ -12,6 +12,9 @@ type Metadata struct { Tags []string `json:"tags,omitempty" yaml:"tags,omitempty"` // AdditionalFiles are used to add additional files to the model AdditionalFiles []File `json:"files,omitempty" yaml:"files,omitempty"` + // Size is an optional hardcoded model size string (e.g. "500MB", "14.5GB"). + // Used when the size cannot be estimated automatically. + Size string `json:"size,omitempty" yaml:"size,omitempty"` // Gallery is a reference to the gallery which contains the model Gallery config.Gallery `json:"gallery,omitempty" yaml:"gallery,omitempty"` // Installed is used to indicate if the model is installed or not diff --git a/core/http/react-ui/src/pages/Models.jsx b/core/http/react-ui/src/pages/Models.jsx index ce476876a..9bb698d80 100644 --- a/core/http/react-ui/src/pages/Models.jsx +++ b/core/http/react-ui/src/pages/Models.jsx @@ -151,7 +151,7 @@ export default function Models() { const term = searchVal || filterVal || '' const queryParams = { page: params.page || page, - items: 21, + items: 9, } if (term) queryParams.term = term if (sortVal) { diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 68bbc1325..87950ac3d 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -197,7 +197,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model } items := c.QueryParam("items") if items == "" { - items = "21" + items = "9" } models, err := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.SystemState) @@ -253,7 +253,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model itemsNum, err := strconv.Atoi(items) if err != nil || itemsNum < 1 { - itemsNum = 21 + itemsNum = 9 } totalPages := int(math.Ceil(float64(len(models)) / float64(itemsNum))) @@ -268,6 +268,25 
@@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model seenIDs := make(map[string]bool) weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true} + extractHFRepo := func(overrides map[string]interface{}, urls []string) string { + // Try overrides.parameters.model first + if overrides != nil { + if params, ok := overrides["parameters"].(map[string]interface{}); ok { + if modelRef, ok := params["model"].(string); ok { + if repoID, ok := vram.ExtractHFRepoID(modelRef); ok { + return repoID + } + } + } + } + // Fall back to the first HuggingFace URL in the metadata urls list + for _, u := range urls { + if repoID, ok := vram.ExtractHFRepoID(u); ok { + return repoID + } + } + return "" + } hasWeightFiles := func(files []gallery.File) bool { for _, f := range files { ext := strings.ToLower(path.Ext(path.Base(f.URI))) @@ -279,6 +298,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model } const estimateTimeout = 3 * time.Second + const hfEstimateTimeout = 10 * time.Second const estimateConcurrency = 3 sem := make(chan struct{}, estimateConcurrency) var wg sync.WaitGroup @@ -356,6 +376,34 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model } } }(files, obj) + } else if m.Size != "" { + if sizeBytes, err := vram.ParseSizeString(m.Size); err == nil && sizeBytes > 0 { + result := vram.EstimateFromSize(sizeBytes) + obj["estimated_size_bytes"] = result.SizeBytes + obj["estimated_size_display"] = result.SizeDisplay + obj["estimated_vram_bytes"] = result.VRAMBytes + obj["estimated_vram_display"] = result.VRAMDisplay + } + } else if hfRepoID := extractHFRepo(m.Overrides, m.URLs); hfRepoID != "" { + wg.Add(1) + go func(repoID string, out map[string]interface{}) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + ctx, cancel := context.WithTimeout(context.Background(), hfEstimateTimeout) + defer cancel() + result, err := 
vram.EstimateFromHFRepo(ctx, repoID) + if err == nil { + if result.SizeBytes > 0 { + out["estimated_size_bytes"] = result.SizeBytes + out["estimated_size_display"] = result.SizeDisplay + } + if result.VRAMBytes > 0 { + out["estimated_vram_bytes"] = result.VRAMBytes + out["estimated_vram_display"] = result.VRAMDisplay + } + } + }(hfRepoID, obj) } modelsJSON = append(modelsJSON, obj) @@ -678,7 +726,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model } items := c.QueryParam("items") if items == "" { - items = "21" + items = "9" } backends, err := gallery.AvailableBackends(appConfig.BackendGalleries, appConfig.SystemState) @@ -734,7 +782,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model itemsNum, err := strconv.Atoi(items) if err != nil || itemsNum < 1 { - itemsNum = 21 + itemsNum = 9 } totalPages := int(math.Ceil(float64(len(backends)) / float64(itemsNum))) diff --git a/gallery/index.yaml b/gallery/index.yaml index 6622a6729..5c0d67017 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -480,6 +480,7 @@ sha256: 8434af1d39eba99f0ef46cf1450bf1a63fa941a26933a1ef5dbbf4adf0d00e44 - name: moonshine-tiny license: apache-2.0 + size: "108MB" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" description: | Moonshine Tiny is a lightweight speech-to-text model optimized for fast transcription. It is designed for efficient on-device ASR with high accuracy relative to its size. @@ -500,6 +501,7 @@ model: moonshine/tiny - name: whisperx-tiny license: mit + size: "151MB" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" description: | WhisperX Tiny is a fast and accurate speech recognition model with speaker diarization capabilities. Built on OpenAI's Whisper with additional features for alignment and speaker segmentation. 
@@ -540,6 +542,7 @@ model: openbmb/VoxCPM1.5 - name: neutts-air license: apache-2.0 + size: "1.5GB" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" description: | NeuTTS Air is the world's first super-realistic, on-device TTS speech language model with instant voice cloning. Built on a 0.5B LLM backbone, it brings natural-sounding speech, real-time performance, and speaker cloning to local devices. @@ -1532,6 +1535,7 @@ sha256: 67561d63bfa2153616e4c02fd967007c182593fc53738a6ad94bf5f84e8832ac - &pocket-tts url: "github:mudler/LocalAI/gallery/pocket-tts.yaml@master" + size: "236MB" icon: https://avatars.githubusercontent.com/u/6154722?s=200&v=4 license: mit tags: @@ -2244,6 +2248,7 @@ uri: huggingface://bartowski/LiquidAI_LFM2-8B-A1B-GGUF/LiquidAI_LFM2-8B-A1B-Q4_K_M.gguf - name: "kokoro" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + size: "327MB" urls: - https://github.com/hexgrad/kokoro license: apache-2.0 @@ -2533,6 +2538,7 @@ uri: huggingface://DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf/OpenAI-20B-NEOPlus-Uncensored-IQ4_NL.gguf - name: "chatterbox" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + size: "3.2GB" icon: https://private-user-images.githubusercontent.com/660224/448166653-bd8c5f03-e91d-4ee5-b680-57355da204d1.png license: "mit" urls: @@ -2619,6 +2625,7 @@ - &rfdetr name: "rfdetr-base" url: "github:mudler/LocalAI/gallery/virtual.yaml@master" + size: "116MB" icon: https://avatars.githubusercontent.com/u/53104118?s=200&v=4 license: apache-2.0 description: | diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index cb8795f7e..58764bb13 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -309,7 +309,7 @@ func (u URI) ContentLength(ctx context.Context) (int64, error) { } return info.Size(), nil } - if !u.LooksLikeHTTPURL() { + if !strings.HasPrefix(urlStr, HTTPPrefix) && !strings.HasPrefix(urlStr, HTTPSPrefix) { return 0, fmt.Errorf("unsupported URI scheme for ContentLength: %s", 
// ParseSizeString parses a human-readable size string (e.g. "500MB", "14.5 GB", "2tb")
// into a number of bytes.
//
// The input is a non-negative decimal number (digits with at most one '.'),
// optional whitespace, and an optional unit suffix. Supported suffixes are
// B, K/KB, M/MB, G/GB, T/TB and P/PB, matched case-insensitively; a missing
// suffix means bytes. Units are SI (1 KB = 1000 B). A fractional byte count is
// rounded to the nearest byte.
//
// An error is returned when the string is empty, does not start with a number
// (which includes negative values), contains a malformed number such as
// "1.2.3", carries an unknown suffix, or describes a value that does not fit
// in a uint64.
func ParseSizeString(s string) (uint64, error) {
	// 2^64 as a float64: the smallest value that no longer fits in a uint64.
	const uint64Limit = 18446744073709551616.0

	input := strings.TrimSpace(s)
	if input == "" {
		return 0, fmt.Errorf("empty size string")
	}
	upper := strings.ToUpper(input)

	// Find where the numeric part ends.
	end := 0
	for end < len(upper) && (upper[end] == '.' || (upper[end] >= '0' && upper[end] <= '9')) {
		end++
	}
	if end == 0 {
		// A sign is never part of the numeric scan, so negatives land here.
		if upper[0] == '-' {
			return 0, fmt.Errorf("negative size: %q", input)
		}
		return 0, fmt.Errorf("no numeric value in size string: %q", input)
	}

	numStr := upper[:end]
	suffix := strings.TrimSpace(upper[end:])

	// Sscanf stops at the first byte that cannot extend the number and ignores
	// the rest, so "1.2.3" would silently parse as 1.2. Reject it up front.
	if strings.Count(numStr, ".") > 1 {
		return 0, fmt.Errorf("invalid numeric value %q", numStr)
	}
	var num float64
	if _, err := fmt.Sscanf(numStr, "%f", &num); err != nil {
		return 0, fmt.Errorf("invalid numeric value %q: %w", numStr, err)
	}

	var multiplier uint64
	switch suffix {
	case "", "B":
		multiplier = 1
	case "K", "KB":
		multiplier = 1000
	case "M", "MB":
		multiplier = 1000 * 1000
	case "G", "GB":
		multiplier = 1000 * 1000 * 1000
	case "T", "TB":
		multiplier = 1000 * 1000 * 1000 * 1000
	case "P", "PB":
		multiplier = 1000 * 1000 * 1000 * 1000 * 1000
	default:
		return 0, fmt.Errorf("unknown size suffix: %q", suffix)
	}

	total := num * float64(multiplier)
	// Converting an out-of-range float to uint64 yields an unspecified value,
	// so refuse instead of returning garbage.
	if total >= uint64Limit {
		return 0, fmt.Errorf("size out of range: %q", input)
	}
	// Round to nearest so binary floating-point error (e.g. 0.29 * 1e9 landing
	// just below 290000000) does not cost a byte.
	return uint64(total + 0.5), nil
}

// EstimateFromSize builds an EstimateResult from a raw byte count.
+func EstimateFromSize(sizeBytes uint64) EstimateResult { + vramBytes := sizeOnlyVRAM(sizeBytes, 8192) + return EstimateResult{ + SizeBytes: sizeBytes, + SizeDisplay: FormatBytes(sizeBytes), + VRAMBytes: vramBytes, + VRAMDisplay: FormatBytes(vramBytes), + } +} + func FormatBytes(n uint64) string { const unit = 1000 if n < unit { diff --git a/pkg/vram/hf_estimate.go b/pkg/vram/hf_estimate.go new file mode 100644 index 000000000..2d1ca9b42 --- /dev/null +++ b/pkg/vram/hf_estimate.go @@ -0,0 +1,147 @@ +package vram + +import ( + "context" + "strings" + "sync" + "time" + + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" +) + +var ( + hfSizeCacheMu sync.Mutex + hfSizeCacheData = make(map[string]hfSizeCacheEntry) +) + +type hfSizeCacheEntry struct { + result EstimateResult + err error + expiresAt time.Time +} + +const hfSizeCacheTTL = 15 * time.Minute + +// ExtractHFRepoID extracts a HuggingFace repo ID from a string. +// It handles both short form ("org/model") and full URL form +// ("https://huggingface.co/org/model", "huggingface.co/org/model"). +// Returns the repo ID and true if found, or empty string and false otherwise. 
func ExtractHFRepoID(s string) (string, bool) {
	// Accepted inputs:
	//   - short form "org/model": exactly two segments, no scheme, no whitespace;
	//   - URL form "https://huggingface.co/org/model", the "http://" variant, or
	//     the scheme-less "huggingface.co/org/model", matched case-insensitively.
	//     Anything after org/model (extra path, query string, fragment) is dropped.
	// Segments must be non-empty and must not be "." or "..", so relative paths
	// such as "./model" are not mistaken for a repo. The returned ID keeps the
	// caller's casing.
	s = strings.TrimSpace(s)
	if s == "" {
		return "", false
	}

	// Handle full URL form: https://huggingface.co/org/model or huggingface.co/org/model
	for _, prefix := range []string{
		"https://huggingface.co/",
		"http://huggingface.co/",
		"huggingface.co/",
	} {
		// Compare the leading bytes of the original string directly, so the
		// slice below is taken from the same string that was matched.
		if len(s) < len(prefix) || !strings.EqualFold(s[:len(prefix)], prefix) {
			continue
		}
		rest := s[len(prefix):]
		// A query string or fragment is not part of the repo path.
		if i := strings.IndexAny(rest, "?#"); i >= 0 {
			rest = rest[:i]
		}
		// Strip trailing slashes and ignore path elements beyond org/model.
		parts := strings.SplitN(strings.TrimRight(rest, "/"), "/", 3)
		if len(parts) >= 2 && isHFRepoSegment(parts[0]) && isHFRepoSegment(parts[1]) {
			return parts[0] + "/" + parts[1], true
		}
		return "", false
	}

	// Handle short form: org/model
	if strings.Contains(s, "://") || strings.ContainsAny(s, " \t\r\n") {
		return "", false
	}
	parts := strings.Split(s, "/")
	if len(parts) == 2 && isHFRepoSegment(parts[0]) && isHFRepoSegment(parts[1]) {
		return s, true
	}

	return "", false
}

// isHFRepoSegment reports whether seg can be one half of an "org/model" repo
// ID: it must be non-empty and must not be a relative-path element.
func isHFRepoSegment(seg string) bool {
	return seg != "" && seg != "." && seg != ".."
}

// EstimateFromHFRepo estimates model size by querying the HuggingFace API for file listings.
// Results are cached for 15 minutes.
+func EstimateFromHFRepo(ctx context.Context, repoID string) (EstimateResult, error) { + hfSizeCacheMu.Lock() + if entry, ok := hfSizeCacheData[repoID]; ok && time.Now().Before(entry.expiresAt) { + hfSizeCacheMu.Unlock() + return entry.result, entry.err + } + hfSizeCacheMu.Unlock() + + result, err := estimateFromHFRepoUncached(ctx, repoID) + + hfSizeCacheMu.Lock() + hfSizeCacheData[repoID] = hfSizeCacheEntry{ + result: result, + err: err, + expiresAt: time.Now().Add(hfSizeCacheTTL), + } + hfSizeCacheMu.Unlock() + + return result, err +} + +func estimateFromHFRepoUncached(ctx context.Context, repoID string) (EstimateResult, error) { + client := hfapi.NewClient() + + type listResult struct { + files []hfapi.FileInfo + err error + } + ch := make(chan listResult, 1) + go func() { + files, err := client.ListFiles(repoID) + ch <- listResult{files, err} + }() + + select { + case <-ctx.Done(): + return EstimateResult{}, ctx.Err() + case res := <-ch: + if res.err != nil { + return EstimateResult{}, res.err + } + return estimateFromFileInfos(res.files), nil + } +} + +func estimateFromFileInfos(files []hfapi.FileInfo) EstimateResult { + var totalSize int64 + for _, f := range files { + if f.Type != "file" { + continue + } + ext := strings.ToLower(f.Path) + if idx := strings.LastIndex(ext, "."); idx >= 0 { + ext = ext[idx:] + } else { + continue + } + if !weightExts[ext] { + continue + } + size := f.Size + if f.LFS != nil && f.LFS.Size > 0 { + size = f.LFS.Size + } + totalSize += size + } + + if totalSize <= 0 { + return EstimateResult{} + } + + sizeBytes := uint64(totalSize) + vramBytes := sizeOnlyVRAM(sizeBytes, 8192) + + return EstimateResult{ + SizeBytes: sizeBytes, + SizeDisplay: FormatBytes(sizeBytes), + VRAMBytes: vramBytes, + VRAMDisplay: FormatBytes(vramBytes), + } +} diff --git a/pkg/vram/hf_estimate_test.go b/pkg/vram/hf_estimate_test.go new file mode 100644 index 000000000..345d9ec24 --- /dev/null +++ b/pkg/vram/hf_estimate_test.go @@ -0,0 +1,75 @@ +package 
vram_test + +import ( + . "github.com/mudler/LocalAI/pkg/vram" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ParseSizeString", func() { + DescribeTable("valid sizes", + func(input string, expected uint64) { + got, err := ParseSizeString(input) + Expect(err).ToNot(HaveOccurred()) + Expect(got).To(BeNumerically("~", expected, float64(expected)*0.0001+1)) + }, + Entry("500MB", "500MB", uint64(500_000_000)), + Entry("14.5GB", "14.5GB", uint64(14_500_000_000)), + Entry("2TB", "2TB", uint64(2_000_000_000_000)), + Entry("1024KB", "1024KB", uint64(1_024_000)), + Entry("100B", "100B", uint64(100)), + Entry("75 MB with space", "75 MB", uint64(75_000_000)), + Entry("1.5 gb lowercase", "1.5 gb", uint64(1_500_000_000)), + Entry("0.5GB", "0.5GB", uint64(500_000_000)), + Entry("3PB", "3PB", uint64(3_000_000_000_000_000)), + Entry("short suffix 100M", "100M", uint64(100_000_000)), + Entry("short suffix 2G", "2G", uint64(2_000_000_000)), + Entry("short suffix 1K", "1K", uint64(1_000)), + ) + + DescribeTable("invalid sizes", + func(input string) { + _, err := ParseSizeString(input) + Expect(err).To(HaveOccurred()) + }, + Entry("empty", ""), + Entry("suffix only", "MB"), + Entry("letters only", "abc"), + Entry("negative", "-5GB"), + Entry("unknown suffix", "5XB"), + ) +}) + +var _ = Describe("ExtractHFRepoID", func() { + DescribeTable("valid repo IDs", + func(input, expectedID string) { + gotID, gotOK := ExtractHFRepoID(input) + Expect(gotOK).To(BeTrue()) + Expect(gotID).To(Equal(expectedID)) + }, + Entry("short form", "Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"), + Entry("short form 2", "meta-llama/Llama-3-8B", "meta-llama/Llama-3-8B"), + Entry("https URL", "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"), + Entry("http URL", "http://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"), + Entry("no scheme", 
"huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"), + Entry("trailing slash", "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers/", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"), + Entry("extra path", "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers/tree/main", "Wan-AI/Wan2.2-I2V-A14B-Diffusers"), + Entry("uppercase URL", "HTTPS://HUGGINGFACE.CO/org/model", "org/model"), + ) + + DescribeTable("invalid inputs", + func(input string) { + _, gotOK := ExtractHFRepoID(input) + Expect(gotOK).To(BeFalse()) + }, + Entry("empty", ""), + Entry("single word", "single-word"), + Entry("three parts", "llama-cpp/models/file.gguf"), + Entry("non-HF URL", "https://example.com/org/model"), + Entry("wrong scheme", "ftp://huggingface.co/org/model"), + Entry("has space", "has spaces/model"), + Entry("incomplete URL", "huggingface.co/"), + Entry("org only", "huggingface.co/org"), + Entry("empty org", "huggingface.co//model"), + ) +})