From 983db7bedc777bac52b135f1354c174f9c45e5f1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 28 Feb 2026 23:03:47 +0100 Subject: [PATCH] feat(ui): add model size estimation (#8684) Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/localai/import_model.go | 35 ++++- core/http/routes/ui_api.go | 71 ++++++++- core/http/views/manage.html | 9 ++ core/http/views/model-editor.html | 48 ++++++- core/http/views/models.html | 124 ++++++++++------ core/schema/localai.go | 5 + docs/content/features/model-gallery.md | 9 ++ pkg/downloader/uri.go | 62 ++++++++ pkg/downloader/uri_test.go | 83 +++++++++++ pkg/vram/cache.go | 96 +++++++++++++ pkg/vram/estimate.go | 152 ++++++++++++++++++++ pkg/vram/estimate_test.go | 137 ++++++++++++++++++ pkg/vram/gguf_reader.go | 46 ++++++ pkg/vram/types.go | 42 ++++++ pkg/vram/vram_suite_test.go | 13 ++ 15 files changed, 872 insertions(+), 60 deletions(-) create mode 100644 pkg/vram/cache.go create mode 100644 pkg/vram/estimate.go create mode 100644 pkg/vram/estimate_test.go create mode 100644 pkg/vram/gguf_reader.go create mode 100644 pkg/vram/types.go create mode 100644 pkg/vram/vram_suite_test.go diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index 9d8926c0a..7fb03c617 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -1,6 +1,7 @@ package localai import ( + "context" "encoding/json" "fmt" "io" @@ -8,6 +9,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/google/uuid" "github.com/labstack/echo/v4" @@ -18,6 +20,7 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/utils" + "github.com/mudler/LocalAI/pkg/vram" "gopkg.in/yaml.v3" ) @@ -37,6 +40,31 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl return fmt.Errorf("failed to discover model config: %w", err) } + resp := schema.GalleryResponse{ + StatusURL: fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), ""), + } + + if len(modelConfig.Files) > 0 { + files := make([]vram.FileInput, 0, len(modelConfig.Files)) + for _, f := range modelConfig.Files { + files = append(files, vram.FileInput{URI: f.URI, Size: 0}) + } + estCtx, cancel := context.WithTimeout(c.Request().Context(), 5*time.Second) + defer cancel() + opts := vram.EstimateOptions{ContextLength: 8192} + result, err := vram.Estimate(estCtx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader()) + if err == nil { + if result.SizeBytes > 0 { + resp.EstimatedSizeBytes = result.SizeBytes + resp.EstimatedSizeDisplay = result.SizeDisplay + } + if result.VRAMBytes > 0 { + resp.EstimatedVRAMBytes = result.VRAMBytes + resp.EstimatedVRAMDisplay = result.VRAMDisplay + } + } + } + uuid, err := uuid.NewUUID() if err != nil { return err @@ -63,10 +91,9 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl BackendGalleries: appConfig.BackendGalleries, } - return c.JSON(200, schema.GalleryResponse{ - ID: uuid.String(), - StatusURL: fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), uuid.String()), - }) + resp.ID = uuid.String() + resp.StatusURL = fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), uuid.String()) + return c.JSON(200, resp) } } diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 208f8dc45..4dc73a823 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -8,9 +8,12 @@ import ( "math" "net/http" "net/url" + "path" "sort" "strconv" "strings" + "sync" + "time" "github.com/google/uuid" "github.com/labstack/echo/v4" @@ -22,6 +25,7 @@ import ( "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" + "github.com/mudler/LocalAI/pkg/vram" "github.com/mudler/LocalAI/pkg/xsysinfo" "github.com/mudler/xlog" ) @@ -263,6 +267,22 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model modelsJSON := make([]map[string]interface{}, 0, len(models)) seenIDs := make(map[string]bool) + weightExts := map[string]bool{".gguf": true, ".safetensors": true, ".bin": true, ".pt": true} + hasWeightFiles := func(files []gallery.File) bool { + for _, f := range files { + ext := strings.ToLower(path.Ext(path.Base(f.URI))) + if weightExts[ext] { + return true + } + } + return false + } + + const estimateTimeout = 3 * time.Second + const estimateConcurrency = 3 + sem := make(chan struct{}, estimateConcurrency) + var wg sync.WaitGroup + for _, m := range models { modelID := m.ID() @@ -286,7 +306,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model _, trustRemoteCodeExists := m.Overrides["trust_remote_code"] - modelsJSON = append(modelsJSON, map[string]interface{}{ + obj := map[string]interface{}{ "id": modelID, "name": m.Name, "description": m.Description, @@ -301,9 +321,48 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model "isDeletion": isDeletionOp, "trustRemoteCode": trustRemoteCodeExists, "additionalFiles": m.AdditionalFiles, - }) + } + + if hasWeightFiles(m.AdditionalFiles) { + files := make([]gallery.File, len(m.AdditionalFiles)) + copy(files, m.AdditionalFiles) + wg.Add(1) + go func(files []gallery.File, out map[string]interface{}) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + inputs := make([]vram.FileInput, 0, len(files)) + for _, f := range files { + ext := strings.ToLower(path.Ext(path.Base(f.URI))) + if weightExts[ext] { + inputs = append(inputs, vram.FileInput{URI: f.URI, Size: 0}) + } + } + if len(inputs) == 0 { + return + } + ctx, cancel := context.WithTimeout(context.Background(), estimateTimeout) + defer cancel() + opts := vram.EstimateOptions{ContextLength: 8192} + result, err := vram.Estimate(ctx, inputs, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader()) + if err == nil { + if result.SizeBytes > 0 { + out["estimated_size_bytes"] = result.SizeBytes + out["estimated_size_display"] = result.SizeDisplay + } + if result.VRAMBytes > 0 { + out["estimated_vram_bytes"] = result.VRAMBytes + out["estimated_vram_display"] = result.VRAMDisplay + } + } + }(files, obj) + } + + modelsJSON = append(modelsJSON, obj) } + wg.Wait() + prevPage := pageNum - 1 nextPage := pageNum + 1 if prevPage < 1 { @@ -318,10 +377,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY) installedModelsCount := len(modelConfigs) + len(modelsWithoutConfig) - // Calculate storage size and RAM info - modelsPath := appConfig.SystemState.Model.ModelsPath - storageSize, _ := getDirectorySize(modelsPath) - ramInfo, _ := xsysinfo.GetSystemRAMInfo() return c.JSON(200, map[string]interface{}{ @@ -332,7 +387,6 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model "taskTypes": taskTypes, "availableModels": totalModels, "installedModels": installedModelsCount, - "storageSize": storageSize, "ramTotal": ramInfo.Total, "ramUsed": ramInfo.Used, "ramUsagePercent": ramInfo.UsagePercent, @@ -967,12 +1021,15 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model watchdogInterval = appConfig.WatchDogInterval.String() } + storageSize, _ := getDirectorySize(appConfig.SystemState.Model.ModelsPath) + response := map[string]interface{}{ "type": resourceInfo.Type, // "gpu" or "ram" "available": resourceInfo.Available, "gpus": resourceInfo.GPUs, "ram": resourceInfo.RAM, "aggregate": resourceInfo.Aggregate, + "storage_size": storageSize, "reclaimer_enabled": appConfig.MemoryReclaimerEnabled, "reclaimer_threshold": appConfig.MemoryReclaimerThreshold, "watchdog_interval": watchdogInterval, diff --git a/core/http/views/manage.html b/core/http/views/manage.html index b54018c24..80ed7a630 100644 --- a/core/http/views/manage.html +++ b/core/http/views/manage.html @@ -141,6 +141,15 @@ + + diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html index 7aaeeada1..08cd58cf1 100644 --- a/core/http/views/model-editor.html +++ b/core/http/views/model-editor.html @@ -59,6 +59,26 @@
+ +
+

+ + Estimated requirements +

+
+ + + Download size: + + + + VRAM: + +
+
+
repositories
-
-
- - storage -
-
- - Storage exceeds RAM! -
Import Model @@ -186,7 +177,7 @@ -
+
@@ -200,6 +191,21 @@

No models found matching your criteria

+ +
+
+ + + + +

Loading page...

+
+
+
@@ -218,26 +224,7 @@
Description - -
- Repository - -
- - -
- License - -
- + Size / VRAM @@ -284,21 +271,26 @@
- + - - - - - - - - - - - - - - +
+ + + - +
@@ -414,6 +406,36 @@
+ +