mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-16 20:52:08 -04:00
* feat: Rework VRAM estimation and use known_usecases in gallery Signed-off-by: Richard Palethorpe <io@richiejp.com> Assisted-by: Claude:claude-opus-4-7[1m] [Claude Code] * chore(gallery): regenerate gallery index and add known_usecases to model entries Signed-off-by: Richard Palethorpe <io@richiejp.com> --------- Signed-off-by: Richard Palethorpe <io@richiejp.com>
132 lines
3.7 KiB
Go
132 lines
3.7 KiB
Go
package modeladmin
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/mudler/LocalAI/core/config"
|
|
"github.com/mudler/LocalAI/pkg/system"
|
|
"github.com/mudler/LocalAI/pkg/vram"
|
|
)
|
|
|
|
// VRAMRequest is the input for EstimateVRAM. JSON tags let the HTTP
// handler bind directly into this type instead of carrying a parallel
// private struct.
type VRAMRequest struct {
	// Model is the name of the installed model to estimate; required
	// (EstimateVRAM returns ErrNameRequired when empty).
	Model string `json:"model"`
	// ContextSize is the context length to estimate for. Zero falls back
	// to the model's configured context size, or 8192 if none is set.
	ContextSize uint32 `json:"context_size,omitempty"`
	// GPULayers is forwarded to vram.EstimateOptions.GPULayers.
	GPULayers int `json:"gpu_layers,omitempty"`
	// KVQuantBits is forwarded to vram.EstimateOptions.KVQuantBits.
	KVQuantBits int `json:"kv_quant_bits,omitempty"`
}
|
|
|
|
// VRAMResponse embeds vram.EstimateResult and adds the context-defaulted
// note fields the HTTP endpoint surfaces.
type VRAMResponse struct {
	vram.EstimateResult
	// ContextNote carries a human-readable caveat: either that the
	// estimate used the default context size, or that no weight files
	// were found for estimation.
	ContextNote string `json:"context_note,omitempty"`
	// ModelMaxContext is the model's trained maximum context length as
	// reported by the estimator; zero when unknown.
	ModelMaxContext uint64 `json:"model_max_context,omitempty"`
}
|
|
|
|
// EstimateVRAM computes a VRAM estimate for an installed model. It mirrors
|
|
// VRAMEstimateEndpoint without any HTTP coupling.
|
|
func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLoader, sysState *system.SystemState) (*VRAMResponse, error) {
|
|
if req.Model == "" {
|
|
return nil, ErrNameRequired
|
|
}
|
|
cfg, exists := cl.GetModelConfig(req.Model)
|
|
if !exists {
|
|
return nil, ErrNotFound
|
|
}
|
|
modelsPath := sysState.Model.ModelsPath
|
|
|
|
var files []vram.FileInput
|
|
seen := make(map[string]bool)
|
|
|
|
for _, f := range cfg.DownloadFiles {
|
|
addWeightFile(string(f.URI), modelsPath, &files, seen)
|
|
}
|
|
if cfg.Model != "" {
|
|
addWeightFile(cfg.Model, modelsPath, &files, seen)
|
|
}
|
|
if cfg.MMProj != "" {
|
|
addWeightFile(cfg.MMProj, modelsPath, &files, seen)
|
|
}
|
|
|
|
if len(files) == 0 {
|
|
// No weight files: the caller (HTTP or MCP) reports this as a
|
|
// non-error empty estimate. Returning a typed nil here lets both
|
|
// layers format the message consistently.
|
|
return &VRAMResponse{ContextNote: "no weight files found for estimation"}, nil
|
|
}
|
|
|
|
contextDefaulted := false
|
|
ctxLen := req.ContextSize
|
|
if ctxLen == 0 {
|
|
if cfg.ContextSize != nil {
|
|
ctxLen = uint32(*cfg.ContextSize)
|
|
} else {
|
|
ctxLen = 8192
|
|
contextDefaulted = true
|
|
}
|
|
}
|
|
|
|
opts := vram.EstimateOptions{
|
|
GPULayers: req.GPULayers,
|
|
KVQuantBits: req.KVQuantBits,
|
|
}
|
|
|
|
subCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancel()
|
|
|
|
multi, err := vram.EstimateMultiContext(subCtx, files, []uint32{ctxLen}, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
|
|
if err != nil {
|
|
return nil, fmt.Errorf("vram estimate: %w", err)
|
|
}
|
|
|
|
at := multi.Estimates[fmt.Sprint(ctxLen)]
|
|
resp := &VRAMResponse{
|
|
EstimateResult: vram.EstimateResult{
|
|
SizeBytes: multi.SizeBytes,
|
|
SizeDisplay: multi.SizeDisplay,
|
|
ContextLength: at.ContextLength,
|
|
VRAMBytes: at.VRAMBytes,
|
|
VRAMDisplay: at.VRAMDisplay,
|
|
},
|
|
ModelMaxContext: multi.ModelMaxContext,
|
|
}
|
|
|
|
if contextDefaulted && multi.ModelMaxContext > 0 {
|
|
resp.ContextNote = fmt.Sprintf(
|
|
"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
|
|
multi.ModelMaxContext,
|
|
)
|
|
}
|
|
return resp, nil
|
|
}
|
|
|
|
// resolveModelURI converts a relative model path to a file:// URI so the
// size resolver can stat it on disk. URIs that already have a scheme are
// returned unchanged.
func resolveModelURI(uri, modelsPath string) string {
	hasScheme := strings.Contains(uri, "://")
	if hasScheme {
		// Already a full URI (http://, file://, ...): pass through as-is.
		return uri
	}
	// Bare path: anchor it under the models directory and mark it as an
	// on-disk file.
	onDisk := filepath.Join(modelsPath, uri)
	return "file://" + onDisk
}
|
|
|
|
// addWeightFile appends a resolved weight file to files.
|
|
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, seen map[string]bool) {
|
|
if !vram.IsWeightFile(uri) {
|
|
return
|
|
}
|
|
resolved := resolveModelURI(uri, modelsPath)
|
|
if seen[resolved] {
|
|
return
|
|
}
|
|
seen[resolved] = true
|
|
*files = append(*files, vram.FileInput{URI: resolved, Size: 0})
|
|
}
|