mirror of
https://github.com/mudler/LocalAI.git
synced 2026-05-17 21:21:23 -04:00
feat(gallery): Speed up load times and clean gallery entries (#9211)
* feat: Rework VRAM estimation and use known_usecases in gallery

  Signed-off-by: Richard Palethorpe <io@richiejp.com>
  Assisted-by: Claude:claude-opus-4-7[1m] [Claude Code]

* chore(gallery): regenerate gallery index and add known_usecases to model entries

  Signed-off-by: Richard Palethorpe <io@richiejp.com>

---------

Signed-off-by: Richard Palethorpe <io@richiejp.com>
This commit is contained in:
committed by
GitHub
parent
6d56bf98fe
commit
969005b2a1
@@ -43,17 +43,16 @@ func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLo
|
||||
modelsPath := sysState.Model.ModelsPath
|
||||
|
||||
var files []vram.FileInput
|
||||
var firstGGUF string
|
||||
seen := make(map[string]bool)
|
||||
|
||||
for _, f := range cfg.DownloadFiles {
|
||||
addWeightFile(string(f.URI), modelsPath, &files, &firstGGUF, seen)
|
||||
addWeightFile(string(f.URI), modelsPath, &files, seen)
|
||||
}
|
||||
if cfg.Model != "" {
|
||||
addWeightFile(cfg.Model, modelsPath, &files, &firstGGUF, seen)
|
||||
addWeightFile(cfg.Model, modelsPath, &files, seen)
|
||||
}
|
||||
if cfg.MMProj != "" {
|
||||
addWeightFile(cfg.MMProj, modelsPath, &files, &firstGGUF, seen)
|
||||
addWeightFile(cfg.MMProj, modelsPath, &files, seen)
|
||||
}
|
||||
|
||||
if len(files) == 0 {
|
||||
@@ -64,39 +63,46 @@ func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLo
|
||||
}
|
||||
|
||||
contextDefaulted := false
|
||||
opts := vram.EstimateOptions{
|
||||
ContextLength: req.ContextSize,
|
||||
GPULayers: req.GPULayers,
|
||||
KVQuantBits: req.KVQuantBits,
|
||||
}
|
||||
if opts.ContextLength == 0 {
|
||||
ctxLen := req.ContextSize
|
||||
if ctxLen == 0 {
|
||||
if cfg.ContextSize != nil {
|
||||
opts.ContextLength = uint32(*cfg.ContextSize)
|
||||
ctxLen = uint32(*cfg.ContextSize)
|
||||
} else {
|
||||
opts.ContextLength = 8192
|
||||
ctxLen = 8192
|
||||
contextDefaulted = true
|
||||
}
|
||||
}
|
||||
|
||||
opts := vram.EstimateOptions{
|
||||
GPULayers: req.GPULayers,
|
||||
KVQuantBits: req.KVQuantBits,
|
||||
}
|
||||
|
||||
subCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
result, err := vram.Estimate(subCtx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
|
||||
multi, err := vram.EstimateMultiContext(subCtx, files, []uint32{ctxLen}, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("vram estimate: %w", err)
|
||||
}
|
||||
|
||||
resp := &VRAMResponse{EstimateResult: result}
|
||||
at := multi.Estimates[fmt.Sprint(ctxLen)]
|
||||
resp := &VRAMResponse{
|
||||
EstimateResult: vram.EstimateResult{
|
||||
SizeBytes: multi.SizeBytes,
|
||||
SizeDisplay: multi.SizeDisplay,
|
||||
ContextLength: at.ContextLength,
|
||||
VRAMBytes: at.VRAMBytes,
|
||||
VRAMDisplay: at.VRAMDisplay,
|
||||
},
|
||||
ModelMaxContext: multi.ModelMaxContext,
|
||||
}
|
||||
|
||||
if contextDefaulted && firstGGUF != "" {
|
||||
ggufMeta, err := vram.DefaultCachedGGUFReader().ReadMetadata(subCtx, firstGGUF)
|
||||
if err == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 {
|
||||
resp.ModelMaxContext = ggufMeta.MaximumContextLength
|
||||
resp.ContextNote = fmt.Sprintf(
|
||||
"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
|
||||
ggufMeta.MaximumContextLength,
|
||||
)
|
||||
}
|
||||
if contextDefaulted && multi.ModelMaxContext > 0 {
|
||||
resp.ContextNote = fmt.Sprintf(
|
||||
"Estimate used default context_size=8192. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
|
||||
multi.ModelMaxContext,
|
||||
)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
@@ -111,8 +117,8 @@ func resolveModelURI(uri, modelsPath string) string {
|
||||
return "file://" + filepath.Join(modelsPath, uri)
|
||||
}
|
||||
|
||||
// addWeightFile appends a resolved weight file to files and tracks the first GGUF.
|
||||
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *string, seen map[string]bool) {
|
||||
// addWeightFile appends a resolved weight file to files.
|
||||
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, seen map[string]bool) {
|
||||
if !vram.IsWeightFile(uri) {
|
||||
return
|
||||
}
|
||||
@@ -122,7 +128,4 @@ func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *s
|
||||
}
|
||||
seen[resolved] = true
|
||||
*files = append(*files, vram.FileInput{URI: resolved, Size: 0})
|
||||
if *firstGGUF == "" && vram.IsGGUF(uri) {
|
||||
*firstGGUF = resolved
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user