package modeladmin

import (
	"context"
	"fmt"
	"path/filepath"
	"strings"
	"time"

	"github.com/mudler/LocalAI/core/config"
	"github.com/mudler/LocalAI/pkg/system"
	"github.com/mudler/LocalAI/pkg/vram"
)

// VRAMRequest is the input for EstimateVRAM. JSON tags let the HTTP
// handler bind directly into this type instead of carrying a parallel
// private struct.
type VRAMRequest struct {
	Model       string `json:"model"`
	ContextSize uint32 `json:"context_size,omitempty"`
	GPULayers   int    `json:"gpu_layers,omitempty"`
	KVQuantBits int    `json:"kv_quant_bits,omitempty"`
}

// VRAMResponse embeds vram.EstimateResult and adds the context-defaulted
// note fields the HTTP endpoint surfaces.
type VRAMResponse struct {
	vram.EstimateResult
	ContextNote     string `json:"context_note,omitempty"`
	ModelMaxContext uint64 `json:"model_max_context,omitempty"`
}

// EstimateVRAM computes a VRAM estimate for an installed model. It mirrors
// VRAMEstimateEndpoint without any HTTP coupling.
//
// The weight files considered are the model config's DownloadFiles, Model
// and MMProj entries, de-duplicated by resolved URI.
//
// The context length is taken from req.ContextSize when non-zero, otherwise
// from the model config, otherwise from a built-in default of 8192. When the
// built-in default is used and a GGUF file is available, the response also
// reports the model's maximum context length as a hint.
//
// It returns ErrNameRequired when req.Model is empty, ErrNotFound when no
// config exists for that name, and a wrapped error when the estimation
// itself fails. A model without any weight files is not an error: the
// response then only carries an explanatory ContextNote.
func EstimateVRAM(ctx context.Context, req VRAMRequest, cl *config.ModelConfigLoader, sysState *system.SystemState) (*VRAMResponse, error) {
	const (
		// defaultContextLength is used when neither the request nor the
		// model config supplies a usable context size.
		defaultContextLength = 8192
		// maxContextLength is the largest value EstimateOptions.ContextLength
		// (a uint32) can represent.
		maxContextLength = 1<<32 - 1
	)

	if req.Model == "" {
		return nil, ErrNameRequired
	}
	cfg, exists := cl.GetModelConfig(req.Model)
	if !exists {
		return nil, ErrNotFound
	}

	modelsPath := sysState.Model.ModelsPath

	var files []vram.FileInput
	var firstGGUF string
	seen := make(map[string]bool)

	for _, f := range cfg.DownloadFiles {
		addWeightFile(string(f.URI), modelsPath, &files, &firstGGUF, seen)
	}
	if cfg.Model != "" {
		addWeightFile(cfg.Model, modelsPath, &files, &firstGGUF, seen)
	}
	if cfg.MMProj != "" {
		addWeightFile(cfg.MMProj, modelsPath, &files, &firstGGUF, seen)
	}

	if len(files) == 0 {
		// No weight files: this is reported as a non-error, empty estimate.
		// The response is non-nil and carries only the note, so every
		// caller (HTTP or MCP) can surface the same message.
		return &VRAMResponse{ContextNote: "no weight files found for estimation"}, nil
	}

	opts := vram.EstimateOptions{
		ContextLength: req.ContextSize,
		GPULayers:     req.GPULayers,
		KVQuantBits:   req.KVQuantBits,
	}

	contextDefaulted := false
	if opts.ContextLength == 0 {
		switch {
		case cfg.ContextSize == nil || *cfg.ContextSize <= 0:
			// Unset or nonsensical configured value: a negative number
			// would otherwise wrap to a huge uint32 and produce an absurd
			// estimate, so fall back to the default and say so.
			opts.ContextLength = defaultContextLength
			contextDefaulted = true
		case uint64(*cfg.ContextSize) > maxContextLength:
			// Clamp instead of silently truncating the high bits.
			opts.ContextLength = maxContextLength
		default:
			opts.ContextLength = uint32(*cfg.ContextSize)
		}
	}

	// Bound the whole estimation, including the optional metadata lookup
	// below, so a slow remote size/metadata fetch cannot stall the caller.
	subCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	result, err := vram.Estimate(subCtx, files, opts, vram.DefaultCachedSizeResolver(), vram.DefaultCachedGGUFReader())
	if err != nil {
		return nil, fmt.Errorf("vram estimate: %w", err)
	}

	resp := &VRAMResponse{EstimateResult: result}

	if contextDefaulted && firstGGUF != "" {
		// Best effort: the note is purely informational, so a failed
		// metadata read is deliberately ignored rather than failing an
		// otherwise successful estimate.
		ggufMeta, metaErr := vram.DefaultCachedGGUFReader().ReadMetadata(subCtx, firstGGUF)
		if metaErr == nil && ggufMeta != nil && ggufMeta.MaximumContextLength > 0 {
			resp.ModelMaxContext = ggufMeta.MaximumContextLength
			resp.ContextNote = fmt.Sprintf(
				"Estimate used default context_size=%d. The model's trained maximum context is %d; VRAM usage will be higher at larger context sizes.",
				defaultContextLength,
				ggufMeta.MaximumContextLength,
			)
		}
	}

	return resp, nil
}

// resolveModelURI converts a relative model path to a file:// URI so the
// size resolver can stat it on disk. URIs that already have a scheme
// (anything containing "://") are returned unchanged.
func resolveModelURI(uri, modelsPath string) string {
	if strings.Contains(uri, "://") {
		return uri
	}
	return "file://" + filepath.Join(modelsPath, uri)
}

// addWeightFile appends a resolved weight file to files and tracks the
// first GGUF.
//
// URIs that vram.IsWeightFile rejects are ignored. seen is keyed by the
// resolved URI and is updated in place so the same file is never added
// twice. Size is left at 0 in the appended entry. firstGGUF is set only
// while it is still empty and only when vram.IsGGUF accepts the URI.
func addWeightFile(uri, modelsPath string, files *[]vram.FileInput, firstGGUF *string, seen map[string]bool) {
	if !vram.IsWeightFile(uri) {
		return
	}
	resolved := resolveModelURI(uri, modelsPath)
	if seen[resolved] {
		return
	}
	seen[resolved] = true
	*files = append(*files, vram.FileInput{URI: resolved, Size: 0})
	if *firstGGUF == "" && vram.IsGGUF(uri) {
		*firstGGUF = resolved
	}
}