Files
LocalAI/pkg/vram/cache.go
Ettore Di Giacinto 983db7bedc feat(ui): add model size estimation (#8684)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
2026-02-28 23:03:47 +01:00

97 lines
2.8 KiB
Go

package vram
import (
"context"
"sync"
"time"
)
// defaultEstimateCacheTTL is how long the package-wide default caches keep a
// stored result before it is looked up again.
const defaultEstimateCacheTTL time.Duration = 15 * time.Minute
// sizeCacheEntry is one stored ContentLength outcome for a single URI.
type sizeCacheEntry struct {
// size is the length returned by the underlying resolver.
size int64
// err is the error returned alongside size; it is stored and replayed with it.
err error
// until is the expiry instant: the entry is served only while time.Now() is before it.
until time.Time
}
// cachedSizeResolver wraps a SizeResolver and remembers its results per URI
// for a fixed time-to-live. It contains a mutex, so it is used through a pointer.
type cachedSizeResolver struct {
// underlying is the resolver consulted when no unexpired entry exists.
underlying SizeResolver
// ttl is added to time.Now() to compute each new entry's expiry.
ttl time.Duration
// mu guards cache; it is not held while underlying is being called.
mu sync.Mutex
// cache maps a URI to its most recently stored result.
cache map[string]sizeCacheEntry
}
// ContentLength returns the size reported by the underlying resolver for uri,
// replaying a previously stored result while its TTL has not elapsed.
//
// Successful results and errors are both stored, so a failing URI is not
// re-queried on every call. The one exception is a failure observed while ctx
// is already done: such an error reflects this caller's cancellation or
// deadline rather than the URI, and storing it would hand "context canceled"
// to unrelated callers until the entry expires.
//
// The mutex is released while the underlying resolver runs, so concurrent
// misses for the same uri may each call it; the last one to finish overwrites
// the entry.
func (c *cachedSizeResolver) ContentLength(ctx context.Context, uri string) (int64, error) {
	c.mu.Lock()
	e, ok := c.cache[uri]
	c.mu.Unlock()
	if ok && time.Now().Before(e.until) {
		return e.size, e.err
	}

	size, err := c.underlying.ContentLength(ctx, uri)
	// Do not poison the shared cache with a caller-specific cancellation.
	// The nil guard keeps a (contract-violating) nil ctx from panicking here.
	if err != nil && ctx != nil && ctx.Err() != nil {
		return size, err
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	// Writing to a nil map panics; tolerate a zero-value resolver.
	if c.cache == nil {
		c.cache = make(map[string]sizeCacheEntry)
	}
	c.cache[uri] = sizeCacheEntry{size: size, err: err, until: time.Now().Add(c.ttl)}
	return size, err
}
// ggufCacheEntry is one stored ReadMetadata outcome for a single URI.
type ggufCacheEntry struct {
// meta is the metadata pointer returned by the underlying reader.
meta *GGUFMeta
// err is the error returned alongside meta; it is stored and replayed with it.
err error
// until is the expiry instant: the entry is served only while time.Now() is before it.
until time.Time
}
// cachedGGUFReader wraps a GGUFMetadataReader and remembers its results per
// URI for a fixed time-to-live. It contains a mutex, so it is used through a pointer.
type cachedGGUFReader struct {
// underlying is the reader consulted when no unexpired entry exists.
underlying GGUFMetadataReader
// ttl is added to time.Now() to compute each new entry's expiry.
ttl time.Duration
// mu guards cache; it is not held while underlying is being called.
mu sync.Mutex
// cache maps a URI to its most recently stored result.
cache map[string]ggufCacheEntry
}
// ReadMetadata returns the GGUF metadata reported by the underlying reader for
// uri, replaying a previously stored result while its TTL has not elapsed.
//
// Successful results and errors are both stored, so a failing URI is not
// re-fetched on every call. The one exception is a failure observed while ctx
// is already done: such an error reflects this caller's cancellation or
// deadline rather than the URI, and storing it would hand "context canceled"
// to unrelated callers until the entry expires.
//
// The returned *GGUFMeta is the same pointer for every caller served from the
// cache. The mutex is released while the underlying reader runs, so concurrent
// misses for the same uri may each call it; the last one to finish overwrites
// the entry.
func (c *cachedGGUFReader) ReadMetadata(ctx context.Context, uri string) (*GGUFMeta, error) {
	c.mu.Lock()
	e, ok := c.cache[uri]
	c.mu.Unlock()
	if ok && time.Now().Before(e.until) {
		return e.meta, e.err
	}

	meta, err := c.underlying.ReadMetadata(ctx, uri)
	// Do not poison the shared cache with a caller-specific cancellation.
	// The nil guard keeps a (contract-violating) nil ctx from panicking here.
	if err != nil && ctx != nil && ctx.Err() != nil {
		return meta, err
	}

	c.mu.Lock()
	defer c.mu.Unlock()
	// Writing to a nil map panics; tolerate a zero-value reader.
	if c.cache == nil {
		c.cache = make(map[string]ggufCacheEntry)
	}
	c.cache[uri] = ggufCacheEntry{meta: meta, err: err, until: time.Now().Add(c.ttl)}
	return meta, err
}
// CachedSizeResolver wraps underlying in a SizeResolver that remembers each
// ContentLength result per URI and replays it until ttl has elapsed.
// Every call builds an independent resolver with its own empty cache.
func CachedSizeResolver(underlying SizeResolver, ttl time.Duration) SizeResolver {
	resolver := &cachedSizeResolver{
		underlying: underlying,
		ttl:        ttl,
		cache:      map[string]sizeCacheEntry{},
	}
	return resolver
}
// CachedGGUFReader wraps underlying in a GGUFMetadataReader that remembers each
// ReadMetadata result per URI and replays it until ttl has elapsed.
// Every call builds an independent reader with its own empty cache.
func CachedGGUFReader(underlying GGUFMetadataReader, ttl time.Duration) GGUFMetadataReader {
	reader := &cachedGGUFReader{
		underlying: underlying,
		ttl:        ttl,
		cache:      map[string]ggufCacheEntry{},
	}
	return reader
}
// DefaultCachedSizeResolver returns the package-wide cached SizeResolver, built
// from the default implementation with the default TTL (15 min).
// Every call hands back the same instance, so its cache is shared and repeated
// HEAD requests for the same URI are avoided across requests.
func DefaultCachedSizeResolver() SizeResolver {
	shared := defaultCachedSizeResolver
	return shared
}
// DefaultCachedGGUFReader returns the package-wide cached GGUFMetadataReader,
// built from the default implementation with the default TTL (15 min).
// Every call hands back the same instance, so its cache is shared and repeated
// GGUF metadata fetches for the same URI are avoided across requests.
func DefaultCachedGGUFReader() GGUFMetadataReader {
	shared := defaultCachedGGUFReader
	return shared
}
// Package-wide cached instances handed out by DefaultCachedSizeResolver and
// DefaultCachedGGUFReader. Each is built once, at package initialization, with
// defaultEstimateCacheTTL, so all callers share the same cache.
var (
// defaultCachedSizeResolver caches ContentLength results of defaultSizeResolver.
defaultCachedSizeResolver = CachedSizeResolver(defaultSizeResolver{}, defaultEstimateCacheTTL)
// defaultCachedGGUFReader caches ReadMetadata results of defaultGGUFReader.
defaultCachedGGUFReader = CachedGGUFReader(defaultGGUFReader{}, defaultEstimateCacheTTL)
)