mirror of
https://github.com/kopia/kopia.git
synced 2026-01-23 13:58:08 -05:00
363 lines
9.1 KiB
Go
363 lines
9.1 KiB
Go
// Package cache implements durable on-disk cache with LRU expiration.
|
|
package cache
|
|
|
|
import (
|
|
"container/heap"
|
|
"context"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
lru "github.com/hashicorp/golang-lru"
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/kopia/kopia/internal/clock"
|
|
"github.com/kopia/kopia/internal/ctxutil"
|
|
"github.com/kopia/kopia/internal/gather"
|
|
"github.com/kopia/kopia/internal/releasable"
|
|
"github.com/kopia/kopia/internal/timetrack"
|
|
"github.com/kopia/kopia/repo/blob"
|
|
"github.com/kopia/kopia/repo/logging"
|
|
)
|
|
|
|
// log is the package-level logger obtained from logging.Module for the "cache" module.
var log = logging.Module("cache")
|
|
|
|
const (
	// DefaultSweepFrequency is how frequently the contents of the cache are swept to remove excess data.
	DefaultSweepFrequency = time.Minute

	// DefaultTouchThreshold specifies the resolution of timestamps used to determine which cache items
	// to expire. This helps reduce cache storage writes for frequently accessed items.
	DefaultTouchThreshold = 10 * time.Minute

	// mutexCacheSize is the size of the mutex cache LRU.
	// In case a mutex is evicted from the cache, the impact will be some redundant reads,
	// which given the size should be extremely rare.
	mutexCacheSize = 10000
)
|
|
|
|
// PersistentCache provides persistent on-disk cache.
|
|
type PersistentCache struct {
|
|
// +checkatomic
|
|
anyChange int32
|
|
|
|
cacheStorage Storage
|
|
storageProtection StorageProtection
|
|
sweep SweepSettings
|
|
|
|
description string
|
|
|
|
periodicSweepRunning sync.WaitGroup
|
|
periodicSweepClosed chan struct{}
|
|
|
|
mutexCache *lru.Cache
|
|
}
|
|
|
|
// CacheStorage returns cache storage.
|
|
func (c *PersistentCache) CacheStorage() Storage {
|
|
return c.cacheStorage
|
|
}
|
|
|
|
// GetFetchingMutex returns a RWMutex used to lock a blob or content during loading.
|
|
func (c *PersistentCache) GetFetchingMutex(key string) *sync.RWMutex {
|
|
if v, ok := c.mutexCache.Get(key); ok {
|
|
// nolint:forcetypeassert
|
|
return v.(*sync.RWMutex)
|
|
}
|
|
|
|
newVal := &sync.RWMutex{}
|
|
|
|
if prevVal, ok, _ := c.mutexCache.PeekOrAdd(key, newVal); ok {
|
|
// nolint:forcetypeassert
|
|
return prevVal.(*sync.RWMutex)
|
|
}
|
|
|
|
return newVal
|
|
}
|
|
|
|
// GetOrLoad is utility function gets the provided item from the cache or invokes the provided fetch function.
|
|
// The function also appends and verifies HMAC checksums using provided secret on all cached items to ensure data integrity.
|
|
func (c *PersistentCache) GetOrLoad(ctx context.Context, key string, fetch func(output *gather.WriteBuffer) error, output *gather.WriteBuffer) error {
|
|
if c == nil {
|
|
// special case - also works on non-initialized cache pointer.
|
|
return fetch(output)
|
|
}
|
|
|
|
if c.GetFull(ctx, key, output) {
|
|
return nil
|
|
}
|
|
|
|
output.Reset()
|
|
|
|
mut := c.GetFetchingMutex(key)
|
|
mut.Lock()
|
|
defer mut.Unlock()
|
|
|
|
// check again while holding the mutex
|
|
if c.GetFull(ctx, key, output) {
|
|
return nil
|
|
}
|
|
|
|
if err := fetch(output); err != nil {
|
|
reportMissError()
|
|
|
|
return err
|
|
}
|
|
|
|
reportMissBytes(int64(output.Length()))
|
|
|
|
c.Put(ctx, key, output.Bytes())
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetFull fetches the contents of a full blob. Returns false if not found.
|
|
func (c *PersistentCache) GetFull(ctx context.Context, key string, output *gather.WriteBuffer) bool {
|
|
return c.GetPartial(ctx, key, 0, -1, output)
|
|
}
|
|
|
|
// GetPartial fetches the contents of a cached blob when (length < 0) or a subset of it (when length >= 0).
|
|
// returns false if not found.
|
|
func (c *PersistentCache) GetPartial(ctx context.Context, key string, offset, length int64, output *gather.WriteBuffer) bool {
|
|
if c == nil {
|
|
return false
|
|
}
|
|
|
|
var tmp gather.WriteBuffer
|
|
defer tmp.Close()
|
|
|
|
if err := c.cacheStorage.GetBlob(ctx, blob.ID(key), offset, length, &tmp); err == nil {
|
|
prot := c.storageProtection
|
|
if length >= 0 {
|
|
// only full items have protection.
|
|
prot = nullStorageProtection{}
|
|
}
|
|
|
|
if err := prot.Verify(key, tmp.Bytes(), output); err == nil {
|
|
// cache hit
|
|
reportHitBytes(int64(output.Length()))
|
|
|
|
// cache hit
|
|
c.cacheStorage.TouchBlob(ctx, blob.ID(key), c.sweep.TouchThreshold) //nolint:errcheck
|
|
|
|
return true
|
|
}
|
|
|
|
// delete invalid blob
|
|
reportMalformedData()
|
|
|
|
if err := c.cacheStorage.DeleteBlob(ctx, blob.ID(key)); err != nil && !errors.Is(err, blob.ErrBlobNotFound) {
|
|
log(ctx).Errorf("unable to delete %v entry %v: %v", c.description, key, err)
|
|
}
|
|
}
|
|
|
|
// cache miss
|
|
l := length
|
|
if l < 0 {
|
|
l = 0
|
|
}
|
|
|
|
reportMissBytes(l)
|
|
|
|
return false
|
|
}
|
|
|
|
// Put adds the provided key-value pair to the cache.
|
|
func (c *PersistentCache) Put(ctx context.Context, key string, data gather.Bytes) {
|
|
if c == nil {
|
|
return
|
|
}
|
|
|
|
atomic.StoreInt32(&c.anyChange, 1)
|
|
|
|
var protected gather.WriteBuffer
|
|
defer protected.Close()
|
|
|
|
c.storageProtection.Protect(key, data, &protected)
|
|
|
|
if err := c.cacheStorage.PutBlob(ctx, blob.ID(key), protected.Bytes(), blob.PutOptions{}); err != nil {
|
|
reportStoreError()
|
|
|
|
log(ctx).Errorf("unable to add %v to %v: %v", key, c.description, err)
|
|
}
|
|
}
|
|
|
|
// Close closes the instance of persistent cache possibly waiting for at least one sweep to complete.
|
|
func (c *PersistentCache) Close(ctx context.Context) {
|
|
if c == nil {
|
|
return
|
|
}
|
|
|
|
close(c.periodicSweepClosed)
|
|
c.periodicSweepRunning.Wait()
|
|
|
|
// if we added anything to the cache in this sesion, run one last sweep before shutting down.
|
|
if atomic.LoadInt32(&c.anyChange) == 1 {
|
|
if err := c.sweepDirectory(ctx); err != nil {
|
|
log(ctx).Errorf("error during final sweep of the %v: %v", c.description, err)
|
|
}
|
|
}
|
|
|
|
releasable.Released("persistent-cache", c)
|
|
}
|
|
|
|
func (c *PersistentCache) sweepDirectoryPeriodically(ctx context.Context) {
|
|
defer c.periodicSweepRunning.Done()
|
|
|
|
for {
|
|
select {
|
|
case <-c.periodicSweepClosed:
|
|
return
|
|
|
|
case <-time.After(c.sweep.SweepFrequency):
|
|
if err := c.sweepDirectory(ctx); err != nil {
|
|
log(ctx).Errorf("error during periodic sweep of %v: %v", c.description, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// A contentMetadataHeap implements heap.Interface and holds blob.Metadata.
|
|
type contentMetadataHeap []blob.Metadata
|
|
|
|
func (h contentMetadataHeap) Len() int { return len(h) }
|
|
|
|
func (h contentMetadataHeap) Less(i, j int) bool {
|
|
return h[i].Timestamp.Before(h[j].Timestamp)
|
|
}
|
|
|
|
func (h contentMetadataHeap) Swap(i, j int) {
|
|
h[i], h[j] = h[j], h[i]
|
|
}
|
|
|
|
func (h *contentMetadataHeap) Push(x interface{}) {
|
|
*h = append(*h, x.(blob.Metadata)) // nolint:forcetypeassert
|
|
}
|
|
|
|
func (h *contentMetadataHeap) Pop() interface{} {
|
|
old := *h
|
|
n := len(old)
|
|
item := old[n-1]
|
|
*h = old[0 : n-1]
|
|
|
|
return item
|
|
}
|
|
|
|
func (c *PersistentCache) sweepDirectory(ctx context.Context) (err error) {
|
|
timer := timetrack.StartTimer()
|
|
|
|
var h contentMetadataHeap
|
|
|
|
var (
|
|
totalRetainedSize int64
|
|
tooRecentBytes int64
|
|
tooRecentCount int
|
|
)
|
|
|
|
err = c.cacheStorage.ListBlobs(ctx, "", func(it blob.Metadata) error {
|
|
// ignore items below minimal age.
|
|
if age := clock.Now().Sub(it.Timestamp); age < c.sweep.MinSweepAge {
|
|
tooRecentCount++
|
|
tooRecentBytes += it.Length
|
|
|
|
return nil
|
|
}
|
|
|
|
heap.Push(&h, it)
|
|
totalRetainedSize += it.Length
|
|
|
|
if totalRetainedSize > c.sweep.MaxSizeBytes {
|
|
oldest := heap.Pop(&h).(blob.Metadata) //nolint:forcetypeassert
|
|
if delerr := c.cacheStorage.DeleteBlob(ctx, oldest.BlobID); delerr != nil {
|
|
log(ctx).Errorf("unable to remove %v: %v", oldest.BlobID, delerr)
|
|
} else {
|
|
totalRetainedSize -= oldest.Length
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error listing %v", c.description)
|
|
}
|
|
|
|
dur := timer.Elapsed()
|
|
|
|
const hundredPercent = 100
|
|
|
|
inUsePercent := int64(hundredPercent)
|
|
|
|
if c.sweep.MaxSizeBytes != 0 {
|
|
inUsePercent = hundredPercent * totalRetainedSize / c.sweep.MaxSizeBytes
|
|
}
|
|
|
|
log(ctx).Debugw(
|
|
"finished sweeping",
|
|
"cache", c.description,
|
|
"duration", dur,
|
|
"totalRetainedSize", totalRetainedSize,
|
|
"tooRecentBytes", tooRecentBytes,
|
|
"tooRecentCount", tooRecentCount,
|
|
"maxSizeBytes", c.sweep.MaxSizeBytes,
|
|
"inUsePercent", inUsePercent,
|
|
)
|
|
|
|
return nil
|
|
}
|
|
|
|
// SweepSettings encapsulates settings that impact cache item sweep/expiration.
type SweepSettings struct {
	// MaxSizeBytes is the total size of sweepable items above which the oldest ones are deleted.
	MaxSizeBytes int64

	// SweepFrequency is the interval between periodic sweeps.
	SweepFrequency time.Duration

	// MinSweepAge is the minimal age of an item for it to be considered by a sweep.
	MinSweepAge time.Duration

	// TouchThreshold is passed to the cache storage when touching an item on a cache hit.
	TouchThreshold time.Duration
}
|
|
|
|
func (s SweepSettings) applyDefaults() SweepSettings {
|
|
if s.TouchThreshold == 0 {
|
|
s.TouchThreshold = DefaultTouchThreshold
|
|
}
|
|
|
|
if s.SweepFrequency == 0 {
|
|
s.SweepFrequency = DefaultSweepFrequency
|
|
}
|
|
|
|
return s
|
|
}
|
|
|
|
// NewPersistentCache creates the persistent cache in the provided storage.
|
|
func NewPersistentCache(ctx context.Context, description string, cacheStorage Storage, storageProtection StorageProtection, sweep SweepSettings) (*PersistentCache, error) {
|
|
if cacheStorage == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
sweep = sweep.applyDefaults()
|
|
|
|
if storageProtection == nil {
|
|
storageProtection = NoProtection()
|
|
}
|
|
|
|
c := &PersistentCache{
|
|
cacheStorage: cacheStorage,
|
|
sweep: sweep,
|
|
periodicSweepClosed: make(chan struct{}),
|
|
description: description,
|
|
storageProtection: storageProtection,
|
|
}
|
|
|
|
c.mutexCache, _ = lru.New(mutexCacheSize)
|
|
|
|
// verify that cache storage is functional by listing from it
|
|
if _, err := c.cacheStorage.GetMetadata(ctx, "test-blob"); err != nil && !errors.Is(err, blob.ErrBlobNotFound) {
|
|
return nil, errors.Wrapf(err, "unable to open %v", c.description)
|
|
}
|
|
|
|
releasable.Created("persistent-cache", c)
|
|
|
|
c.periodicSweepRunning.Add(1)
|
|
|
|
go c.sweepDirectoryPeriodically(ctxutil.Detach(ctx))
|
|
|
|
return c, nil
|
|
}
|