kopia/repo/content/content_cache_metadata.go
Jarek Kowalski 35d0f31c0d huge: replaced the use of allocated byte slices with populating gather.WriteBuffer in the repository (#1244)
This helps recycle buffers more efficiently during snapshots.
Also improved memory tracking, enabled profiling flags, and added pprof by default.
2021-08-20 08:45:10 -07:00
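The pattern the commit describes, as a minimal sketch (readAll is a hypothetical helper, not part of the file below, reusing the same imports): rather than each read returning a freshly allocated []byte, the caller hands the storage layer a reusable gather.WriteBuffer to populate, and closing the buffer recycles its backing slices.

func readAll(ctx context.Context, st blob.Storage, id blob.ID) (int, error) {
	// The caller owns the buffer; Close recycles its backing slices
	// instead of leaving a one-off allocation for the GC.
	var buf gather.WriteBuffer
	defer buf.Close()

	// GetBlob fills the provided buffer in place of allocating a new slice.
	if err := st.GetBlob(ctx, id, 0, -1, &buf); err != nil {
		return 0, errors.Wrap(err, "error reading blob")
	}

	return buf.Length(), nil
}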


package content

import (
	"context"
	"hash/fnv"
	"io"
	"sync"

	"github.com/pkg/errors"
	"go.opencensus.io/stats"
	"golang.org/x/sync/errgroup"

	"github.com/kopia/kopia/internal/cache"
	"github.com/kopia/kopia/internal/gather"
	"github.com/kopia/kopia/repo/blob"
)
const (
	metadataCacheSyncParallelism = 16
	metadataCacheMutexShards     = 256
)
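
// contentCacheForMetadata is a contentCache for metadata blobs; it guards
// per-blob reads with a fixed pool of mutexes, sharded by blob ID, so that
// concurrent readers of different blobs rarely contend on the same lock.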
type contentCacheForMetadata struct {
	pc *cache.PersistentCache
	st blob.Storage

	shardedMutexes [metadataCacheMutexShards]sync.Mutex
}
// sync synchronizes the metadata cache with all metadata blobs found in the storage.
func (c *contentCacheForMetadata) sync(ctx context.Context) error {
	sem := make(chan struct{}, metadataCacheSyncParallelism)

	var eg errgroup.Group

	// list all blobs and fetch contents into the cache in parallel.
	if err := c.st.ListBlobs(ctx, PackBlobIDPrefixSpecial, func(bm blob.Metadata) error {
		// acquire a semaphore slot, bounding the number of in-flight fetches.
		sem <- struct{}{}

		eg.Go(func() error {
			defer func() {
				<-sem // release the slot
			}()

			var tmp gather.WriteBuffer
			defer tmp.Close()

			// requesting a single byte is enough: on a cache miss,
			// getContent reads and caches the entire blob.
			return c.getContent(ctx, "dummy", bm.BlobID, 0, 1, &tmp)
		})

		return nil
	}); err != nil {
		return errors.Wrap(err, "error listing blobs")
	}

	return errors.Wrap(eg.Wait(), "error synchronizing metadata cache")
}
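
// mutexForBlob returns the mutex guarding the given blob, selected by hashing
// the blob ID into one of metadataCacheMutexShards sharded mutexes.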
func (c *contentCacheForMetadata) mutexForBlob(blobID blob.ID) *sync.Mutex {
	// hash the blob ID to pick one of the sharded mutexes.
	h := fnv.New32()
	io.WriteString(h, string(blobID)) //nolint:errcheck

	mutexID := h.Sum32() % metadataCacheMutexShards

	return &c.shardedMutexes[mutexID]
}
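
// getContent writes the given section of a blob to the output buffer, serving
// it from the persistent cache when possible; on a miss it reads the entire
// blob from the underlying storage and caches it whole.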
func (c *contentCacheForMetadata) getContent(ctx context.Context, cacheKey cacheKey, blobID blob.ID, offset, length int64, output *gather.WriteBuffer) error {
	// try getting from the cache first.
	if c.pc.Get(ctx, string(blobID), offset, length, output) {
		return nil
	}

	// lock the mutex shard for this blob, so that concurrent cache misses
	// do not fetch and store the same blob multiple times.
	m := c.mutexForBlob(blobID)
	m.Lock()
	defer m.Unlock()

	var blobData gather.WriteBuffer
	defer blobData.Close()

	// read the entire blob.
	err := c.st.GetBlob(ctx, blobID, 0, -1, &blobData)

	if err != nil {
		stats.Record(ctx, cache.MetricMissErrors.M(1))
	} else {
		stats.Record(ctx, cache.MetricMissBytes.M(int64(blobData.Length())))
	}

	if errors.Is(err, blob.ErrBlobNotFound) {
		// not found in the underlying storage.
		// nolint:wrapcheck
		return err
	}

	if err != nil {
		// nolint:wrapcheck
		return err
	}

	// store the whole blob in the cache.
	c.pc.Put(ctx, string(blobID), blobData.Bytes())

	if offset == 0 && length == -1 {
		_, err := blobData.Bytes().WriteTo(output)

		return errors.Wrap(err, "error copying results")
	}

	if offset < 0 || offset+length > int64(blobData.Length()) {
		return errors.Errorf("invalid (offset=%v,length=%v) for blob %q of size %v", offset, length, blobID, blobData.Length())
	}

	output.Reset()

	if err := blobData.AppendSectionTo(output, int(offset), int(length)); err != nil {
		// should never happen for an in-range section.
		return errors.Wrap(err, "error appending to result")
	}

	return nil
}
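
// close releases the persistent cache owned by this content cache.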
func (c *contentCacheForMetadata) close(ctx context.Context) {
	c.pc.Close(ctx)
}
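
// newContentCacheForMetadata returns a contentCache for metadata blobs backed
// by the provided cache storage; when cacheStorage is nil, it returns a
// passthrough cache that always reads from the underlying storage.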
func newContentCacheForMetadata(ctx context.Context, st blob.Storage, cacheStorage cache.Storage, maxSizeBytes int64) (contentCache, error) {
	if cacheStorage == nil {
		return passthroughContentCache{st}, nil
	}

	pc, err := cache.NewPersistentCache(ctx, "metadata cache", cacheStorage, cache.NoProtection(), maxSizeBytes, cache.DefaultTouchThreshold, cache.DefaultSweepFrequency)
	if err != nil {
		return nil, errors.Wrap(err, "unable to create base cache")
	}

	return &contentCacheForMetadata{
		st: st,
		pc: pc,
	}, nil
}