kopia/repo/content/content_prefetch.go
Last commit: b55d5b474c by Jarek Kowalski (2024-04-12)
refactor(repository): refactored internal index read API to reduce memory allocations (#3754)

* fixed stress test flake, improved debuggability
* fixed spurious checklocks failures
* post-merge fixes
* PR feedback


package content

import (
    "context"
    "math"
    "strings"
    "sync"

    "github.com/kopia/kopia/internal/gather"
    "github.com/kopia/kopia/repo/blob"
)
// prefetchOptions controls the heuristic that decides whether to prefetch
// entire pack blobs or only the individual contents within them.
type prefetchOptions struct {
    fullBlobPrefetchCountThreshold int
    fullBlobPrefetchBytesThreshold int64
}

//nolint:gochecknoglobals
var defaultPrefetchOptions = &prefetchOptions{2, 5e6}
//nolint:gochecknoglobals
var prefetchHintToOptions = map[string]*prefetchOptions{
    "":        defaultPrefetchOptions,
    "default": defaultPrefetchOptions,
    "contents": {
        fullBlobPrefetchCountThreshold: math.MaxInt,
        fullBlobPrefetchBytesThreshold: math.MaxInt64,
    },
    "blobs": {
        fullBlobPrefetchCountThreshold: 0,
        fullBlobPrefetchBytesThreshold: 0,
    },
}
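
// The hint passed to PrefetchContents selects one of the presets above:
// "" and "default" prefetch a whole pack blob only when shouldPrefetchEntireBlob
// considers it worthwhile, "contents" always fetches individual contents,
// "blobs" always prefetches entire pack blobs, and "none" (handled directly in
// PrefetchContents rather than via this map) only resolves content IDs without
// fetching any data.
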
// shouldPrefetchEntireBlob reports whether fetching the entire pack blob is
// preferable to fetching its contents individually: it returns true when at
// least fullBlobPrefetchCountThreshold contents with a combined packed size
// of at least fullBlobPrefetchBytesThreshold would be read from the blob.
func (o *prefetchOptions) shouldPrefetchEntireBlob(infos []Info) bool {
    if len(infos) < o.fullBlobPrefetchCountThreshold {
        return false
    }

    var total int64

    for _, i := range infos {
        total += int64(i.PackedLength)
    }

    return total >= o.fullBlobPrefetchBytesThreshold
}
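
// For example, with defaultPrefetchOptions (count threshold 2, byte threshold
// 5e6): two 3 MB contents packed into one blob trigger a whole-blob prefetch,
// whereas a single 10 MB content or two 1 MB contents are fetched individually.
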
// PrefetchContents fetches the provided content IDs into the cache and returns
// the subset of IDs that were found in the current index. Note that due to
// cache configuration, it is not guaranteed that all contents will actually be
// added to the cache.
//
//nolint:gocyclo
func (bm *WriteManager) PrefetchContents(ctx context.Context, contentIDs []ID, hint string) []ID {
    o := prefetchHintToOptions[hint]
    if o == nil {
        o = defaultPrefetchOptions
    }

    bm.mu.RLock()
    defer bm.mu.RUnlock()

    var (
        prefetched []ID

        // keep track of which contents we will be fetching from each pack blob
        contentsByBlob = map[blob.ID][]Info{}
    )

    for _, ci := range contentIDs {
        _, bi, _ := bm.getContentInfoReadLocked(ctx, ci)
        if bi == (Info{}) {
            // content not found in the index - skip it
            continue
        }

        contentsByBlob[bi.PackBlobID] = append(contentsByBlob[bi.PackBlobID], bi)
        prefetched = append(prefetched, ci)
    }

    // the "none" hint only resolves content IDs without fetching any data.
    if hint == "none" {
        return prefetched
    }

    // each work item requests either a whole-blob prefetch or a single-content
    // fetch.
    type work struct {
        blobID    blob.ID
        contentID ID
    }

    workCh := make(chan work)

    var wg sync.WaitGroup

    // producer: enqueue one item per blob when prefetching the entire blob is
    // worthwhile, otherwise one item per content.
    go func() {
        defer close(workCh)

        for b, infos := range contentsByBlob {
            if o.shouldPrefetchEntireBlob(infos) {
                workCh <- work{blobID: b}
            } else {
                for _, bi := range infos {
                    workCh <- work{contentID: bi.ContentID}
                }
            }
        }
    }()

    // consumers: drain the work channel in parallel, routing whole-blob
    // prefetches to the appropriate cache and fetching individual contents.
    for range parallelFetches {
        wg.Add(1)

        go func() {
            defer wg.Done()

            var tmp gather.WriteBuffer
            defer tmp.Close()

            for w := range workCh {
                switch {
                case strings.HasPrefix(string(w.blobID), string(PackBlobIDPrefixRegular)):
                    if err := bm.contentCache.PrefetchBlob(ctx, w.blobID); err != nil {
                        bm.log.Debugw("error prefetching data blob", "blobID", w.blobID, "err", err)
                    }
                case strings.HasPrefix(string(w.blobID), string(PackBlobIDPrefixSpecial)):
                    if err := bm.metadataCache.PrefetchBlob(ctx, w.blobID); err != nil {
                        bm.log.Debugw("error prefetching metadata blob", "blobID", w.blobID, "err", err)
                    }
                case w.contentID != EmptyID:
                    // reading the content populates the cache as a side effect.
                    tmp.Reset()

                    if _, err := bm.getContentDataAndInfo(ctx, w.contentID, &tmp); err != nil {
                        bm.log.Debugw("error prefetching content", "contentID", w.contentID, "err", err)
                    }
                }
            }
        }()
    }

    wg.Wait()

    return prefetched
}
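
A minimal usage sketch, assuming an already-open repository exposing this
WriteManager as bm and a slice of content IDs named ids (both hypothetical
names; setup elided):

    // Warm the local cache before a bulk read. The returned slice holds the
    // subset of the requested IDs that were found in the index.
    prefetched := bm.PrefetchContents(ctx, ids, "default")
    fmt.Printf("prefetched %d of %d contents\n", len(prefetched), len(ids))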