mirror of
https://github.com/kopia/kopia.git
synced 2026-03-27 10:32:08 -04:00
This is done by protecting newly added cache items from being swept for X amount of time, where X defaults to: * `metadata` - 24 hours (new) * `data` - 10 min (new) * `indexes` - 1 hour (same as today). Fixes #1540
191 lines
4.6 KiB
Go
191 lines
4.6 KiB
Go
package content
|
|
|
|
import (
|
|
"context"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
"golang.org/x/exp/mmap"
|
|
|
|
"github.com/kopia/kopia/internal/cache"
|
|
"github.com/kopia/kopia/internal/gather"
|
|
"github.com/kopia/kopia/repo/blob"
|
|
"github.com/kopia/kopia/repo/logging"
|
|
)
|
|
|
|
// simpleIndexSuffix is the file name suffix appended to an index blob ID
// to form the name of its cached index file on disk.
const simpleIndexSuffix = ".sndx"
|
|
|
|
type diskCommittedContentIndexCache struct {
|
|
dirname string
|
|
timeNow func() time.Time
|
|
v1PerContentOverhead uint32
|
|
log logging.Logger
|
|
minSweepAge time.Duration
|
|
}
|
|
|
|
func (c *diskCommittedContentIndexCache) indexBlobPath(indexBlobID blob.ID) string {
|
|
return filepath.Join(c.dirname, string(indexBlobID)+simpleIndexSuffix)
|
|
}
|
|
|
|
func (c *diskCommittedContentIndexCache) openIndex(ctx context.Context, indexBlobID blob.ID) (packIndex, error) {
|
|
fullpath := c.indexBlobPath(indexBlobID)
|
|
|
|
f, err := c.mmapOpenWithRetry(fullpath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return openPackIndex(f, c.v1PerContentOverhead)
|
|
}
|
|
|
|
// mmapOpenWithRetry attempts mmap.Open() with exponential back-off to work around rare issue specific to Windows where
|
|
// we can't open the file right after it has been written.
|
|
func (c *diskCommittedContentIndexCache) mmapOpenWithRetry(path string) (*mmap.ReaderAt, error) {
|
|
const (
|
|
maxRetries = 8
|
|
startingDelay = 10 * time.Millisecond
|
|
)
|
|
|
|
// retry milliseconds: 10, 20, 40, 80, 160, 320, 640, 1280, total ~2.5s
|
|
f, err := mmap.Open(path)
|
|
nextDelay := startingDelay
|
|
|
|
retryCount := 0
|
|
for err != nil && retryCount < maxRetries {
|
|
retryCount++
|
|
c.log.Debugf("retry #%v unable to mmap.Open(): %v", retryCount, err)
|
|
time.Sleep(nextDelay)
|
|
nextDelay *= 2
|
|
f, err = mmap.Open(path)
|
|
}
|
|
|
|
return f, errors.Wrap(err, "mmap() error")
|
|
}
|
|
|
|
func (c *diskCommittedContentIndexCache) hasIndexBlobID(ctx context.Context, indexBlobID blob.ID) (bool, error) {
|
|
_, err := os.Stat(c.indexBlobPath(indexBlobID))
|
|
if err == nil {
|
|
return true, nil
|
|
}
|
|
|
|
if os.IsNotExist(err) {
|
|
return false, nil
|
|
}
|
|
|
|
return false, errors.Wrapf(err, "error checking %v", indexBlobID)
|
|
}
|
|
|
|
func (c *diskCommittedContentIndexCache) addContentToCache(ctx context.Context, indexBlobID blob.ID, data gather.Bytes) error {
|
|
exists, err := c.hasIndexBlobID(ctx, indexBlobID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if exists {
|
|
return nil
|
|
}
|
|
|
|
tmpFile, err := writeTempFileAtomic(c.dirname, data.ToByteSlice())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// rename() is atomic, so one process will succeed, but the other will fail
|
|
if err := os.Rename(tmpFile, c.indexBlobPath(indexBlobID)); err != nil {
|
|
// verify that the content exists
|
|
exists, err := c.hasIndexBlobID(ctx, indexBlobID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !exists {
|
|
return errors.Errorf("unsuccessful index write of content %q", indexBlobID)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func writeTempFileAtomic(dirname string, data []byte) (string, error) {
|
|
// write to a temp file to avoid race where two processes are writing at the same time.
|
|
tf, err := os.CreateTemp(dirname, "tmp")
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
os.MkdirAll(dirname, cache.DirMode) //nolint:errcheck
|
|
tf, err = os.CreateTemp(dirname, "tmp")
|
|
}
|
|
}
|
|
|
|
if err != nil {
|
|
return "", errors.Wrap(err, "can't create tmp file")
|
|
}
|
|
|
|
if _, err := tf.Write(data); err != nil {
|
|
return "", errors.Wrap(err, "can't write to temp file")
|
|
}
|
|
|
|
if err := tf.Close(); err != nil {
|
|
return "", errors.Errorf("can't close tmp file")
|
|
}
|
|
|
|
return tf.Name(), nil
|
|
}
|
|
|
|
func (c *diskCommittedContentIndexCache) expireUnused(ctx context.Context, used []blob.ID) error {
|
|
c.log.Debugw("expireUnused",
|
|
"except", used,
|
|
"minSweepAge", c.minSweepAge)
|
|
|
|
entries, err := os.ReadDir(c.dirname)
|
|
if err != nil {
|
|
return errors.Wrap(err, "can't list cache")
|
|
}
|
|
|
|
remaining := map[blob.ID]os.FileInfo{}
|
|
|
|
for _, ent := range entries {
|
|
fi, err := ent.Info()
|
|
if os.IsNotExist(err) {
|
|
// we lost the race, the file was deleted since it was listed.
|
|
continue
|
|
}
|
|
|
|
if err != nil {
|
|
return errors.Wrap(err, "failed to read file info")
|
|
}
|
|
|
|
if strings.HasSuffix(ent.Name(), simpleIndexSuffix) {
|
|
n := strings.TrimSuffix(ent.Name(), simpleIndexSuffix)
|
|
remaining[blob.ID(n)] = fi
|
|
}
|
|
}
|
|
|
|
for _, u := range used {
|
|
delete(remaining, u)
|
|
}
|
|
|
|
for _, rem := range remaining {
|
|
if c.timeNow().Sub(rem.ModTime()) > c.minSweepAge {
|
|
c.log.Debugw("removing unused",
|
|
"name", rem.Name(),
|
|
"mtime", rem.ModTime())
|
|
|
|
if err := os.Remove(filepath.Join(c.dirname, rem.Name())); err != nil {
|
|
c.log.Errorf("unable to remove unused index file: %v", err)
|
|
}
|
|
} else {
|
|
c.log.Debugw("keeping unused index because it's too new",
|
|
"name", rem.Name(),
|
|
"mtime", rem.ModTime(),
|
|
"threshold", c.minSweepAge)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|