mirror of
https://github.com/kopia/kopia.git
synced 2026-05-04 04:43:07 -04:00
`snapshot gc` marks contents not reachable from the root of any snapshot as soft-deleted. The algorithm is a mark-and-sweep with parallel iteration of objects. Currently it stores content IDs and object IDs in a map, so it won't scale to huge repositories, but this can be fixed in the future. This fixes #110, at least for reasonable repository sizes.
131 lines
4.1 KiB
Go
131 lines
4.1 KiB
Go
package cli
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/kopia/kopia/internal/units"
|
|
"github.com/kopia/kopia/repo"
|
|
"github.com/kopia/kopia/repo/content"
|
|
"github.com/kopia/kopia/repo/manifest"
|
|
"github.com/kopia/kopia/repo/object"
|
|
"github.com/kopia/kopia/snapshot"
|
|
"github.com/kopia/kopia/snapshot/snapshotfs"
|
|
)
|
|
|
|
var (
|
|
snapshotGCCommand = snapshotCommands.Command("gc", "Remove contents not used by any snapshot")
|
|
snapshotGCMinContentAge = snapshotGCCommand.Flag("min-age", "Minimum content age to allow deletion").Default("24h").Duration()
|
|
snapshotGCDelete = snapshotGCCommand.Flag("delete", "Delete unreferenced contents").Bool()
|
|
)
|
|
|
|
func findInUseContentIDs(ctx context.Context, rep *repo.Repository, used *sync.Map) error {
|
|
w := snapshotfs.NewTreeWalker()
|
|
|
|
ids, err := snapshot.ListSnapshotManifests(ctx, rep, nil)
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to list snapshot manifest IDs")
|
|
}
|
|
|
|
manifests, err := snapshot.LoadSnapshots(ctx, rep, ids)
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to load manifest IDs")
|
|
}
|
|
|
|
for _, m := range manifests {
|
|
root, err := snapshotfs.SnapshotRoot(rep, m)
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to get snapshot root")
|
|
}
|
|
w.RootEntries = append(w.RootEntries, root)
|
|
}
|
|
|
|
w.ObjectCallback = func(oid object.ID) error {
|
|
_, contentIDs, err := rep.Objects.VerifyObject(ctx, oid)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "error verifying %v", oid)
|
|
}
|
|
for _, cid := range contentIDs {
|
|
used.Store(cid, nil)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
log.Infof("looking for active contents")
|
|
if err := w.Run(ctx); err != nil {
|
|
return errors.Wrap(err, "error walking snapshot tree")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func runSnapshotGCCommand(ctx context.Context, rep *repo.Repository) error {
|
|
var used sync.Map
|
|
if err := findInUseContentIDs(ctx, rep, &used); err != nil {
|
|
return errors.Wrap(err, "unable to find in-use content ID")
|
|
}
|
|
|
|
var unusedCount, inUseCount, systemCount, tooRecentCount int32
|
|
var totalUnusedBytes, totalInUseBytes, totalSystemBytes, totalTooRecentBytes int64
|
|
|
|
log.Infof("looking for unreferenced contents")
|
|
if err := rep.Content.IterateContents(content.IterateOptions{}, func(ci content.Info) error {
|
|
if manifest.ContentPrefix == ci.ID.Prefix() {
|
|
atomic.AddInt32(&systemCount, 1)
|
|
atomic.AddInt64(&totalSystemBytes, int64(ci.Length))
|
|
return nil
|
|
}
|
|
|
|
if _, ok := used.Load(ci.ID); !ok {
|
|
if time.Since(ci.Timestamp()) < *snapshotGCMinContentAge {
|
|
log.Debugf("recent unreferenced content %v (%v bytes, modified %v)", ci.ID, ci.Length, ci.Timestamp())
|
|
atomic.AddInt32(&tooRecentCount, 1)
|
|
atomic.AddInt64(&totalTooRecentBytes, int64(ci.Length))
|
|
return nil
|
|
}
|
|
log.Debugf("unreferenced %v (%v bytes, modified %v)", ci.ID, ci.Length, ci.Timestamp())
|
|
cnt := atomic.AddInt32(&unusedCount, 1)
|
|
totalSize := atomic.AddInt64(&totalUnusedBytes, int64(ci.Length))
|
|
if *snapshotGCDelete {
|
|
if err := rep.Content.DeleteContent(ci.ID); err != nil {
|
|
return errors.Wrap(err, "error deleting content")
|
|
}
|
|
}
|
|
|
|
if cnt%100000 == 0 {
|
|
log.Infof("... found %v unused contents so far (%v bytes)", cnt, units.BytesStringBase2(totalSize))
|
|
if *snapshotGCDelete {
|
|
if err := rep.Flush(ctx); err != nil {
|
|
return errors.Wrap(err, "flush error")
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
atomic.AddInt32(&inUseCount, 1)
|
|
atomic.AddInt64(&totalInUseBytes, int64(ci.Length))
|
|
}
|
|
return nil
|
|
}); err != nil {
|
|
return errors.Wrap(err, "error iterating contents")
|
|
}
|
|
|
|
log.Infof("found %v unused contents (%v bytes)", unusedCount, units.BytesStringBase2(totalUnusedBytes))
|
|
log.Infof("found %v unused contents that are too recent to delete (%v bytes)", tooRecentCount, units.BytesStringBase2(totalTooRecentBytes))
|
|
log.Infof("found %v in-use contents (%v bytes)", inUseCount, units.BytesStringBase2(totalInUseBytes))
|
|
log.Infof("found %v in-use system-contents (%v bytes)", systemCount, units.BytesStringBase2(totalSystemBytes))
|
|
|
|
if unusedCount > 0 && !*snapshotGCDelete {
|
|
return errors.Errorf("Not deleting because '--delete' flag was not set.")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func init() {
|
|
snapshotGCCommand.Action(repositoryAction(runSnapshotGCCommand))
|
|
}
|