Files
kopia/cli/command_snapshot_gc.go
Jarek Kowalski 3b1177cc4e cli: implemented 'snapshot gc' and moved verify to be snapshot verify
`snapshot gc` marks contents not reachable from the root of any snapshot
as soft-deleted

The algorithm is a mark-and-sweep with parallel iteration of objects.
Currently it stores content IDs and object IDs in an in-memory map, so it won't
scale to huge repositories, but this can be fixed in the future.

This fixes #110 at least for reasonable repository sizes.
2019-09-15 21:25:48 -07:00

131 lines
4.1 KiB
Go

package cli
import (
"context"
"sync"
"sync/atomic"
"time"
"github.com/pkg/errors"
"github.com/kopia/kopia/internal/units"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/content"
"github.com/kopia/kopia/repo/manifest"
"github.com/kopia/kopia/repo/object"
"github.com/kopia/kopia/snapshot"
"github.com/kopia/kopia/snapshot/snapshotfs"
)
// Command-line definition of the "gc" subcommand and its flags.
var (
// snapshotGCCommand is the "gc" subcommand registered under snapshotCommands.
snapshotGCCommand = snapshotCommands.Command("gc", "Remove contents not used by any snapshot")
// snapshotGCMinContentAge holds the --min-age flag (default 24h). runSnapshotGCCommand
// does not delete unreferenced contents whose age is below this value.
snapshotGCMinContentAge = snapshotGCCommand.Flag("min-age", "Minimum content age to allow deletion").Default("24h").Duration()
// snapshotGCDelete holds the --delete flag. Unless it is set, runSnapshotGCCommand
// only reports unreferenced contents and does not delete them.
snapshotGCDelete = snapshotGCCommand.Flag("delete", "Delete unreferenced contents").Bool()
)
// findInUseContentIDs walks the trees of all snapshots in rep and records, as
// keys in used (with nil values), the IDs of the contents returned by
// verifying each object the walker reports.
//
// An error is returned when the snapshot manifests cannot be listed or loaded,
// when the root of a snapshot cannot be obtained, or when the tree walk
// (including verification of any object) fails.
func findInUseContentIDs(ctx context.Context, rep *repo.Repository, used *sync.Map) error {
	walker := snapshotfs.NewTreeWalker()

	manifestIDs, listErr := snapshot.ListSnapshotManifests(ctx, rep, nil)
	if listErr != nil {
		return errors.Wrap(listErr, "unable to list snapshot manifest IDs")
	}

	snapshots, loadErr := snapshot.LoadSnapshots(ctx, rep, manifestIDs)
	if loadErr != nil {
		return errors.Wrap(loadErr, "unable to load manifest IDs")
	}

	// Every snapshot contributes one root entry to the walk.
	for _, man := range snapshots {
		rootEntry, rootErr := snapshotfs.SnapshotRoot(rep, man)
		if rootErr != nil {
			return errors.Wrap(rootErr, "unable to get snapshot root")
		}

		w := walker
		w.RootEntries = append(w.RootEntries, rootEntry)
	}

	// markObject verifies a single object and marks all contents it is made
	// of as in-use.
	markObject := func(oid object.ID) error {
		_, cids, verifyErr := rep.Objects.VerifyObject(ctx, oid)
		if verifyErr != nil {
			return errors.Wrapf(verifyErr, "error verifying %v", oid)
		}

		for i := range cids {
			used.Store(cids[i], nil)
		}

		return nil
	}
	walker.ObjectCallback = markObject

	log.Infof("looking for active contents")

	if walkErr := walker.Run(ctx); walkErr != nil {
		return errors.Wrap(walkErr, "error walking snapshot tree")
	}

	return nil
}
// runSnapshotGCCommand implements the "gc" subcommand.
//
// It first collects the IDs of all contents referenced by snapshots (via
// findInUseContentIDs) and then iterates over the contents of rep, sorting
// each one into one of four buckets:
//
//   - system: the content ID prefix equals manifest.ContentPrefix;
//   - in-use: the content ID was recorded by findInUseContentIDs;
//   - too recent: unreferenced, but younger than --min-age;
//   - unused: unreferenced and old enough; deleted when --delete is set.
//
// Counts and byte totals for every bucket are logged at the end. When unused
// contents were found but --delete was not given, an error is returned.
func runSnapshotGCCommand(ctx context.Context, rep *repo.Repository) error {
	var used sync.Map

	if err := findInUseContentIDs(ctx, rep, &used); err != nil {
		return errors.Wrap(err, "unable to find in-use content ID")
	}

	// Counters are updated with atomic operations inside the iteration callback.
	var (
		unusedCount, inUseCount, systemCount, tooRecentCount                     int32
		totalUnusedBytes, totalInUseBytes, totalSystemBytes, totalTooRecentBytes int64
	)

	// Progress is reported (and pending deletions flushed) once per this many
	// unused contents.
	const progressInterval = 100000

	visit := func(ci content.Info) error {
		size := int64(ci.Length)

		// Contents with the manifest prefix are counted as system contents
		// and are never considered for deletion.
		if ci.ID.Prefix() == manifest.ContentPrefix {
			atomic.AddInt32(&systemCount, 1)
			atomic.AddInt64(&totalSystemBytes, size)

			return nil
		}

		if _, referenced := used.Load(ci.ID); referenced {
			atomic.AddInt32(&inUseCount, 1)
			atomic.AddInt64(&totalInUseBytes, size)

			return nil
		}

		// Unreferenced, but not old enough to be deleted yet.
		if time.Since(ci.Timestamp()) < *snapshotGCMinContentAge {
			log.Debugf("recent unreferenced content %v (%v bytes, modified %v)", ci.ID, ci.Length, ci.Timestamp())
			atomic.AddInt32(&tooRecentCount, 1)
			atomic.AddInt64(&totalTooRecentBytes, size)

			return nil
		}

		log.Debugf("unreferenced %v (%v bytes, modified %v)", ci.ID, ci.Length, ci.Timestamp())

		seen := atomic.AddInt32(&unusedCount, 1)
		bytesSoFar := atomic.AddInt64(&totalUnusedBytes, size)

		if *snapshotGCDelete {
			if err := rep.Content.DeleteContent(ci.ID); err != nil {
				return errors.Wrap(err, "error deleting content")
			}
		}

		if seen%progressInterval != 0 {
			return nil
		}

		log.Infof("... found %v unused contents so far (%v bytes)", seen, units.BytesStringBase2(bytesSoFar))

		if !*snapshotGCDelete {
			return nil
		}

		if err := rep.Flush(ctx); err != nil {
			return errors.Wrap(err, "flush error")
		}

		return nil
	}

	log.Infof("looking for unreferenced contents")

	if err := rep.Content.IterateContents(content.IterateOptions{}, visit); err != nil {
		return errors.Wrap(err, "error iterating contents")
	}

	log.Infof("found %v unused contents (%v bytes)", unusedCount, units.BytesStringBase2(totalUnusedBytes))
	log.Infof("found %v unused contents that are too recent to delete (%v bytes)", tooRecentCount, units.BytesStringBase2(totalTooRecentBytes))
	log.Infof("found %v in-use contents (%v bytes)", inUseCount, units.BytesStringBase2(totalInUseBytes))
	log.Infof("found %v in-use system-contents (%v bytes)", systemCount, units.BytesStringBase2(totalSystemBytes))

	// Report the dry-run case as an error so the caller notices that nothing
	// was actually removed.
	if !*snapshotGCDelete && unusedCount > 0 {
		return errors.Errorf("Not deleting because '--delete' flag was not set.")
	}

	return nil
}
func init() {
snapshotGCCommand.Action(repositoryAction(runSnapshotGCCommand))
}