cli: improvements to 'snapshot verify'

* When running against a direct repository, it now verifies that all
  backing blobs exist, based on the results of a blob listing.
* Deprecated the annoying --all-sources flag; verifying all sources is
  now the default when no sources are provided.
This commit is contained in:
Jarek Kowalski
2020-12-21 09:46:19 -08:00
committed by Julio López
parent eecd9d13c9
commit d7ca543356
3 changed files with 60 additions and 15 deletions

View File

@@ -19,23 +19,36 @@
contentVerifyIncludeDeleted = contentVerifyCommand.Flag("include-deleted", "Include deleted contents").Bool()
)
// readBlobMap enumerates blobs through rep.Blobs.ListBlobs and returns their
// metadata keyed by blob ID.
//
// Progress is logged at Info level: once before listing starts, once each time
// the number of collected entries reaches a multiple of 10000, and once with
// the final count. If listing fails, the error is wrapped with
// "unable to list blobs" and a nil map is returned.
//
// NOTE(review): the empty string passed to ListBlobs is presumably a blob ID
// prefix meaning "no filter" - confirm against the blob storage API.
func readBlobMap(ctx context.Context, rep *repo.DirectRepository) (map[blob.ID]blob.Metadata, error) {
	const progressInterval = 10000

	result := make(map[blob.ID]blob.Metadata)

	// collect records a single listed blob and emits a periodic progress line.
	collect := func(md blob.Metadata) error {
		result[md.BlobID] = md

		if count := len(result); count%progressInterval == 0 {
			log(ctx).Infof(" %v blobs...", count)
		}

		return nil
	}

	log(ctx).Infof("Listing blobs...")

	err := rep.Blobs.ListBlobs(ctx, "", collect)
	if err != nil {
		return nil, errors.Wrap(err, "unable to list blobs")
	}

	log(ctx).Infof("Listed %v blobs.", len(result))

	return result, nil
}
func runContentVerifyCommand(ctx context.Context, rep *repo.DirectRepository) error {
blobMap := map[blob.ID]blob.Metadata{}
if !*contentVerifyFull {
log(ctx).Infof("Listing blobs...")
if err := rep.Blobs.ListBlobs(ctx, "", func(bm blob.Metadata) error {
blobMap[bm.BlobID] = bm
if len(blobMap)%10000 == 0 {
log(ctx).Infof(" %v blobs...", len(blobMap))
}
return nil
}); err != nil {
return errors.Wrap(err, "unable to list blobs")
m, err := readBlobMap(ctx, rep)
if err != nil {
return err
}
log(ctx).Infof("Listed %v blobs.", len(blobMap))
blobMap = m
}
var totalCount, successCount, errorCount int32

View File

@@ -14,6 +14,7 @@
"github.com/kopia/kopia/internal/iocopy"
"github.com/kopia/kopia/internal/parallelwork"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/blob"
"github.com/kopia/kopia/repo/content"
"github.com/kopia/kopia/repo/manifest"
"github.com/kopia/kopia/repo/object"
@@ -26,7 +27,7 @@
verifyCommandErrorThreshold = verifyCommand.Flag("max-errors", "Maximum number of errors before stopping").Default("0").Int()
verifyCommandDirObjectIDs = verifyCommand.Flag("directory-id", "Directory object IDs to verify").Strings()
verifyCommandFileObjectIDs = verifyCommand.Flag("file-id", "File object IDs to verify").Strings()
verifyCommandAllSources = verifyCommand.Flag("all-sources", "Verify all snapshots").Bool()
verifyCommandAllSources = verifyCommand.Flag("all-sources", "Verify all snapshots (DEPRECATED)").Hidden().Bool()
verifyCommandSources = verifyCommand.Flag("sources", "Verify the provided sources").Strings()
verifyCommandParallel = verifyCommand.Flag("parallel", "Parallelization").Default("16").Int()
verifyCommandFilesPercent = verifyCommand.Flag("verify-files-percent", "Randomly verify a percentage of files").Default("0").Int()
@@ -40,6 +41,8 @@ type verifier struct {
mu sync.Mutex
seen map[object.ID]bool
blobMap map[blob.ID]blob.Metadata
errors []error
}
@@ -147,10 +150,26 @@ func (v *verifier) doVerifyDirectory(ctx context.Context, oid object.ID, path st
func (v *verifier) doVerifyObject(ctx context.Context, oid object.ID, path string) error {
log(ctx).Debugf("verifying object %v", oid)
if _, err := v.rep.VerifyObject(ctx, oid); err != nil {
contentIDs, err := v.rep.VerifyObject(ctx, oid)
if err != nil {
v.reportError(ctx, path, errors.Wrapf(err, "error verifying %v", oid))
}
if dr, ok := v.rep.(*repo.DirectRepository); v.blobMap != nil && ok {
for _, cid := range contentIDs {
ci, err := dr.Content.ContentInfo(ctx, cid)
if err != nil {
v.reportError(ctx, path, errors.Wrapf(err, "error verifying content %v: %v", cid, err))
continue
}
if _, ok := v.blobMap[ci.PackBlobID]; !ok {
v.reportError(ctx, path, errors.Errorf("object %v is backed by missing blob %v", oid, ci.PackBlobID))
continue
}
}
}
//nolint:gomnd,gosec
if rand.Intn(100) < *verifyCommandFilesPercent {
if err := v.readEntireObject(ctx, oid, path); err != nil {
@@ -179,6 +198,10 @@ func (v *verifier) readEntireObject(ctx context.Context, oid object.ID, path str
}
func runVerifyCommand(ctx context.Context, rep repo.Repository) error {
if *verifyCommandAllSources {
log(ctx).Noticef("DEPRECATED: --all-sources flag has no effect and is the default when no sources are provided.")
}
v := &verifier{
rep: rep,
startTime: clock.Now(),
@@ -186,6 +209,15 @@ func runVerifyCommand(ctx context.Context, rep repo.Repository) error {
seen: map[object.ID]bool{},
}
if dr, ok := rep.(*repo.DirectRepository); ok {
blobMap, err := readBlobMap(ctx, dr)
if err != nil {
return err
}
v.blobMap = blobMap
}
if err := enqueueRootsToVerify(ctx, v, rep); err != nil {
return err
}
@@ -246,7 +278,7 @@ func enqueueRootsToVerify(ctx context.Context, v *verifier, rep repo.Repository)
func loadSourceManifests(ctx context.Context, rep repo.Repository, sources []string) ([]*snapshot.Manifest, error) {
var manifestIDs []manifest.ID
if *verifyCommandAllSources {
if len(sources)+len(*verifyCommandDirObjectIDs)+len(*verifyCommandFileObjectIDs) == 0 {
man, err := snapshot.ListSnapshotManifests(ctx, rep, nil)
if err != nil {
return nil, err

View File

@@ -48,7 +48,7 @@ func TestSnapshotGC(t *testing.T) {
expectedContentCount++
// run verification
e.RunAndExpectSuccess(t, "snapshot", "verify", "--all-sources")
e.RunAndExpectSuccess(t, "snapshot", "verify")
// garbage-collect in dry run mode
e.RunAndExpectSuccess(t, "snapshot", "gc")