Review notes (text ahead of the first "diff --git" header is skipped by patch tools):
- snapshot verify now lists blobs up front (readBlobMap) and reports objects whose
  contents live in a pack blob that is missing from that listing.
- --all-sources is hidden/deprecated; verifying everything is the default when no
  sources, directory IDs or file IDs are given.
- Review fix: errors.Wrapf already appends the wrapped error, so the extra
  ": %v", err in the "error verifying content" message was dropped.

diff --git a/cli/command_content_verify.go b/cli/command_content_verify.go
index 80254041f..b763d2050 100644
--- a/cli/command_content_verify.go
+++ b/cli/command_content_verify.go
@@ -19,23 +19,37 @@
 	contentVerifyIncludeDeleted = contentVerifyCommand.Flag("include-deleted", "Include deleted contents").Bool()
 )
 
+// readBlobMap lists repository blobs and returns their metadata indexed by blob ID, logging progress every 10000 entries.
+func readBlobMap(ctx context.Context, rep *repo.DirectRepository) (map[blob.ID]blob.Metadata, error) {
+	blobMap := map[blob.ID]blob.Metadata{}
+
+	log(ctx).Infof("Listing blobs...")
+
+	if err := rep.Blobs.ListBlobs(ctx, "", func(bm blob.Metadata) error {
+		blobMap[bm.BlobID] = bm
+		if len(blobMap)%10000 == 0 {
+			log(ctx).Infof(" %v blobs...", len(blobMap))
+		}
+		return nil
+	}); err != nil {
+		return nil, errors.Wrap(err, "unable to list blobs")
+	}
+
+	log(ctx).Infof("Listed %v blobs.", len(blobMap))
+
+	return blobMap, nil
+}
+
 func runContentVerifyCommand(ctx context.Context, rep *repo.DirectRepository) error {
 	blobMap := map[blob.ID]blob.Metadata{}
 
 	if !*contentVerifyFull {
-		log(ctx).Infof("Listing blobs...")
-
-		if err := rep.Blobs.ListBlobs(ctx, "", func(bm blob.Metadata) error {
-			blobMap[bm.BlobID] = bm
-			if len(blobMap)%10000 == 0 {
-				log(ctx).Infof(" %v blobs...", len(blobMap))
-			}
-			return nil
-		}); err != nil {
-			return errors.Wrap(err, "unable to list blobs")
+		m, err := readBlobMap(ctx, rep)
+		if err != nil {
+			return err
 		}
 
-		log(ctx).Infof("Listed %v blobs.", len(blobMap))
+		blobMap = m
 	}
 
 	var totalCount, successCount, errorCount int32
diff --git a/cli/command_snapshot_verify.go b/cli/command_snapshot_verify.go
index 10fc2a1ba..1caf17592 100644
--- a/cli/command_snapshot_verify.go
+++ b/cli/command_snapshot_verify.go
@@ -14,6 +14,7 @@
 	"github.com/kopia/kopia/internal/iocopy"
 	"github.com/kopia/kopia/internal/parallelwork"
 	"github.com/kopia/kopia/repo"
+	"github.com/kopia/kopia/repo/blob"
 	"github.com/kopia/kopia/repo/content"
 	"github.com/kopia/kopia/repo/manifest"
 	"github.com/kopia/kopia/repo/object"
@@ -26,7 +27,7 @@
 	verifyCommandErrorThreshold = verifyCommand.Flag("max-errors", "Maximum number of errors before stopping").Default("0").Int()
 	verifyCommandDirObjectIDs   = verifyCommand.Flag("directory-id", "Directory object IDs to verify").Strings()
 	verifyCommandFileObjectIDs  = verifyCommand.Flag("file-id", "File object IDs to verify").Strings()
-	verifyCommandAllSources     = verifyCommand.Flag("all-sources", "Verify all snapshots").Bool()
+	verifyCommandAllSources     = verifyCommand.Flag("all-sources", "Verify all snapshots (DEPRECATED)").Hidden().Bool()
 	verifyCommandSources        = verifyCommand.Flag("sources", "Verify the provided sources").Strings()
 	verifyCommandParallel       = verifyCommand.Flag("parallel", "Parallelization").Default("16").Int()
 	verifyCommandFilesPercent   = verifyCommand.Flag("verify-files-percent", "Randomly verify a percentage of files").Default("0").Int()
@@ -40,6 +41,8 @@ type verifier struct {
 	mu   sync.Mutex
 	seen map[object.ID]bool
 
+	blobMap map[blob.ID]blob.Metadata
+
 	errors []error
 }
 
@@ -147,10 +150,26 @@ func (v *verifier) doVerifyDirectory(ctx context.Context, oid object.ID, path st
 func (v *verifier) doVerifyObject(ctx context.Context, oid object.ID, path string) error {
 	log(ctx).Debugf("verifying object %v", oid)
 
-	if _, err := v.rep.VerifyObject(ctx, oid); err != nil {
+	contentIDs, err := v.rep.VerifyObject(ctx, oid)
+	if err != nil {
 		v.reportError(ctx, path, errors.Wrapf(err, "error verifying %v", oid))
 	}
 
+	if dr, ok := v.rep.(*repo.DirectRepository); v.blobMap != nil && ok {
+		for _, cid := range contentIDs {
+			ci, err := dr.Content.ContentInfo(ctx, cid)
+			if err != nil {
+				v.reportError(ctx, path, errors.Wrapf(err, "error verifying content %v", cid))
+				continue
+			}
+
+			if _, ok := v.blobMap[ci.PackBlobID]; !ok {
+				v.reportError(ctx, path, errors.Errorf("object %v is backed by missing blob %v", oid, ci.PackBlobID))
+				continue
+			}
+		}
+	}
+
 	//nolint:gomnd,gosec
 	if rand.Intn(100) < *verifyCommandFilesPercent {
 		if err := v.readEntireObject(ctx, oid, path); err != nil {
@@ -179,6 +198,10 @@ func (v *verifier) readEntireObject(ctx context.Context, oid object.ID, path str
 }
 
 func runVerifyCommand(ctx context.Context, rep repo.Repository) error {
+	if *verifyCommandAllSources {
+		log(ctx).Noticef("DEPRECATED: --all-sources flag has no effect and is the default when no sources are provided.")
+	}
+
 	v := &verifier{
 		rep:       rep,
 		startTime: clock.Now(),
@@ -186,6 +209,15 @@ func runVerifyCommand(ctx context.Context, rep repo.Repository) error {
 		seen:      map[object.ID]bool{},
 	}
 
+	if dr, ok := rep.(*repo.DirectRepository); ok {
+		blobMap, err := readBlobMap(ctx, dr)
+		if err != nil {
+			return err
+		}
+
+		v.blobMap = blobMap
+	}
+
 	if err := enqueueRootsToVerify(ctx, v, rep); err != nil {
 		return err
 	}
@@ -246,7 +278,7 @@ func enqueueRootsToVerify(ctx context.Context, v *verifier, rep repo.Repository)
 func loadSourceManifests(ctx context.Context, rep repo.Repository, sources []string) ([]*snapshot.Manifest, error) {
 	var manifestIDs []manifest.ID
 
-	if *verifyCommandAllSources {
+	if len(sources)+len(*verifyCommandDirObjectIDs)+len(*verifyCommandFileObjectIDs) == 0 {
 		man, err := snapshot.ListSnapshotManifests(ctx, rep, nil)
 		if err != nil {
 			return nil, err
diff --git a/tests/end_to_end_test/snapshot_gc_test.go b/tests/end_to_end_test/snapshot_gc_test.go
index 461b64aa6..ec919d372 100644
--- a/tests/end_to_end_test/snapshot_gc_test.go
+++ b/tests/end_to_end_test/snapshot_gc_test.go
@@ -48,7 +48,7 @@ func TestSnapshotGC(t *testing.T) {
 	expectedContentCount++
 
 	// run verification
-	e.RunAndExpectSuccess(t, "snapshot", "verify", "--all-sources")
+	e.RunAndExpectSuccess(t, "snapshot", "verify")
 
 	// garbage-collect in dry run mode
 	e.RunAndExpectSuccess(t, "snapshot", "gc")