From 9b68189d29efe4c8b548d67b3c7a37e4f65770ae Mon Sep 17 00:00:00 2001 From: Rohit-BM18 Date: Fri, 7 Mar 2025 07:41:14 -0500 Subject: [PATCH] feat(cli): compute snapshot diff stats (#4444) Added functionality to calculate aggregate statistics when comparing what's changed between snapshots using `kopia diff`. Statistics collected during snapshot diff computation include: - files added/removed/modified - dirs added/removed/modified - files/dirs with metadata changes but same underlying content (OID) Testing approach: Added a test for verifying stats collected when comparing two directories with the same objectID but metadata changes across snapshots (dir mode, dir mod time, dir owner, etc.). The expectation is that all the appropriate dir stats fields are updated. Added another test for verifying stats collected when comparing two directories with the same file contents where the metadata of the files has changed between snapshots while the content remains unchanged. The expectation is that all the relevant file-level stats fields are updated. Existing tests have been updated because stats are now printed in addition to the previous output. 
--- cli/command_diff.go | 16 +- internal/diff/diff.go | 167 ++++++++-- internal/diff/diff_test.go | 313 +++++++++++++++--- tests/end_to_end_test/restore_test.go | 2 +- tests/recovery/recovery_test/recovery_test.go | 2 +- 5 files changed, 433 insertions(+), 67 deletions(-) diff --git a/cli/command_diff.go b/cli/command_diff.go index 6e26a8d32..c3d9f6e2b 100644 --- a/cli/command_diff.go +++ b/cli/command_diff.go @@ -2,6 +2,8 @@ import ( "context" + "encoding/json" + "fmt" "strings" "github.com/pkg/errors" @@ -63,7 +65,19 @@ func (c *commandDiff) run(ctx context.Context, rep repo.Repository) error { } if isDir1 { - return errors.Wrap(d.Compare(ctx, ent1, ent2), "error comparing directories") + snapshotDiffStats, err := d.Compare(ctx, ent1, ent2) + if err != nil { + return errors.Wrap(err, "error comparing directories") + } + + b, err := json.Marshal(snapshotDiffStats) + if err != nil { + return errors.Wrap(err, "error marshaling computed snapshot diff stats") + } + + fmt.Fprintf(c.out.stdout(), "%s", b) //nolint:errcheck + + return nil } return errors.New("comparing files not implemented yet") diff --git a/internal/diff/diff.go b/internal/diff/diff.go index 59245bc66..84cd6bb46 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -21,18 +21,47 @@ var log = logging.Module("diff") +// EntryTypeStats accumulates specific stats for the snapshots being compared. 
+type EntryTypeStats struct { + Added uint32 `json:"added"` + Removed uint32 `json:"removed"` + Modified uint32 `json:"modified"` + + // aggregate stats + SameContentButDifferentMetadata uint32 `json:"sameContentButDifferentMetadata"` + + // stats categorized based on metadata + SameContentButDifferentMode uint32 `json:"sameContentButDifferentMode"` + SameContentButDifferentModificationTime uint32 `json:"sameContentButDifferentModificationTime"` + SameContentButDifferentUserOwner uint32 `json:"sameContentButDifferentUserOwner"` + SameContentButDifferentGroupOwner uint32 `json:"sameContentButDifferentGroupOwner"` +} + +// Stats accumulates stats between snapshots being compared. +type Stats struct { + FileEntries EntryTypeStats `json:"fileEntries"` + DirectoryEntries EntryTypeStats `json:"directoryEntries"` +} + // Comparer outputs diff information between two filesystems. type Comparer struct { - out io.Writer - tmpDir string - + stats Stats + out io.Writer + tmpDir string DiffCommand string DiffArguments []string } // Compare compares two filesystem entries and emits their diff information. -func (c *Comparer) Compare(ctx context.Context, e1, e2 fs.Entry) error { - return c.compareEntry(ctx, e1, e2, ".") +func (c *Comparer) Compare(ctx context.Context, e1, e2 fs.Entry) (Stats, error) { + c.stats = Stats{} + + err := c.compareEntry(ctx, e1, e2, ".") + if err != nil { + return c.stats, errors.Wrap(err, "error comparing fs entries") + } + + return c.stats, nil } // Close removes all temporary files used by the comparer. 
@@ -76,23 +105,34 @@ func (c *Comparer) compareDirectories(ctx context.Context, dir1, dir2 fs.Directo //nolint:gocyclo func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path string) error { // see if we have the same object IDs, which implies identical objects, thanks to content-addressable-storage - if h1, ok := e1.(object.HasObjectID); ok { - if h2, ok := e2.(object.HasObjectID); ok { - if h1.ObjectID() == h2.ObjectID() { - log(ctx).Debugf("unchanged %v", path) - return nil + h1, e1HasObjectID := e1.(object.HasObjectID) + h2, e2HasObjectID := e2.(object.HasObjectID) + + if e1HasObjectID && e2HasObjectID { + if h1.ObjectID() == h2.ObjectID() { + if _, isDir := e1.(fs.Directory); isDir { + c.compareDirMetadataAndComputeStats(ctx, e1, e2, path) + } else { + c.compareFileMetadataAndComputeStats(ctx, e1, e2, path) } + + return nil } } if e1 == nil { if dir2, isDir2 := e2.(fs.Directory); isDir2 { c.output("added directory %v\n", path) + + c.stats.DirectoryEntries.Added++ + return c.compareDirectories(ctx, nil, dir2, path) } c.output("added file %v (%v bytes)\n", path, e2.Size()) + c.stats.FileEntries.Added++ + if f, ok := e2.(fs.File); ok { if err := c.compareFiles(ctx, nil, f, path); err != nil { return err @@ -105,11 +145,16 @@ func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path strin if e2 == nil { if dir1, isDir1 := e1.(fs.Directory); isDir1 { c.output("removed directory %v\n", path) + + c.stats.DirectoryEntries.Removed++ + return c.compareDirectories(ctx, dir1, nil, path) } c.output("removed file %v (%v bytes)\n", path, e1.Size()) + c.stats.FileEntries.Removed++ + if f, ok := e1.(fs.File); ok { if err := c.compareFiles(ctx, f, nil, path); err != nil { return err @@ -119,7 +164,7 @@ func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path strin return nil } - compareEntry(e1, e2, path, c.out) + c.compareEntryMetadata(e1, e2, path) dir1, isDir1 := e1.(fs.Directory) dir2, isDir2 := e2.(fs.Directory) @@ 
-137,6 +182,8 @@ func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path strin if isDir2 { // left is non-directory, right is a directory log(ctx).Infof("changed %v from non-directory to a directory", path) + c.output("changed %v from non-directory to a directory\n", path) + return nil } @@ -144,6 +191,8 @@ func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path strin if f2, ok := e2.(fs.File); ok { c.output("changed %v at %v (size %v -> %v)\n", path, e2.ModTime().String(), e1.Size(), e2.Size()) + c.stats.FileEntries.Modified++ + if err := c.compareFiles(ctx, f1, f2, path); err != nil { return err } @@ -153,18 +202,81 @@ func (c *Comparer) compareEntry(ctx context.Context, e1, e2 fs.Entry, path strin return nil } -func compareEntry(e1, e2 fs.Entry, fullpath string, out io.Writer) bool { +func (c *Comparer) compareDirMetadataAndComputeStats(ctx context.Context, e1, e2 fs.Entry, path string) { + // check for metadata changes pertaining to directories given that content hasn't changed and gather aggregate statistics + equal := true + + if m1, m2 := e1.Mode(), e2.Mode(); m1 != m2 { + equal = false + c.stats.DirectoryEntries.SameContentButDifferentMode++ + } + + if mt1, mt2 := e1.ModTime(), e2.ModTime(); !mt1.Equal(mt2) { + equal = false + c.stats.DirectoryEntries.SameContentButDifferentModificationTime++ + } + + o1, o2 := e1.Owner(), e2.Owner() + if o1.UserID != o2.UserID { + equal = false + c.stats.DirectoryEntries.SameContentButDifferentUserOwner++ + } + + if o1.GroupID != o2.GroupID { + equal = false + c.stats.DirectoryEntries.SameContentButDifferentGroupOwner++ + } + + if !equal { + c.stats.DirectoryEntries.SameContentButDifferentMetadata++ + + log(ctx).Debugf("content unchanged but metadata has been modified: %v", path) + } +} + +func (c *Comparer) compareFileMetadataAndComputeStats(ctx context.Context, e1, e2 fs.Entry, path string) { + // check for metadata changes pertaining to files given that content hasn't changed and 
gather aggregate statistics + equal := true + if m1, m2 := e1.Mode(), e2.Mode(); m1 != m2 { + equal = false + c.stats.FileEntries.SameContentButDifferentMode++ + } + + if mt1, mt2 := e1.ModTime(), e2.ModTime(); !mt1.Equal(mt2) { + equal = false + c.stats.FileEntries.SameContentButDifferentModificationTime++ + } + + o1, o2 := e1.Owner(), e2.Owner() + if o1.UserID != o2.UserID { + equal = false + c.stats.FileEntries.SameContentButDifferentUserOwner++ + } + + if o1.GroupID != o2.GroupID { + equal = false + c.stats.FileEntries.SameContentButDifferentGroupOwner++ + } + + if !equal { + c.stats.FileEntries.SameContentButDifferentMetadata++ + + log(ctx).Debugf("content unchanged but metadata has been modified: %v", path) + } +} + +func (c *Comparer) compareEntryMetadata(e1, e2 fs.Entry, fullpath string) bool { if e1 == e2 { // in particular e1 == nil && e2 == nil return true } if e1 == nil { - fmt.Fprintln(out, fullpath, "does not exist in source directory") //nolint:errcheck + c.output("%v does not exist in source directory\n", fullpath) return false } if e2 == nil { - fmt.Fprintln(out, fullpath, "does not exist in destination directory") //nolint:errcheck + c.output("%v does not exist in destination directory\n", fullpath) return false } @@ -173,32 +285,43 @@ func compareEntry(e1, e2 fs.Entry, fullpath string, out io.Writer) bool { if m1, m2 := e1.Mode(), e2.Mode(); m1 != m2 { equal = false - fmt.Fprintln(out, fullpath, "modes differ: ", m1, m2) //nolint:errcheck + c.output("%v modes differ: %v %v\n", fullpath, m1, m2) } if s1, s2 := e1.Size(), e2.Size(); s1 != s2 { equal = false - fmt.Fprintln(out, fullpath, "sizes differ: ", s1, s2) //nolint:errcheck + c.output("%v sizes differ: %v %v\n", fullpath, s1, s2) } if mt1, mt2 := e1.ModTime(), e2.ModTime(); !mt1.Equal(mt2) { equal = false - fmt.Fprintln(out, fullpath, "modification times differ: ", mt1, mt2) //nolint:errcheck + c.output("%v modification times differ: %v %v\n", fullpath, mt1, mt2) } o1, o2 := e1.Owner(), 
e2.Owner() if o1.UserID != o2.UserID { equal = false - fmt.Fprintln(out, fullpath, "owner users differ: ", o1.UserID, o2.UserID) //nolint:errcheck + c.output("%v owner users differ: %v %v\n", fullpath, o1.UserID, o2.UserID) } if o1.GroupID != o2.GroupID { equal = false - fmt.Fprintln(out, fullpath, "owner groups differ: ", o1.GroupID, o2.GroupID) //nolint:errcheck + c.output("%v owner groups differ: %v %v\n", fullpath, o1.GroupID, o2.GroupID) + } + + _, isDir1 := e1.(fs.Directory) + _, isDir2 := e2.(fs.Directory) + + if !equal { + if isDir1 && isDir2 { + c.stats.DirectoryEntries.Modified++ + } else { + c.stats.FileEntries.Modified++ + } } // don't compare filesystem boundaries (e1.Device()), it's pretty useless and is not stored in backups @@ -297,6 +420,12 @@ func downloadFile(ctx context.Context, f fs.File, fname string) error { return errors.Wrap(iocopy.JustCopy(dst, src), "error downloading file") } +// Stats returns the aggregated statistics computed during snapshot comparison. +// It must be invoked after a call to Compare, which populates the Stats struct. +func (c *Comparer) Stats() Stats { + return c.stats +} + func (c *Comparer) output(msg string, args ...interface{}) { fmt.Fprintf(c.out, msg, args...) 
//nolint:errcheck } diff --git a/internal/diff/diff_test.go b/internal/diff/diff_test.go index c360c2952..30deb20e2 100644 --- a/internal/diff/diff_test.go +++ b/internal/diff/diff_test.go @@ -13,6 +13,8 @@ "github.com/kopia/kopia/fs" "github.com/kopia/kopia/internal/diff" + "github.com/kopia/kopia/repo/content/index" + "github.com/kopia/kopia/repo/object" ) var ( @@ -24,6 +26,8 @@ type testBaseEntry struct { modtime time.Time mode os.FileMode name string + owner fs.OwnerInfo + oid object.ID } func (f *testBaseEntry) IsDir() bool { return false } @@ -32,8 +36,9 @@ func (f *testBaseEntry) Close() {} func (f *testBaseEntry) Name() string { return f.name } func (f *testBaseEntry) ModTime() time.Time { return f.modtime } func (f *testBaseEntry) Sys() interface{} { return nil } -func (f *testBaseEntry) Owner() fs.OwnerInfo { return fs.OwnerInfo{UserID: 1000, GroupID: 1000} } +func (f *testBaseEntry) Owner() fs.OwnerInfo { return f.owner } func (f *testBaseEntry) Device() fs.DeviceInfo { return fs.DeviceInfo{Dev: 1} } +func (f *testBaseEntry) ObjectID() object.ID { return f.oid } func (f *testBaseEntry) Mode() os.FileMode { if f.mode == 0 { @@ -92,9 +97,18 @@ func TestCompareEmptyDirectories(t *testing.T) { ctx := context.Background() - dmodtime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) - dir1 := createTestDirectory("testDir1", dmodtime) - dir2 := createTestDirectory("testDir2", dmodtime) + dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + dirMode := os.FileMode(0o777) + + cid, _ := index.IDFromHash("p", []byte("sdkjfn")) + dirObjectID1 := object.DirectObjectID(cid) + + cid, _ = index.IDFromHash("i", []byte("dfjlgn")) + dirObjectID2 := object.DirectObjectID(cid) + + dir1 := createTestDirectory("testDir1", dirModTime, dirOwnerInfo, dirMode, dirObjectID1) + dir2 := createTestDirectory("testDir2", dirModTime, dirOwnerInfo, dirMode, dirObjectID2) c, err := 
diff.NewComparer(&buf) require.NoError(t, err) @@ -103,9 +117,12 @@ func TestCompareEmptyDirectories(t *testing.T) { _ = c.Close() }) - err = c.Compare(ctx, dir1, dir2) + expectedStats := diff.Stats{} + actualStats, err := c.Compare(ctx, dir1, dir2) + require.NoError(t, err) require.Empty(t, buf.String()) + require.Equal(t, expectedStats, actualStats) } func TestCompareIdenticalDirectories(t *testing.T) { @@ -113,21 +130,41 @@ func TestCompareIdenticalDirectories(t *testing.T) { ctx := context.Background() - dmodtime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) - fmodtime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + dirMode := os.FileMode(0o777) + fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + + cid, _ := index.IDFromHash("p", []byte("sdkjfn")) + dirObjectID1 := object.DirectObjectID(cid) + + cid, _ = index.IDFromHash("i", []byte("dfjlgn")) + dirObjectID2 := object.DirectObjectID(cid) + + file1 := &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"} + file2 := &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"} + dir1 := createTestDirectory( "testDir1", - dmodtime, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file1.txt"}, content: "abcdefghij"}, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, + dirModTime, + dirOwnerInfo, + dirMode, + dirObjectID1, + file1, + file2, ) dir2 := createTestDirectory( "testDir2", - dmodtime, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file1.txt"}, content: "abcdefghij"}, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, + dirModTime, + dirOwnerInfo, + dirMode, + dirObjectID2, + 
file1, + file2, ) + expectedStats := diff.Stats{} + c, err := diff.NewComparer(&buf) require.NoError(t, err) @@ -135,9 +172,11 @@ func TestCompareIdenticalDirectories(t *testing.T) { _ = c.Close() }) - err = c.Compare(ctx, dir1, dir2) + actualStats, err := c.Compare(ctx, dir1, dir2) + require.NoError(t, err) require.Empty(t, buf.String()) + require.Equal(t, expectedStats, actualStats) } func TestCompareDifferentDirectories(t *testing.T) { @@ -145,19 +184,34 @@ func TestCompareDifferentDirectories(t *testing.T) { ctx := context.Background() - dmodtime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) - fmodtime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + dirMode := os.FileMode(0o777) + + cid, _ := index.IDFromHash("p", []byte("sdkjfn")) + dirObjectID1 := object.DirectObjectID(cid) + + cid, _ = index.IDFromHash("i", []byte("dfjlgn")) + dirObjectID2 := object.DirectObjectID(cid) + dir1 := createTestDirectory( "testDir1", - dmodtime, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file1.txt"}, content: "abcdefghij"}, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, + dirModTime, + dirOwnerInfo, + dirMode, + dirObjectID1, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"}, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, ) dir2 := createTestDirectory( "testDir2", - dmodtime, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file3.txt"}, content: "abcdefghij1"}, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file4.txt"}, content: "klmnopqrstuvwxyz2"}, + dirModTime, + dirOwnerInfo, + dirMode, + dirObjectID2, + 
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file3.txt"}, content: "abcdefghij1"}, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file4.txt"}, content: "klmnopqrstuvwxyz2"}, ) c, err := diff.NewComparer(&buf) @@ -167,12 +221,18 @@ func TestCompareDifferentDirectories(t *testing.T) { _ = c.Close() }) + expectedStats := diff.Stats{} + expectedStats.FileEntries.Added = 2 + expectedStats.FileEntries.Removed = 2 + expectedOutput := "added file ./file3.txt (11 bytes)\nadded file ./file4.txt (17 bytes)\n" + "removed file ./file1.txt (10 bytes)\n" + "removed file ./file2.txt (16 bytes)\n" - err = c.Compare(ctx, dir1, dir2) + actualStats, err := c.Compare(ctx, dir1, dir2) + require.NoError(t, err) + require.Equal(t, expectedStats, actualStats) require.Equal(t, expectedOutput, buf.String()) } @@ -181,22 +241,40 @@ func TestCompareDifferentDirectories_DirTimeDiff(t *testing.T) { ctx := context.Background() - dmodtime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) - dmodtime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC) - fmodtime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC) + dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + dirMode := os.FileMode(0o777) + + cid, _ := index.IDFromHash("p", []byte("sdkjfn")) + dirObjectID1 := object.DirectObjectID(cid) + + cid, _ = index.IDFromHash("i", []byte("dfjlgn")) + dirObjectID2 := object.DirectObjectID(cid) + dir1 := createTestDirectory( "testDir1", - dmodtime1, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file1.txt"}, content: "abcdefghij"}, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, + dirModTime1, + dirOwnerInfo, + dirMode, + 
dirObjectID1, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"}, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, ) dir2 := createTestDirectory( "testDir2", - dmodtime2, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file1.txt"}, content: "abcdefghij"}, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, + dirModTime2, + dirOwnerInfo, + dirMode, + dirObjectID2, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"}, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}, ) + expectedStats := diff.Stats{} + expectedStats.DirectoryEntries.Modified = 1 + c, err := diff.NewComparer(&buf) require.NoError(t, err) @@ -204,10 +282,12 @@ func TestCompareDifferentDirectories_DirTimeDiff(t *testing.T) { _ = c.Close() }) - expectedOutput := ". modification times differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n" - err = c.Compare(ctx, dir1, dir2) + expectedOutput := ". 
modification times differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n" + actualStats, err := c.Compare(ctx, dir1, dir2) + require.NoError(t, err) require.Equal(t, expectedOutput, buf.String()) + require.Equal(t, expectedStats, actualStats) } func TestCompareDifferentDirectories_FileTimeDiff(t *testing.T) { @@ -215,18 +295,33 @@ func TestCompareDifferentDirectories_FileTimeDiff(t *testing.T) { ctx := context.Background() - fmodtime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) - fmodtime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC) - dmodtime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + fileModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + fileModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC) + dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + dirMode := os.FileMode(0o700) + + cid, _ := index.IDFromHash("p", []byte("sdkjfn")) + OID1 := object.DirectObjectID(cid) + + cid, _ = index.IDFromHash("i", []byte("hvhjb")) + OID2 := object.DirectObjectID(cid) + dir1 := createTestDirectory( "testDir1", - dmodtime, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime1, name: "file1.txt"}, content: "abcdefghij"}, + dirModTime, + dirOwnerInfo, + dirMode, + OID1, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime1, name: "file1.txt", oid: OID1}, content: "abcdefghij"}, ) dir2 := createTestDirectory( "testDir2", - dmodtime, - &testFile{testBaseEntry: testBaseEntry{modtime: fmodtime2, name: "file1.txt"}, content: "abcdefghij"}, + dirModTime, + dirOwnerInfo, + dirMode, + OID2, + &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime2, name: "file1.txt", oid: OID2}, content: "abcdefghij"}, ) c, err := diff.NewComparer(&buf) @@ -236,13 +331,141 @@ func TestCompareDifferentDirectories_FileTimeDiff(t *testing.T) { _ = c.Close() }) - expectedOutput := "./file1.txt modification times 
differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n" + expectedStats := diff.Stats{} + expectedStats.FileEntries.Modified = 1 + + expectedOutput := "./file1.txt modification times differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n" + + actualStats, err := c.Compare(ctx, dir1, dir2) - err = c.Compare(ctx, dir1, dir2) require.NoError(t, err) require.Equal(t, expectedOutput, buf.String()) + require.Equal(t, expectedStats, actualStats) } -func createTestDirectory(name string, modtime time.Time, files ...fs.Entry) *testDirectory { - return &testDirectory{testBaseEntry: testBaseEntry{modtime: modtime, name: name}, files: files} +func TestCompareFileWithIdenticalContentsButDiffFileMetadata(t *testing.T) { + var buf bytes.Buffer + + ctx := context.Background() + + fileModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + fileModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC) + + fileOwnerinfo1 := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + fileOwnerinfo2 := fs.OwnerInfo{UserID: 1001, GroupID: 1002} + + dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + dirMode := os.FileMode(0o777) + dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + + cid, _ := index.IDFromHash("p", []byte("sdkjfn")) + dirObjectID1 := object.DirectObjectID(cid) + + cid, _ = index.IDFromHash("i", []byte("dfjlgn")) + dirObjectID2 := object.DirectObjectID(cid) + + dir1 := createTestDirectory( + "testDir1", + dirModTime, + dirOwnerInfo, + dirMode, + dirObjectID1, + &testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime1, oid: object.ID{}, owner: fileOwnerinfo1, mode: 0o700}, content: "abcdefghij"}, + ) + + dir2 := createTestDirectory( + "testDir2", + dirModTime, + dirOwnerInfo, + dirMode, + dirObjectID2, + &testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime2, oid: object.ID{}, owner: fileOwnerinfo2, mode: 0o777}, content: "abcdefghij"}, + ) + + c, err := 
diff.NewComparer(&buf) + require.NoError(t, err) + + t.Cleanup(func() { + _ = c.Close() + }) + + expectedStats := diff.Stats{ + FileEntries: diff.EntryTypeStats{ + SameContentButDifferentMetadata: 1, + SameContentButDifferentModificationTime: 1, + SameContentButDifferentMode: 1, + SameContentButDifferentUserOwner: 1, + SameContentButDifferentGroupOwner: 1, + }, + } + + actualStats, err := c.Compare(ctx, dir1, dir2) + + require.NoError(t, err) + require.Empty(t, buf.String()) + require.Equal(t, expectedStats, actualStats) +} + +func TestCompareIdenticalDirectoriesWithDiffDirectoryMetadata(t *testing.T) { + var buf bytes.Buffer + + ctx := context.Background() + + dirModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + dirModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC) + + dirOwnerInfo1 := fs.OwnerInfo{UserID: 1000, GroupID: 1000} + dirOwnerInfo2 := fs.OwnerInfo{UserID: 1001, GroupID: 1002} + + dirMode1 := os.FileMode(0o644) + dirMode2 := os.FileMode(0o777) + + fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC) + + cid, _ := index.IDFromHash("p", []byte("sdkjfn")) + dirObjectID := object.DirectObjectID(cid) + + dir1 := createTestDirectory( + "testDir1", + dirModTime1, + dirOwnerInfo1, + dirMode1, + dirObjectID, + &testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime}, content: "abcdefghij"}, + ) + + dir2 := createTestDirectory( + "testDir2", + dirModTime2, + dirOwnerInfo2, + dirMode2, + dirObjectID, + &testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime}, content: "abcdefghij"}, + ) + c, err := diff.NewComparer(&buf) + require.NoError(t, err) + + t.Cleanup(func() { + _ = c.Close() + }) + + expectedStats := diff.Stats{ + DirectoryEntries: diff.EntryTypeStats{ + SameContentButDifferentMetadata: 1, + SameContentButDifferentModificationTime: 1, + SameContentButDifferentMode: 1, + SameContentButDifferentUserOwner: 1, + SameContentButDifferentGroupOwner: 1, + }, + } + 
+ actualStats, err := c.Compare(ctx, dir1, dir2) + + require.NoError(t, err) + require.Empty(t, buf.String()) + require.Equal(t, expectedStats, actualStats) +} + +func createTestDirectory(name string, modtime time.Time, owner fs.OwnerInfo, mode os.FileMode, oid object.ID, files ...fs.Entry) *testDirectory { + return &testDirectory{testBaseEntry: testBaseEntry{modtime: modtime, name: name, owner: owner, mode: mode, oid: oid}, files: files} } diff --git a/tests/end_to_end_test/restore_test.go b/tests/end_to_end_test/restore_test.go index db29ee631..cb699b2a9 100644 --- a/tests/end_to_end_test/restore_test.go +++ b/tests/end_to_end_test/restore_test.go @@ -196,7 +196,7 @@ func compareDirs(t *testing.T, source, restoreDir string) { require.NoError(t, err) cmp.DiffCommand = "cmp" - _ = cmp.Compare(ctx, s, r) + _, _ = cmp.Compare(ctx, s, r) } } diff --git a/tests/recovery/recovery_test/recovery_test.go b/tests/recovery/recovery_test/recovery_test.go index 747c6d821..d588669be 100644 --- a/tests/recovery/recovery_test/recovery_test.go +++ b/tests/recovery/recovery_test/recovery_test.go @@ -310,7 +310,7 @@ func CompareDirs(t *testing.T, source, destination string) { e2, err := localfs.NewEntry(destination) require.NoError(t, err) - err = c.Compare(ctx, e1, e2) + _, err = c.Compare(ctx, e1, e2) require.NoError(t, err) }