Files
kopia/internal/diff/diff_test.go
Rohit-BM18 9b68189d29 feat(cli): compute snapshot diff stats (#4444)
Added functionality to calculate aggregate statistics when
comparing what's changed between snapshots using kopia diff

Statistics collected during snapshot diff computation include:

- files added/removed/modified
- dirs added/removed/modified
- files/dirs with metadata changes but same underlying content (OID)

Testing approach:

Added a test verifying the stats collected when comparing two directories that have the same object ID but different metadata across snapshots (dir mode, dir mod time, dir owner, etc.); the expectation is that all the appropriate dir stats fields are updated.
Added another test verifying the stats collected when comparing two directories whose files have the same content but different metadata between snapshots; the expectation is that all the relevant file-level stats fields are updated.
Existing tests have been updated due to stats now being printed in addition to previous output.
2025-03-07 12:41:14 +00:00

472 lines
13 KiB
Go

package diff_test
import (
"bytes"
"context"
"io"
"os"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/internal/diff"
"github.com/kopia/kopia/repo/content/index"
"github.com/kopia/kopia/repo/object"
)
// Compile-time assertions that the test doubles declared in this file satisfy
// the fs interfaces they are used as.
var (
_ fs.Entry = (*testFile)(nil)
_ fs.Directory = (*testDirectory)(nil)
)
// testBaseEntry holds the metadata shared by the in-memory file and directory
// entries used by the tests in this file.
type testBaseEntry struct {
	modtime time.Time
	mode    os.FileMode
	name    string
	owner   fs.OwnerInfo
	oid     object.ID
}

// Name returns the entry name.
func (e *testBaseEntry) Name() string { return e.name }

// LocalFilesystemPath returns the entry name.
func (e *testBaseEntry) LocalFilesystemPath() string { return e.name }

// IsDir always reports false for the base entry.
func (e *testBaseEntry) IsDir() bool { return false }

// ModTime returns the configured modification time.
func (e *testBaseEntry) ModTime() time.Time { return e.modtime }

// Owner returns the configured owner information.
func (e *testBaseEntry) Owner() fs.OwnerInfo { return e.owner }

// ObjectID returns the configured object ID.
func (e *testBaseEntry) ObjectID() object.ID { return e.oid }

// Device returns a fixed DeviceInfo whose Dev field is 1.
func (e *testBaseEntry) Device() fs.DeviceInfo { return fs.DeviceInfo{Dev: 1} }

// Sys always returns nil.
func (e *testBaseEntry) Sys() interface{} { return nil }

// Close is a no-op.
func (e *testBaseEntry) Close() {}

// Mode returns the configured mode with the directory bit cleared, or 0o644
// when no mode has been set.
func (e *testBaseEntry) Mode() os.FileMode {
	if e.mode != 0 {
		return e.mode &^ os.ModeDir
	}

	return 0o644
}
// testFile is an in-memory file entry whose data is held in content.
type testFile struct {
	testBaseEntry
	content string
}

// Size reports the length of content in bytes.
func (tf *testFile) Size() int64 {
	n := len(tf.content)

	return int64(n)
}

// Open returns a reader over content. The context is not used and the
// returned error is always nil.
func (tf *testFile) Open(ctx context.Context) (io.Reader, error) {
	r := strings.NewReader(tf.content)

	return r, nil
}
// testDirectory is an in-memory directory entry backed by a fixed slice of
// child entries.
type testDirectory struct {
	testBaseEntry
	files []fs.Entry
}

// IsDir always reports true.
func (dir *testDirectory) IsDir() bool { return true }

// Size always reports 0.
func (dir *testDirectory) Size() int64 { return 0 }

// LocalFilesystemPath returns the directory name.
func (dir *testDirectory) LocalFilesystemPath() string { return dir.name }

// SupportsMultipleIterations always reports false.
func (dir *testDirectory) SupportsMultipleIterations() bool { return false }

// Mode returns the configured mode with the directory bit set, using 0o755 as
// the permission bits when no mode has been set.
func (dir *testDirectory) Mode() os.FileMode {
	perm := dir.mode
	if perm == 0 {
		perm = 0o755
	}

	return os.ModeDir | perm
}

// Readdir returns the child entries; the error is always nil.
func (dir *testDirectory) Readdir(ctx context.Context) ([]fs.Entry, error) {
	return dir.files, nil
}

// Iterate returns a static iterator over the child entries; the error is
// always nil.
func (dir *testDirectory) Iterate(ctx context.Context) (fs.DirectoryIterator, error) {
	it := fs.StaticIterator(dir.files, nil)

	return it, nil
}

// Child returns the first child entry whose name equals name, or
// fs.ErrEntryNotFound when there is none.
func (dir *testDirectory) Child(ctx context.Context, name string) (fs.Entry, error) {
	for i := range dir.files {
		if entry := dir.files[i]; entry.Name() == name {
			return entry, nil
		}
	}

	return nil, fs.ErrEntryNotFound
}
// TestCompareEmptyDirectories verifies that comparing two directories that
// contain no entries writes nothing to the output buffer and yields
// zero-valued stats. The directories share modification time, owner and mode
// and differ only in name and object ID.
func TestCompareEmptyDirectories(t *testing.T) {
	var buf bytes.Buffer

	ctx := context.Background()

	dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	dirMode := os.FileMode(0o777)

	// Check ID construction explicitly so a failure here is reported instead
	// of going unnoticed.
	cid, err := index.IDFromHash("p", []byte("sdkjfn"))
	require.NoError(t, err)

	dirObjectID1 := object.DirectObjectID(cid)

	cid, err = index.IDFromHash("i", []byte("dfjlgn"))
	require.NoError(t, err)

	dirObjectID2 := object.DirectObjectID(cid)

	dir1 := createTestDirectory("testDir1", dirModTime, dirOwnerInfo, dirMode, dirObjectID1)
	dir2 := createTestDirectory("testDir2", dirModTime, dirOwnerInfo, dirMode, dirObjectID2)

	c, err := diff.NewComparer(&buf)
	require.NoError(t, err)

	t.Cleanup(func() {
		// Best-effort cleanup; a Close error does not affect the assertions.
		_ = c.Close()
	})

	expectedStats := diff.Stats{}

	actualStats, err := c.Compare(ctx, dir1, dir2)
	require.NoError(t, err)
	require.Empty(t, buf.String())
	require.Equal(t, expectedStats, actualStats)
}
// TestCompareIdenticalDirectories verifies that comparing two directories that
// hold the very same file entries writes nothing to the output buffer and
// yields zero-valued stats. The directories share modification time, owner and
// mode and differ only in name and object ID.
func TestCompareIdenticalDirectories(t *testing.T) {
	var buf bytes.Buffer

	ctx := context.Background()

	dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	dirMode := os.FileMode(0o777)
	fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)

	// Check ID construction explicitly so a failure here is reported instead
	// of going unnoticed.
	cid, err := index.IDFromHash("p", []byte("sdkjfn"))
	require.NoError(t, err)

	dirObjectID1 := object.DirectObjectID(cid)

	cid, err = index.IDFromHash("i", []byte("dfjlgn"))
	require.NoError(t, err)

	dirObjectID2 := object.DirectObjectID(cid)

	// The same two file entries are placed in both directories.
	file1 := &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"}
	file2 := &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}

	dir1 := createTestDirectory(
		"testDir1",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		dirObjectID1,
		file1,
		file2,
	)

	dir2 := createTestDirectory(
		"testDir2",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		dirObjectID2,
		file1,
		file2,
	)

	expectedStats := diff.Stats{}

	c, err := diff.NewComparer(&buf)
	require.NoError(t, err)

	t.Cleanup(func() {
		// Best-effort cleanup; a Close error does not affect the assertions.
		_ = c.Close()
	})

	actualStats, err := c.Compare(ctx, dir1, dir2)
	require.NoError(t, err)
	require.Empty(t, buf.String())
	require.Equal(t, expectedStats, actualStats)
}
// TestCompareDifferentDirectories verifies the comparison of two directories
// with disjoint file names: the first holds file1.txt and file2.txt, the
// second holds file3.txt and file4.txt. It expects two added and two removed
// file entries in the stats and the matching "added file"/"removed file" lines
// in the output.
func TestCompareDifferentDirectories(t *testing.T) {
	var buf bytes.Buffer

	ctx := context.Background()

	dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	dirMode := os.FileMode(0o777)

	// Check ID construction explicitly so a failure here is reported instead
	// of going unnoticed.
	cid, err := index.IDFromHash("p", []byte("sdkjfn"))
	require.NoError(t, err)

	dirObjectID1 := object.DirectObjectID(cid)

	cid, err = index.IDFromHash("i", []byte("dfjlgn"))
	require.NoError(t, err)

	dirObjectID2 := object.DirectObjectID(cid)

	dir1 := createTestDirectory(
		"testDir1",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		dirObjectID1,
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"},
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"},
	)

	dir2 := createTestDirectory(
		"testDir2",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		dirObjectID2,
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file3.txt"}, content: "abcdefghij1"},
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file4.txt"}, content: "klmnopqrstuvwxyz2"},
	)

	c, err := diff.NewComparer(&buf)
	require.NoError(t, err)

	t.Cleanup(func() {
		// Best-effort cleanup; a Close error does not affect the assertions.
		_ = c.Close()
	})

	expectedStats := diff.Stats{}
	expectedStats.FileEntries.Added = 2
	expectedStats.FileEntries.Removed = 2

	expectedOutput := "added file ./file3.txt (11 bytes)\nadded file ./file4.txt (17 bytes)\n" +
		"removed file ./file1.txt (10 bytes)\n" +
		"removed file ./file2.txt (16 bytes)\n"

	actualStats, err := c.Compare(ctx, dir1, dir2)
	require.NoError(t, err)
	require.Equal(t, expectedStats, actualStats)
	require.Equal(t, expectedOutput, buf.String())
}
// TestCompareDifferentDirectories_DirTimeDiff verifies the comparison of two
// directories that differ in modification time and object ID but hold files
// with the same names, modification times and contents. It expects one
// modified directory entry in the stats and a single "modification times
// differ" line for the root in the output.
func TestCompareDifferentDirectories_DirTimeDiff(t *testing.T) {
	var buf bytes.Buffer

	ctx := context.Background()

	fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	dirMode := os.FileMode(0o777)

	// Check ID construction explicitly so a failure here is reported instead
	// of going unnoticed.
	cid, err := index.IDFromHash("p", []byte("sdkjfn"))
	require.NoError(t, err)

	dirObjectID1 := object.DirectObjectID(cid)

	cid, err = index.IDFromHash("i", []byte("dfjlgn"))
	require.NoError(t, err)

	dirObjectID2 := object.DirectObjectID(cid)

	dir1 := createTestDirectory(
		"testDir1",
		dirModTime1,
		dirOwnerInfo,
		dirMode,
		dirObjectID1,
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"},
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"},
	)

	dir2 := createTestDirectory(
		"testDir2",
		dirModTime2,
		dirOwnerInfo,
		dirMode,
		dirObjectID2,
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"},
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"},
	)

	expectedStats := diff.Stats{}
	expectedStats.DirectoryEntries.Modified = 1

	c, err := diff.NewComparer(&buf)
	require.NoError(t, err)

	t.Cleanup(func() {
		// Best-effort cleanup; a Close error does not affect the assertions.
		_ = c.Close()
	})

	expectedOutput := ". modification times differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n"

	actualStats, err := c.Compare(ctx, dir1, dir2)
	require.NoError(t, err)
	require.Equal(t, expectedOutput, buf.String())
	require.Equal(t, expectedStats, actualStats)
}
// TestCompareDifferentDirectories_FileTimeDiff verifies the comparison of two
// directories that each hold a file1.txt with the same content string but a
// different modification time and object ID. It expects one modified file
// entry in the stats and a single "modification times differ" line for
// ./file1.txt in the output.
func TestCompareDifferentDirectories_FileTimeDiff(t *testing.T) {
	var buf bytes.Buffer

	ctx := context.Background()

	fileModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	fileModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	dirMode := os.FileMode(0o700)

	// Check ID construction explicitly so a failure here is reported instead
	// of going unnoticed.
	cid, err := index.IDFromHash("p", []byte("sdkjfn"))
	require.NoError(t, err)

	// Each object ID is used for both the directory and the file inside it.
	oid1 := object.DirectObjectID(cid)

	cid, err = index.IDFromHash("i", []byte("hvhjb"))
	require.NoError(t, err)

	oid2 := object.DirectObjectID(cid)

	dir1 := createTestDirectory(
		"testDir1",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		oid1,
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime1, name: "file1.txt", oid: oid1}, content: "abcdefghij"},
	)

	dir2 := createTestDirectory(
		"testDir2",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		oid2,
		&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime2, name: "file1.txt", oid: oid2}, content: "abcdefghij"},
	)

	c, err := diff.NewComparer(&buf)
	require.NoError(t, err)

	t.Cleanup(func() {
		// Best-effort cleanup; a Close error does not affect the assertions.
		_ = c.Close()
	})

	expectedStats := diff.Stats{}
	expectedStats.FileEntries.Modified = 1

	expectedOutput := "./file1.txt modification times differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n"

	actualStats, err := c.Compare(ctx, dir1, dir2)
	require.NoError(t, err)
	require.Equal(t, expectedOutput, buf.String())
	require.Equal(t, expectedStats, actualStats)
}
// TestCompareFileWithIdenticalContentsButDiffFileMetadata verifies the
// comparison of two directories that each hold a file1.txt with the same
// (zero-valued) object ID and content string but a different modification
// time, mode, user owner and group owner. It expects each of the file-level
// "same content but different ..." counters to be 1 and nothing written to the
// output buffer.
func TestCompareFileWithIdenticalContentsButDiffFileMetadata(t *testing.T) {
	var buf bytes.Buffer

	ctx := context.Background()

	fileModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	fileModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
	fileOwnerInfo1 := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	fileOwnerInfo2 := fs.OwnerInfo{UserID: 1001, GroupID: 1002}
	dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	dirMode := os.FileMode(0o777)
	dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)

	// Check ID construction explicitly so a failure here is reported instead
	// of going unnoticed.
	cid, err := index.IDFromHash("p", []byte("sdkjfn"))
	require.NoError(t, err)

	dirObjectID1 := object.DirectObjectID(cid)

	cid, err = index.IDFromHash("i", []byte("dfjlgn"))
	require.NoError(t, err)

	dirObjectID2 := object.DirectObjectID(cid)

	dir1 := createTestDirectory(
		"testDir1",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		dirObjectID1,
		&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime1, oid: object.ID{}, owner: fileOwnerInfo1, mode: 0o700}, content: "abcdefghij"},
	)

	dir2 := createTestDirectory(
		"testDir2",
		dirModTime,
		dirOwnerInfo,
		dirMode,
		dirObjectID2,
		&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime2, oid: object.ID{}, owner: fileOwnerInfo2, mode: 0o777}, content: "abcdefghij"},
	)

	c, err := diff.NewComparer(&buf)
	require.NoError(t, err)

	t.Cleanup(func() {
		// Best-effort cleanup; a Close error does not affect the assertions.
		_ = c.Close()
	})

	expectedStats := diff.Stats{
		FileEntries: diff.EntryTypeStats{
			SameContentButDifferentMetadata:         1,
			SameContentButDifferentModificationTime: 1,
			SameContentButDifferentMode:             1,
			SameContentButDifferentUserOwner:        1,
			SameContentButDifferentGroupOwner:       1,
		},
	}

	actualStats, err := c.Compare(ctx, dir1, dir2)
	require.NoError(t, err)
	require.Empty(t, buf.String())
	require.Equal(t, expectedStats, actualStats)
}
// TestCompareIdenticalDirectoriesWithDiffDirectoryMetadata verifies the
// comparison of two directories that share one object ID but differ in
// modification time, mode, user owner and group owner. It expects each of the
// directory-level "same content but different ..." counters to be 1 and
// nothing written to the output buffer.
func TestCompareIdenticalDirectoriesWithDiffDirectoryMetadata(t *testing.T) {
	var buf bytes.Buffer

	ctx := context.Background()

	dirModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
	dirOwnerInfo1 := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
	dirOwnerInfo2 := fs.OwnerInfo{UserID: 1001, GroupID: 1002}
	dirMode1 := os.FileMode(0o644)
	dirMode2 := os.FileMode(0o777)
	fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)

	// Check ID construction explicitly so a failure here is reported instead
	// of going unnoticed.
	cid, err := index.IDFromHash("p", []byte("sdkjfn"))
	require.NoError(t, err)

	// Both directories deliberately use this one object ID.
	dirObjectID := object.DirectObjectID(cid)

	dir1 := createTestDirectory(
		"testDir1",
		dirModTime1,
		dirOwnerInfo1,
		dirMode1,
		dirObjectID,
		&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime}, content: "abcdefghij"},
	)

	dir2 := createTestDirectory(
		"testDir2",
		dirModTime2,
		dirOwnerInfo2,
		dirMode2,
		dirObjectID,
		&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime}, content: "abcdefghij"},
	)

	c, err := diff.NewComparer(&buf)
	require.NoError(t, err)

	t.Cleanup(func() {
		// Best-effort cleanup; a Close error does not affect the assertions.
		_ = c.Close()
	})

	expectedStats := diff.Stats{
		DirectoryEntries: diff.EntryTypeStats{
			SameContentButDifferentMetadata:         1,
			SameContentButDifferentModificationTime: 1,
			SameContentButDifferentMode:             1,
			SameContentButDifferentUserOwner:        1,
			SameContentButDifferentGroupOwner:       1,
		},
	}

	actualStats, err := c.Compare(ctx, dir1, dir2)
	require.NoError(t, err)
	require.Empty(t, buf.String())
	require.Equal(t, expectedStats, actualStats)
}
// createTestDirectory builds a testDirectory with the given name, modification
// time, owner, mode and object ID, holding files as its child entries.
func createTestDirectory(name string, modtime time.Time, owner fs.OwnerInfo, mode os.FileMode, oid object.ID, files ...fs.Entry) *testDirectory {
	dir := &testDirectory{files: files}

	// The metadata fields are promoted from the embedded testBaseEntry.
	dir.name = name
	dir.modtime = modtime
	dir.owner = owner
	dir.mode = mode
	dir.oid = oid

	return dir
}