mirror of
https://github.com/kopia/kopia.git
synced 2026-01-11 16:07:51 -05:00
Added functionality to calculate aggregate statistics when comparing what's changed between snapshots using `kopia diff`. Statistics collected during snapshot diff computation include: - files added/removed/modified - dirs added/removed/modified - files/dirs with metadata changes but the same underlying content (OID). Testing approach: Added a test for verifying stats collected when comparing two directories with the same objectID but metadata changes across snapshots (dir mode, dir mod time, dir owner, etc.); the expectation is that all the appropriate dir stats fields are updated. Added another test for verifying stats collected when comparing two directories with similar file contents where the metadata for the files has changed between snapshots but the content remains unchanged; the expectation is that all the relevant file-level stats fields are updated. Existing tests have been updated because stats are now printed in addition to the previous output.
472 lines
13 KiB
Go
472 lines
13 KiB
Go
package diff_test
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"io"
|
|
"os"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/kopia/kopia/fs"
|
|
"github.com/kopia/kopia/internal/diff"
|
|
"github.com/kopia/kopia/repo/content/index"
|
|
"github.com/kopia/kopia/repo/object"
|
|
)
|
|
|
|
var (
|
|
_ fs.Entry = (*testFile)(nil)
|
|
_ fs.Directory = (*testDirectory)(nil)
|
|
)
|
|
|
|
type testBaseEntry struct {
|
|
modtime time.Time
|
|
mode os.FileMode
|
|
name string
|
|
owner fs.OwnerInfo
|
|
oid object.ID
|
|
}
|
|
|
|
func (f *testBaseEntry) IsDir() bool { return false }
|
|
func (f *testBaseEntry) LocalFilesystemPath() string { return f.name }
|
|
func (f *testBaseEntry) Close() {}
|
|
func (f *testBaseEntry) Name() string { return f.name }
|
|
func (f *testBaseEntry) ModTime() time.Time { return f.modtime }
|
|
func (f *testBaseEntry) Sys() interface{} { return nil }
|
|
func (f *testBaseEntry) Owner() fs.OwnerInfo { return f.owner }
|
|
func (f *testBaseEntry) Device() fs.DeviceInfo { return fs.DeviceInfo{Dev: 1} }
|
|
func (f *testBaseEntry) ObjectID() object.ID { return f.oid }
|
|
|
|
func (f *testBaseEntry) Mode() os.FileMode {
|
|
if f.mode == 0 {
|
|
return 0o644
|
|
}
|
|
|
|
return f.mode & ^os.ModeDir
|
|
}
|
|
|
|
type testFile struct {
|
|
testBaseEntry
|
|
content string
|
|
}
|
|
|
|
func (f *testFile) Open(ctx context.Context) (io.Reader, error) {
|
|
return strings.NewReader(f.content), nil
|
|
}
|
|
|
|
func (f *testFile) Size() int64 { return int64(len(f.content)) }
|
|
|
|
type testDirectory struct {
|
|
testBaseEntry
|
|
files []fs.Entry
|
|
}
|
|
|
|
func (d *testDirectory) Iterate(ctx context.Context) (fs.DirectoryIterator, error) {
|
|
return fs.StaticIterator(d.files, nil), nil
|
|
}
|
|
|
|
func (d *testDirectory) SupportsMultipleIterations() bool { return false }
|
|
func (d *testDirectory) IsDir() bool { return true }
|
|
func (d *testDirectory) LocalFilesystemPath() string { return d.name }
|
|
func (d *testDirectory) Size() int64 { return 0 }
|
|
func (d *testDirectory) Readdir(ctx context.Context) ([]fs.Entry, error) { return d.files, nil }
|
|
|
|
func (d *testDirectory) Mode() os.FileMode {
|
|
if d.mode == 0 {
|
|
return os.ModeDir | 0o755
|
|
}
|
|
|
|
return os.ModeDir | d.mode
|
|
}
|
|
|
|
func (d *testDirectory) Child(ctx context.Context, name string) (fs.Entry, error) {
|
|
for _, f := range d.files {
|
|
if f.Name() == name {
|
|
return f, nil
|
|
}
|
|
}
|
|
|
|
return nil, fs.ErrEntryNotFound
|
|
}
|
|
|
|
func TestCompareEmptyDirectories(t *testing.T) {
|
|
var buf bytes.Buffer
|
|
|
|
ctx := context.Background()
|
|
|
|
dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
dirMode := os.FileMode(0o777)
|
|
|
|
cid, _ := index.IDFromHash("p", []byte("sdkjfn"))
|
|
dirObjectID1 := object.DirectObjectID(cid)
|
|
|
|
cid, _ = index.IDFromHash("i", []byte("dfjlgn"))
|
|
dirObjectID2 := object.DirectObjectID(cid)
|
|
|
|
dir1 := createTestDirectory("testDir1", dirModTime, dirOwnerInfo, dirMode, dirObjectID1)
|
|
dir2 := createTestDirectory("testDir2", dirModTime, dirOwnerInfo, dirMode, dirObjectID2)
|
|
|
|
c, err := diff.NewComparer(&buf)
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() {
|
|
_ = c.Close()
|
|
})
|
|
|
|
expectedStats := diff.Stats{}
|
|
actualStats, err := c.Compare(ctx, dir1, dir2)
|
|
|
|
require.NoError(t, err)
|
|
require.Empty(t, buf.String())
|
|
require.Equal(t, expectedStats, actualStats)
|
|
}
|
|
|
|
func TestCompareIdenticalDirectories(t *testing.T) {
|
|
var buf bytes.Buffer
|
|
|
|
ctx := context.Background()
|
|
|
|
dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
dirMode := os.FileMode(0o777)
|
|
fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
|
|
cid, _ := index.IDFromHash("p", []byte("sdkjfn"))
|
|
dirObjectID1 := object.DirectObjectID(cid)
|
|
|
|
cid, _ = index.IDFromHash("i", []byte("dfjlgn"))
|
|
dirObjectID2 := object.DirectObjectID(cid)
|
|
|
|
file1 := &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"}
|
|
file2 := &testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"}
|
|
|
|
dir1 := createTestDirectory(
|
|
"testDir1",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID1,
|
|
file1,
|
|
file2,
|
|
)
|
|
dir2 := createTestDirectory(
|
|
"testDir2",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID2,
|
|
file1,
|
|
file2,
|
|
)
|
|
|
|
expectedStats := diff.Stats{}
|
|
|
|
c, err := diff.NewComparer(&buf)
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() {
|
|
_ = c.Close()
|
|
})
|
|
|
|
actualStats, err := c.Compare(ctx, dir1, dir2)
|
|
|
|
require.NoError(t, err)
|
|
require.Empty(t, buf.String())
|
|
require.Equal(t, expectedStats, actualStats)
|
|
}
|
|
|
|
func TestCompareDifferentDirectories(t *testing.T) {
|
|
var buf bytes.Buffer
|
|
|
|
ctx := context.Background()
|
|
|
|
dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
dirMode := os.FileMode(0o777)
|
|
|
|
cid, _ := index.IDFromHash("p", []byte("sdkjfn"))
|
|
dirObjectID1 := object.DirectObjectID(cid)
|
|
|
|
cid, _ = index.IDFromHash("i", []byte("dfjlgn"))
|
|
dirObjectID2 := object.DirectObjectID(cid)
|
|
|
|
dir1 := createTestDirectory(
|
|
"testDir1",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID1,
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"},
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"},
|
|
)
|
|
dir2 := createTestDirectory(
|
|
"testDir2",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID2,
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file3.txt"}, content: "abcdefghij1"},
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file4.txt"}, content: "klmnopqrstuvwxyz2"},
|
|
)
|
|
|
|
c, err := diff.NewComparer(&buf)
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() {
|
|
_ = c.Close()
|
|
})
|
|
|
|
expectedStats := diff.Stats{}
|
|
expectedStats.FileEntries.Added = 2
|
|
expectedStats.FileEntries.Removed = 2
|
|
|
|
expectedOutput := "added file ./file3.txt (11 bytes)\nadded file ./file4.txt (17 bytes)\n" +
|
|
"removed file ./file1.txt (10 bytes)\n" +
|
|
"removed file ./file2.txt (16 bytes)\n"
|
|
|
|
actualStats, err := c.Compare(ctx, dir1, dir2)
|
|
|
|
require.NoError(t, err)
|
|
require.Equal(t, expectedStats, actualStats)
|
|
require.Equal(t, expectedOutput, buf.String())
|
|
}
|
|
|
|
func TestCompareDifferentDirectories_DirTimeDiff(t *testing.T) {
|
|
var buf bytes.Buffer
|
|
|
|
ctx := context.Background()
|
|
|
|
fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
dirMode := os.FileMode(0o777)
|
|
|
|
cid, _ := index.IDFromHash("p", []byte("sdkjfn"))
|
|
dirObjectID1 := object.DirectObjectID(cid)
|
|
|
|
cid, _ = index.IDFromHash("i", []byte("dfjlgn"))
|
|
dirObjectID2 := object.DirectObjectID(cid)
|
|
|
|
dir1 := createTestDirectory(
|
|
"testDir1",
|
|
dirModTime1,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID1,
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"},
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"},
|
|
)
|
|
dir2 := createTestDirectory(
|
|
"testDir2",
|
|
dirModTime2,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID2,
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file1.txt"}, content: "abcdefghij"},
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime, name: "file2.txt"}, content: "klmnopqrstuvwxyz"},
|
|
)
|
|
|
|
expectedStats := diff.Stats{}
|
|
expectedStats.DirectoryEntries.Modified = 1
|
|
|
|
c, err := diff.NewComparer(&buf)
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() {
|
|
_ = c.Close()
|
|
})
|
|
|
|
expectedOutput := ". modification times differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n"
|
|
actualStats, err := c.Compare(ctx, dir1, dir2)
|
|
|
|
require.NoError(t, err)
|
|
require.Equal(t, expectedOutput, buf.String())
|
|
require.Equal(t, expectedStats, actualStats)
|
|
}
|
|
|
|
func TestCompareDifferentDirectories_FileTimeDiff(t *testing.T) {
|
|
var buf bytes.Buffer
|
|
|
|
ctx := context.Background()
|
|
|
|
fileModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
fileModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
dirMode := os.FileMode(0o700)
|
|
|
|
cid, _ := index.IDFromHash("p", []byte("sdkjfn"))
|
|
OID1 := object.DirectObjectID(cid)
|
|
|
|
cid, _ = index.IDFromHash("i", []byte("hvhjb"))
|
|
OID2 := object.DirectObjectID(cid)
|
|
|
|
dir1 := createTestDirectory(
|
|
"testDir1",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
OID1,
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime1, name: "file1.txt", oid: OID1}, content: "abcdefghij"},
|
|
)
|
|
dir2 := createTestDirectory(
|
|
"testDir2",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
OID2,
|
|
&testFile{testBaseEntry: testBaseEntry{modtime: fileModTime2, name: "file1.txt", oid: OID2}, content: "abcdefghij"},
|
|
)
|
|
|
|
c, err := diff.NewComparer(&buf)
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() {
|
|
_ = c.Close()
|
|
})
|
|
|
|
expectedStats := diff.Stats{}
|
|
expectedStats.FileEntries.Modified = 1
|
|
|
|
expectedOutput := "./file1.txt modification times differ: 2023-04-12 10:30:00 +0000 UTC 2022-04-12 10:30:00 +0000 UTC\n"
|
|
|
|
actualStats, err := c.Compare(ctx, dir1, dir2)
|
|
|
|
require.NoError(t, err)
|
|
require.Equal(t, expectedOutput, buf.String())
|
|
require.Equal(t, expectedStats, actualStats)
|
|
}
|
|
|
|
func TestCompareFileWithIdenticalContentsButDiffFileMetadata(t *testing.T) {
|
|
var buf bytes.Buffer
|
|
|
|
ctx := context.Background()
|
|
|
|
fileModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
fileModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
|
|
fileOwnerinfo1 := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
fileOwnerinfo2 := fs.OwnerInfo{UserID: 1001, GroupID: 1002}
|
|
|
|
dirOwnerInfo := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
dirMode := os.FileMode(0o777)
|
|
dirModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
|
|
cid, _ := index.IDFromHash("p", []byte("sdkjfn"))
|
|
dirObjectID1 := object.DirectObjectID(cid)
|
|
|
|
cid, _ = index.IDFromHash("i", []byte("dfjlgn"))
|
|
dirObjectID2 := object.DirectObjectID(cid)
|
|
|
|
dir1 := createTestDirectory(
|
|
"testDir1",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID1,
|
|
&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime1, oid: object.ID{}, owner: fileOwnerinfo1, mode: 0o700}, content: "abcdefghij"},
|
|
)
|
|
|
|
dir2 := createTestDirectory(
|
|
"testDir2",
|
|
dirModTime,
|
|
dirOwnerInfo,
|
|
dirMode,
|
|
dirObjectID2,
|
|
&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime2, oid: object.ID{}, owner: fileOwnerinfo2, mode: 0o777}, content: "abcdefghij"},
|
|
)
|
|
|
|
c, err := diff.NewComparer(&buf)
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() {
|
|
_ = c.Close()
|
|
})
|
|
|
|
expectedStats := diff.Stats{
|
|
FileEntries: diff.EntryTypeStats{
|
|
SameContentButDifferentMetadata: 1,
|
|
SameContentButDifferentModificationTime: 1,
|
|
SameContentButDifferentMode: 1,
|
|
SameContentButDifferentUserOwner: 1,
|
|
SameContentButDifferentGroupOwner: 1,
|
|
},
|
|
}
|
|
|
|
actualStats, err := c.Compare(ctx, dir1, dir2)
|
|
|
|
require.NoError(t, err)
|
|
require.Empty(t, buf.String())
|
|
require.Equal(t, expectedStats, actualStats)
|
|
}
|
|
|
|
func TestCompareIdenticalDirectoriesWithDiffDirectoryMetadata(t *testing.T) {
|
|
var buf bytes.Buffer
|
|
|
|
ctx := context.Background()
|
|
|
|
dirModTime1 := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
dirModTime2 := time.Date(2022, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
|
|
dirOwnerInfo1 := fs.OwnerInfo{UserID: 1000, GroupID: 1000}
|
|
dirOwnerInfo2 := fs.OwnerInfo{UserID: 1001, GroupID: 1002}
|
|
|
|
dirMode1 := os.FileMode(0o644)
|
|
dirMode2 := os.FileMode(0o777)
|
|
|
|
fileModTime := time.Date(2023, time.April, 12, 10, 30, 0, 0, time.UTC)
|
|
|
|
cid, _ := index.IDFromHash("p", []byte("sdkjfn"))
|
|
dirObjectID := object.DirectObjectID(cid)
|
|
|
|
dir1 := createTestDirectory(
|
|
"testDir1",
|
|
dirModTime1,
|
|
dirOwnerInfo1,
|
|
dirMode1,
|
|
dirObjectID,
|
|
&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime}, content: "abcdefghij"},
|
|
)
|
|
|
|
dir2 := createTestDirectory(
|
|
"testDir2",
|
|
dirModTime2,
|
|
dirOwnerInfo2,
|
|
dirMode2,
|
|
dirObjectID,
|
|
&testFile{testBaseEntry: testBaseEntry{name: "file1.txt", modtime: fileModTime}, content: "abcdefghij"},
|
|
)
|
|
c, err := diff.NewComparer(&buf)
|
|
require.NoError(t, err)
|
|
|
|
t.Cleanup(func() {
|
|
_ = c.Close()
|
|
})
|
|
|
|
expectedStats := diff.Stats{
|
|
DirectoryEntries: diff.EntryTypeStats{
|
|
SameContentButDifferentMetadata: 1,
|
|
SameContentButDifferentModificationTime: 1,
|
|
SameContentButDifferentMode: 1,
|
|
SameContentButDifferentUserOwner: 1,
|
|
SameContentButDifferentGroupOwner: 1,
|
|
},
|
|
}
|
|
|
|
actualStats, err := c.Compare(ctx, dir1, dir2)
|
|
|
|
require.NoError(t, err)
|
|
require.Empty(t, buf.String())
|
|
require.Equal(t, expectedStats, actualStats)
|
|
}
|
|
|
|
func createTestDirectory(name string, modtime time.Time, owner fs.OwnerInfo, mode os.FileMode, oid object.ID, files ...fs.Entry) *testDirectory {
|
|
return &testDirectory{testBaseEntry: testBaseEntry{modtime: modtime, name: name, owner: owner, mode: mode, oid: oid}, files: files}
|
|
}
|