Files
kopia/cli/command_snapshot_fix_test.go
Jarek Kowalski 9bf9cac7fb refactor(repository): ensure we always parse content.ID and object.ID (#1960)
* refactor(repository): ensure we always parse content.ID and object.ID

This changes the types to be incompatible with string to prevent direct
conversion to and from string.

This has the additional benefit of reducing the number of memory allocations
and bytes for all IDs.

content.ID went from 2 allocations to 1:
   typical case 32 characters + 16 bytes per-string overhead
   worst-case 65 characters + 16 bytes per-string overhead
   now: 34 bytes

object.ID went from 2 allocations to 1:
   typical case 32 characters + 16 bytes per-string overhead
   worst-case 65 characters + 16 bytes per-string overhead
   now: 36 bytes

* move index.{ID,IDRange} methods to separate files

* replaced index.IDFromHash with content.IDFromHash externally

* minor tweaks and additional tests

* Update repo/content/index/id_test.go

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>

* Update repo/content/index/id_test.go

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>

* pr feedback

* post-merge fixes

* pr feedback

* pr feedback

* fixed subtle regression in sortedContents()

This was actually not producing invalid results because of how base36
works, just not sorting as efficiently as it could.

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>
2022-05-25 14:15:56 +00:00

441 lines
14 KiB
Go

package cli_test
import (
"bytes"
"os"
"path/filepath"
"sort"
"strings"
"testing"
"github.com/stretchr/testify/require"
"github.com/kopia/kopia/internal/testutil"
"github.com/kopia/kopia/repo/content"
"github.com/kopia/kopia/repo/object"
"github.com/kopia/kopia/snapshot"
"github.com/kopia/kopia/snapshot/snapshotfs"
"github.com/kopia/kopia/tests/testenv"
)
// TestSnapshotFix exercises the "snapshot fix" CLI command against a small
// source tree that is snapshotted twice into a fresh filesystem repository.
//
// Each case optionally damages the repository after the snapshots are taken
// (modifyRepoAfterSnapshot), then runs "snapshot fix" with the case's flags,
// first without and then with "--commit", and checks "snapshot verify" and the
// resulting directory listing against the case's expectations.
//
// nolint:maintidx
func TestSnapshotFix(t *testing.T) {
	srcDir1 := testutil.TempDirectory(t)

	if testutil.ShouldReduceTestComplexity() {
		return
	}

	// root-level small files: 3-byte pattern x 100 = 300 bytes each
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "small-file1"), 1, bytes.Repeat([]byte{1, 2, 3}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "small-file1-dup"), 1, bytes.Repeat([]byte{1, 2, 3}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "small-file2"), 1, bytes.Repeat([]byte{1, 2, 4}, 100))

	require.NoError(t, os.MkdirAll(filepath.Join(srcDir1, "dir1"), 0o700))
	require.NoError(t, os.MkdirAll(filepath.Join(srcDir1, "dir2"), 0o700))

	// root-level large files: 3 x 3 x 1_000_000 bytes = 9 MB each
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "large-file1"), 3, bytes.Repeat([]byte{1, 2, 3}, 1000000))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "large-file1-dup"), 3, bytes.Repeat([]byte{1, 2, 3}, 1000000))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "large-file2"), 3, bytes.Repeat([]byte{1, 2, 4}, 1000000))

	// dir1 and dir2 use 4-byte patterns whose first byte differs per
	// directory, so their file data differs from the root-level files and
	// from each other.
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir1", "small-file1"), 1, bytes.Repeat([]byte{1, 1, 2, 3}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir1", "small-file1-dup"), 1, bytes.Repeat([]byte{1, 1, 2, 3}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir1", "small-file2"), 1, bytes.Repeat([]byte{1, 1, 2, 4}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir1", "large-file1"), 3, bytes.Repeat([]byte{1, 1, 2, 3}, 1000000))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir1", "large-file1-dup"), 3, bytes.Repeat([]byte{1, 1, 2, 3}, 1000000))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir1", "large-file2"), 3, bytes.Repeat([]byte{1, 1, 2, 4}, 1000000))

	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir2", "small-file1"), 1, bytes.Repeat([]byte{2, 1, 2, 3}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir2", "small-file1-dup"), 1, bytes.Repeat([]byte{2, 1, 2, 3}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir2", "small-file2"), 1, bytes.Repeat([]byte{2, 1, 2, 4}, 100))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir2", "large-file1"), 3, bytes.Repeat([]byte{2, 1, 2, 3}, 1000000))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir2", "large-file1-dup"), 3, bytes.Repeat([]byte{2, 1, 2, 3}, 1000000))
	mustWriteFileWithRepeatedData(t, filepath.Join(srcDir1, "dir2", "large-file2"), 3, bytes.Repeat([]byte{2, 1, 2, 4}, 1000000))

	cases := []struct {
		// name is the subtest name.
		name string
		// flags are appended to "snapshot fix".
		flags []string
		// modifyRepoAfterSnapshot is invoked once both snapshots exist; it
		// receives the first snapshot's manifest and its path->entry map.
		modifyRepoAfterSnapshot func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry)
		// initiallyCorrupted says whether "snapshot verify" must fail until
		// a fix has been committed.
		initiallyCorrupted bool
		// wantRecoveredFiles, when non-nil, is the sorted list of paths
		// expected in the snapshot tree after the committed fix.
		wantRecoveredFiles []string
		// wantRootStub requires the fixed root object to parse as a
		// snapshotfs.UnreadableDirEntryReplacement.
		wantRootStub bool
		// wantFixFail expects "snapshot fix" to fail with and without
		// "--commit", leaving "snapshot verify" failing.
		wantFixFail bool
		// wantFailVerify expects "snapshot verify" to keep failing even
		// after the fix has been committed.
		wantFailVerify bool
	}{
		{
			name:                    "FixInvalidFiles_NoOp",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {},
			flags:                   []string{"invalid-files"},
			wantRecoveredFiles: []string{
				"dir1",
				"dir1/large-file1",
				"dir1/large-file1-dup",
				"dir1/large-file2",
				"dir1/small-file1",
				"dir1/small-file1-dup",
				"dir1/small-file2",
				"dir2",
				"dir2/large-file1",
				"dir2/large-file1-dup",
				"dir2/large-file2",
				"dir2/small-file1",
				"dir2/small-file1-dup",
				"dir2/small-file2",
				"large-file1",
				"large-file1-dup",
				"large-file2",
				"small-file1",
				"small-file1-dup",
				"small-file2",
			},
		},
		{
			name: "FixInvalidFiles_MissingRootDirStub",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				forgetContents(t, env,
					man.RootObjectID().String())
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files"},
			wantRootStub:       true,
		},
		{
			name: "FixInvalidFiles_MissingRootDirFail",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				forgetContents(t, env,
					man.RootObjectID().String())
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files", "--invalid-directory-handling=fail"},
			wantFixFail:        true,
		},
		{
			name: "FixInvalidFiles_MissingRootDirKeep",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				forgetContents(t, env,
					man.RootObjectID().String())
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files", "--invalid-directory-handling=keep"},
			wantFailVerify:     true,
		},
		{
			name: "FixInvalidFiles_MissingShortContentFileRemove",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				forgetContents(t, env,
					fileMap["small-file1"].ObjectID.String(),
					fileMap["dir1/small-file1"].ObjectID.String())
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files", "--invalid-file-handling=remove"},
			// recovered files
			wantRecoveredFiles: []string{
				"dir1",
				"dir1/large-file1",
				"dir1/large-file1-dup",
				"dir1/large-file2",
				"dir1/small-file2",
				"dir2",
				"dir2/large-file1",
				"dir2/large-file1-dup",
				"dir2/large-file2",
				"dir2/small-file1",
				"dir2/small-file1-dup",
				"dir2/small-file2",
				"large-file1",
				"large-file1-dup",
				"large-file2",
				"small-file2",
			},
		},
		{
			name: "FixInvalidFiles_MissingShortContentFileStub",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				forgetContents(t, env,
					fileMap["small-file1"].ObjectID.String(),
					fileMap["dir1/small-file1"].ObjectID.String())
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files"},
			// recovered files
			wantRecoveredFiles: []string{
				".INVALID.small-file1",
				".INVALID.small-file1-dup",
				"dir1",
				"dir1/.INVALID.small-file1",
				"dir1/.INVALID.small-file1-dup",
				"dir1/large-file1",
				"dir1/large-file1-dup",
				"dir1/large-file2",
				"dir1/small-file2",
				"dir2",
				"dir2/large-file1",
				"dir2/large-file1-dup",
				"dir2/large-file2",
				"dir2/small-file1",
				"dir2/small-file1-dup",
				"dir2/small-file2",
				"large-file1",
				"large-file1-dup",
				"large-file2",
				"small-file2",
			},
		},
		{
			name: "FixInvalidFiles_MissingShortContentFileKeep",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				forgetContents(t, env,
					fileMap["small-file1"].ObjectID.String(),
					fileMap["dir1/small-file1"].ObjectID.String())
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files", "--invalid-file-handling=keep"},
			wantFailVerify:     true,
		},
		{
			name: "FixInvalidFiles_MissingShortContentDir",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				forgetContents(t, env,
					fileMap["dir1"].ObjectID.String())
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files", "--invalid-directory-handling=stub"},
			wantRecoveredFiles: []string{
				".INVALID.dir1",
				"dir2",
				"dir2/large-file1",
				"dir2/large-file1-dup",
				"dir2/large-file2",
				"dir2/small-file1",
				"dir2/small-file1-dup",
				"dir2/small-file2",
				"large-file1",
				"large-file1-dup",
				"large-file2",
				"small-file1",
				"small-file1-dup",
				"small-file2",
			},
		},
		{
			name: "FixInvalidFiles_MissingLargeFileIndex",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {
				// the object ID string is passed without its leading "I"
				forgetContents(t, env,
					strings.TrimPrefix(fileMap["large-file1"].ObjectID.String(), "I"),
					strings.TrimPrefix(fileMap["dir1/large-file1"].ObjectID.String(), "I"))
			},
			initiallyCorrupted: true,
			flags:              []string{"invalid-files", "--invalid-file-handling=remove"},
			wantRecoveredFiles: []string{
				"dir1",
				"dir1/large-file2",
				"dir1/small-file1",
				"dir1/small-file1-dup",
				"dir1/small-file2",
				"dir2",
				"dir2/large-file1",
				"dir2/large-file1-dup",
				"dir2/large-file2",
				"dir2/small-file1",
				"dir2/small-file1-dup",
				"dir2/small-file2",
				"large-file2",
				"small-file1",
				"small-file1-dup",
				"small-file2",
			},
		},
		{
			name:                    "FixRemoveFiles_ByFileName",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {},
			flags:                   []string{"remove-files", "--filename=small-file2", "--filename=large-file1-dup"},
			wantRecoveredFiles: []string{
				"dir1",
				"dir1/large-file1",
				"dir1/large-file2",
				"dir1/small-file1",
				"dir1/small-file1-dup",
				"dir2",
				"dir2/large-file1",
				"dir2/large-file2",
				"dir2/small-file1",
				"dir2/small-file1-dup",
				"large-file1",
				"large-file2",
				"small-file1",
				"small-file1-dup",
			},
		},
		{
			name:                    "FixRemoveFiles_ByWildcard",
			modifyRepoAfterSnapshot: func(env *testenv.CLITest, man *snapshot.Manifest, fileMap map[string]*snapshot.DirEntry) {},
			flags:                   []string{"remove-files", "--filename=small-*", "--filename=*-dup"},
			wantRecoveredFiles: []string{
				"dir1",
				"dir1/large-file1",
				"dir1/large-file2",
				"dir2",
				"dir2/large-file1",
				"dir2/large-file2",
				"large-file1",
				"large-file2",
			},
		},
	}

	for _, tc := range cases {
		tc := tc

		t.Run(tc.name, func(t *testing.T) {
			runner := testenv.NewInProcRunner(t)
			env := testenv.NewCLITest(t, testenv.RepoFormatNotImportant, runner)

			env.RunAndExpectSuccess(t, "repo", "create", "filesystem", "--path", env.RepoDir)

			var man1, man2 snapshot.Manifest

			testutil.MustParseJSONLines(t, env.RunAndExpectSuccess(t, "snapshot", "create", srcDir1, "--json"), &man1)
			testutil.MustParseJSONLines(t, env.RunAndExpectSuccess(t, "snapshot", "create", srcDir1, "--json"), &man2)

			fileMap := mustGetFileMap(t, env, man1.RootObjectID())

			tc.modifyRepoAfterSnapshot(env, &man1, fileMap)

			// verification outcome before any fix must match the case setup
			if tc.initiallyCorrupted {
				env.RunAndExpectFailure(t, "snapshot", "verify")
			} else {
				env.RunAndExpectSuccess(t, "snapshot", "verify")
			}

			if tc.wantFixFail {
				env.RunAndExpectFailure(t, append([]string{"snapshot", "fix"}, tc.flags...)...)
				env.RunAndExpectFailure(t, append(append([]string{"snapshot", "fix"}, tc.flags...), "--commit")...)
				env.RunAndExpectFailure(t, "snapshot", "verify")

				return
			}

			// this does not commit fixes
			env.RunAndExpectSuccess(t, append([]string{"snapshot", "fix"}, tc.flags...)...)

			// without --commit the verification outcome must be unchanged
			if tc.initiallyCorrupted {
				// snapshot verify still fails
				env.RunAndExpectFailure(t, "snapshot", "verify")
			} else {
				env.RunAndExpectSuccess(t, "snapshot", "verify")
			}

			env.RunAndExpectSuccess(t, append(append([]string{"snapshot", "fix"}, tc.flags...), "--commit")...)

			if tc.wantFailVerify {
				env.RunAndExpectFailure(t, "snapshot", "verify")
				return
			}

			env.RunAndExpectSuccess(t, "snapshot", "verify")

			var manifests []snapshot.Manifest

			testutil.MustParseJSONLines(t, env.RunAndExpectSuccess(t, "snapshot", "list", "--json"), &manifests)
			require.Len(t, manifests, 2)

			// make sure all root entries have been fixed the same way
			require.Equal(t, manifests[0].RootEntry, manifests[1].RootEntry)

			switch {
			case tc.wantRecoveredFiles != nil:
				var remainingFiles []string

				for f := range mustGetFileMap(t, env, manifests[0].RootObjectID()) {
					remainingFiles = append(remainingFiles, f)
				}

				// map iteration order is random; sort before comparing
				sort.Strings(remainingFiles)

				require.Equal(t, tc.wantRecoveredFiles, remainingFiles)

			case tc.wantRootStub:
				// only checks that the root object parses as a stub
				var stub snapshotfs.UnreadableDirEntryReplacement

				testutil.MustParseJSONLines(t, env.RunAndExpectSuccess(t, "show", manifests[0].RootObjectID().String()), &stub)
			}
		})
	}
}
// forgetContents rewrites the given contents into a new blob and then deletes
// that blob, leaving the index entries for those contents dangling.
func forgetContents(t *testing.T, env *testenv.CLITest, contentIDs ...string) {
	t.Helper()

	rewriteArgs := append([]string{"content", "rewrite", "--safety=none"}, contentIDs...)

	packsBefore := mustGetContentMap(t, env)
	env.RunAndExpectSuccess(t, rewriteArgs...)
	packsAfter := mustGetContentMap(t, env)

	rmArgs := []string{"blob", "rm"}

	for _, rawID := range contentIDs {
		id, err := content.ParseID(rawID)
		require.NoError(t, err)

		// the rewrite must have moved the content to a different pack blob
		oldBlob, newBlob := packsBefore[id].PackBlobID, packsAfter[id].PackBlobID
		require.NotEqual(t, oldBlob, newBlob)

		rmArgs = append(rmArgs, string(newBlob))
	}

	env.RunAndExpectSuccess(t, rmArgs...)
}
// mustGetContentMap runs "content ls --json" and returns the parsed entries
// keyed by their content ID.
func mustGetContentMap(t *testing.T, env *testenv.CLITest) map[content.ID]content.InfoStruct {
	t.Helper()

	var infos []content.InfoStruct

	listing := env.RunAndExpectSuccess(t, "content", "ls", "--json")
	testutil.MustParseJSONLines(t, listing, &infos)

	byID := make(map[content.ID]content.InfoStruct, len(infos))
	for i := range infos {
		byID[infos[i].ContentID] = infos[i]
	}

	return byID
}
// mustGetFileMap collects the entries listed under the directory object root
// into a fresh map by delegating to mustListDirEntries with an empty prefix.
func mustGetFileMap(t *testing.T, env *testenv.CLITest, root object.ID) map[string]*snapshot.DirEntry {
	t.Helper()

	entries := make(map[string]*snapshot.DirEntry)
	mustListDirEntries(t, env, entries, root, "")

	return entries
}
// mustListDirEntries runs "show" on the directory object root, parses the
// output as a directory manifest and stores each entry in out under
// prefix+name. Directory entries are descended into recursively with
// name+"/" appended to the prefix.
func mustListDirEntries(t *testing.T, env *testenv.CLITest, out map[string]*snapshot.DirEntry, root object.ID, prefix string) {
	t.Helper()

	var manifest snapshot.DirManifest

	shown := env.RunAndExpectSuccess(t, "show", root.String())
	testutil.MustParseJSONLines(t, shown, &manifest)

	for _, entry := range manifest.Entries {
		fullName := prefix + entry.Name
		out[fullName] = entry

		if entry.Type != snapshot.EntryTypeDirectory {
			continue
		}

		mustListDirEntries(t, env, out, entry.ObjectID, fullName+"/")
	}
}
// mustWriteFileWithRepeatedData creates (or truncates) the file fname and
// writes data to it repeat times in a row. Any error from creating, writing
// or closing the file fails the test immediately.
func mustWriteFileWithRepeatedData(t *testing.T, fname string, repeat int, data []byte) {
	t.Helper()

	f, err := os.Create(fname)
	require.NoError(t, err)

	// The close error is checked rather than dropped: for a file that was
	// just written, a failed close may indicate the data was not persisted.
	defer func() {
		require.NoError(t, f.Close())
	}()

	for i := 0; i < repeat; i++ {
		_, err := f.Write(data)
		require.NoError(t, err)
	}
}