feat(snapshots): support restoring sparse files (#1823)

* feat(snapshots): support restoring sparse files

This commit implements basic support for restoring sparse files from
a snapshot. When specifying "--mode=sparse" in a snapshot restore
command, Kopia will make a best effort to make sure the underlying
filesystem allocates the minimum amount of blocks needed to persist
restored files. In other words, enabling this feature will "force"
all restored files to be sparse — blocks of zero bytes in the source
file will not be allocated on disk.

* Address review comments

- Separate sparse option into its own bool flag
- Implement sparsefile package with copySparse method
- Truncate once before writing sparse file
- Check error from Truncate
- Add unit test for copySparse
- Invoke GetBlockSize once per file copy
- Remove support for Windows and explain why
- Add unit test for stat package

Co-authored-by: Dave Smith-Uchida <dave@kasten.io>
This commit is contained in:
Ali Dowair
2022-03-23 05:09:50 +03:00
committed by GitHub
parent dc0d41b7e0
commit aafe56cd6f
11 changed files with 650 additions and 10 deletions

View File

@@ -101,6 +101,7 @@ type commandRestore struct {
restoreOverwriteDirectories bool
restoreOverwriteFiles bool
restoreOverwriteSymlinks bool
restoreSparse bool
restoreConsistentAttributes bool
restoreMode string
restoreParallel int
@@ -125,6 +126,7 @@ func (c *commandRestore) setup(svc appServices, parent commandParent) {
cmd.Flag("overwrite-directories", "Overwrite existing directories").Default("true").BoolVar(&c.restoreOverwriteDirectories)
cmd.Flag("overwrite-files", "Specifies whether or not to overwrite already existing files").Default("true").BoolVar(&c.restoreOverwriteFiles)
cmd.Flag("overwrite-symlinks", "Specifies whether or not to overwrite already existing symlinks").Default("true").BoolVar(&c.restoreOverwriteSymlinks)
cmd.Flag("sparse", "When doing a restore, attempt to write files sparsely-allocating the minimum amount of disk space needed.").Default("false").BoolVar(&c.restoreSparse)
cmd.Flag("consistent-attributes", "When multiple snapshots match, fail if they have inconsistent attributes").Envar("KOPIA_RESTORE_CONSISTENT_ATTRIBUTES").BoolVar(&c.restoreConsistentAttributes)
cmd.Flag("mode", "Override restore mode").Default(restoreModeAuto).EnumVar(&c.restoreMode, restoreModeAuto, restoreModeLocal, restoreModeZip, restoreModeZipNoCompress, restoreModeTar, restoreModeTgz)
cmd.Flag("parallel", "Restore parallelism (1=disable)").Default("8").IntVar(&c.restoreParallel)
@@ -220,6 +222,7 @@ func (c *commandRestore) restoreOutput(ctx context.Context) (restore.Output, err
SkipOwners: c.restoreSkipOwners,
SkipPermissions: c.restoreSkipPermissions,
SkipTimes: c.restoreSkipTimes,
Sparse: c.restoreSparse,
}, nil
case restoreModeZip, restoreModeZipNoCompress:

View File

@@ -6,7 +6,8 @@
"sync"
)
const bufSize = 65536
// BufSize is the size (in bytes) of the shared copy buffers Kopia uses to copy data.
const BufSize = 65536
var (
mu sync.Mutex //nolint:gochecknoglobals
@@ -21,7 +22,7 @@ func GetBuffer() []byte {
defer mu.Unlock()
if len(buffers) == 0 {
return make([]byte, bufSize)
return make([]byte, BufSize)
}
var b []byte

View File

@@ -0,0 +1,107 @@
// Package sparsefile provides wrappers for handling the writing of sparse files (files with holes).
package sparsefile
import (
"io"
"os"
"github.com/pkg/errors"
"github.com/kopia/kopia/internal/iocopy"
"github.com/kopia/kopia/internal/stat"
)
// Write writes the contents of src to the given targetPath, omitting any holes.
func Write(targetPath string, src io.Reader, size int64) error {
dst, err := os.OpenFile(targetPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o600) //nolint:gosec,gomnd
if err != nil {
return err //nolint:wrapcheck
}
// ensure we always close f. Note that this does not conflict with the
// close below, as close is idempotent.
defer dst.Close() //nolint:errcheck,gosec
if err = dst.Truncate(size); err != nil {
return errors.Wrap(err, "error writing sparse file")
}
s, err := stat.GetBlockSize(targetPath)
if err != nil {
return errors.Wrap(err, "error writing sparse file")
}
buf := iocopy.GetBuffer()
defer iocopy.ReleaseBuffer(buf)
w, err := copySparse(dst, src, buf[0:s])
if err != nil {
return errors.Wrap(err, "error writing sparse file")
}
if w != size {
return errors.Errorf("")
}
if err := dst.Close(); err != nil {
return err //nolint:wrapcheck
}
return nil
}
func copySparse(dst io.WriteSeeker, src io.Reader, buf []byte) (written int64, err error) {
for {
nr, er := src.Read(buf)
if nr > 0 { // nolint:nestif
// If non-zero data is read, write it. Otherwise, skip forwards.
if isAllZero(buf) {
dst.Seek(int64(nr), os.SEEK_CUR) // nolint:errcheck
written += int64(nr)
continue
}
nw, ew := dst.Write(buf[0:nr])
if nw < 0 || nr < nw {
nw = 0
if ew == nil {
ew = errors.New("invalid write result")
}
}
written += int64(nw)
if ew != nil {
err = ew
break
}
if nr != nw {
err = io.ErrShortWrite
break
}
}
if er != nil {
if er != io.EOF {
err = er
}
break
}
}
return written, err
}
func isAllZero(buf []byte) bool {
for _, b := range buf {
if b != 0 {
return false
}
}
return true
}

View File

@@ -0,0 +1,98 @@
package sparsefile
import (
"bytes"
"os"
"path/filepath"
"runtime"
"testing"
"github.com/kopia/kopia/internal/stat"
)
// TestSparseWrite verifies that sparsefile.Write reproduces source contents
// exactly for files containing holes, runs of real zeros, and mixed data.
func TestSparseWrite(t *testing.T) {
	t.Parallel()

	if runtime.GOOS == "windows" {
		t.Skip("sparse files are not supported on windows")
	}

	dir := t.TempDir()

	blk, err := stat.GetBlockSize(dir)
	if err != nil {
		t.Fatal(err)
	}

	// chunk describes a run of d.rep copies of d.slice written at offset d.off.
	type chunk struct {
		slice []byte
		off   uint64
		rep   uint64
	}

	cases := []struct {
		name string
		size uint64
		data []chunk
	}{
		{
			name: "null",
			size: 0,
		},
		{
			name: "empty",
			size: blk,
			data: []chunk{
				{slice: []byte{0}, off: 0, rep: blk},
			},
		},
		{
			name: "hole",
			size: 2 * blk,
			data: []chunk{
				{slice: []byte{1}, off: blk, rep: blk},
			},
		},
		{
			name: "mix",
			size: 2 * blk,
			data: []chunk{
				{slice: []byte{1}, off: 3, rep: blk - 10},
				{slice: []byte{1}, off: 2*blk - 10, rep: 10},
			},
		},
	}

	for _, c := range cases {
		src := filepath.Join(dir, "src"+c.name)
		dst := filepath.Join(dir, "dst"+c.name)

		fd, err := os.Create(src)
		if err != nil {
			t.Fatal(err)
		}

		for _, d := range c.data {
			// BUG FIX: the WriteAt error was previously ignored.
			if _, err := fd.WriteAt(bytes.Repeat(d.slice, int(d.rep)), int64(d.off)); err != nil {
				t.Fatal(err)
			}
		}

		// Write reads from fd starting at offset 0 (WriteAt does not move
		// the file offset).
		if err := Write(dst, fd, int64(c.size)); err != nil {
			t.Fatalf("error writing %s: %v", dst, err)
		}

		// BUG FIX: fd was previously never closed (file-handle leak).
		if err := fd.Close(); err != nil {
			t.Fatal(err)
		}

		s, err := os.ReadFile(src)
		if err != nil {
			t.Fatal(err)
		}

		d, err := os.ReadFile(dst)
		if err != nil {
			t.Fatal(err)
		}

		if !bytes.Equal(s, d) {
			t.Fatalf("contents of %s and %s are not identical", src, dst)
		}
	}
}

37
internal/stat/stat_bsd.go Normal file
View File

@@ -0,0 +1,37 @@
//go:build openbsd
// +build openbsd
// Package stat provides a cross-platform abstraction for
// common stat commands.
package stat
import "syscall"
const (
diskBlockSize uint64 = 512
)
// GetFileAllocSize gets the space allocated on disk for the file.
// 'fname' in bytes.
func GetFileAllocSize(fname string) (uint64, error) {
var st syscall.Stat_t
err := syscall.Stat(fname, &st)
if err != nil {
return 0, err // nolint:wrapcheck
}
return uint64(st.Blocks) * diskBlockSize, nil
}
// GetBlockSize gets the disk block size of the underlying system.
func GetBlockSize(path string) (uint64, error) {
var st syscall.Statfs_t
err := syscall.Statfs(path, &st)
if err != nil {
return 0, err // nolint:wrapcheck
}
return uint64(st.F_bsize), nil
}

View File

@@ -0,0 +1,44 @@
//go:build !windows
// +build !windows
package stat
import (
"bytes"
"os"
"path/filepath"
"testing"
)
// TestGetBlockSize verifies that GetBlockSize succeeds on a well-known path
// and reports a non-zero block size.
func TestGetBlockSize(t *testing.T) {
	s, err := GetBlockSize(os.DevNull)
	if err != nil {
		t.Fatal(err)
	}

	// BUG FIX: s is unsigned, so the previous "s <= 0" comparison was
	// equivalent to "s == 0" and misleading; compare against zero directly.
	if s == 0 {
		t.Fatalf("invalid disk block size: %d, must be greater than 0", s)
	}
}
// TestGetFileAllocSize verifies that a freshly written file reports an
// on-disk allocation at least as large as its logical contents.
func TestGetFileAllocSize(t *testing.T) {
	const size = 4096

	fname := filepath.Join(t.TempDir(), "test")

	if err := os.WriteFile(fname, bytes.Repeat([]byte{1}, size), os.ModePerm); err != nil {
		t.Fatal(err)
	}

	allocated, err := GetFileAllocSize(fname)
	if err != nil {
		t.Fatalf("error getting file alloc size for %s: %v", fname, err)
	}

	if allocated < size {
		t.Fatalf("invalid allocated file size %d, expected at least %d", allocated, size)
	}
}

View File

@@ -0,0 +1,37 @@
//go:build linux || freebsd || darwin
// +build linux freebsd darwin
// Package stat provides a cross-platform abstraction for
// common stat commands.
package stat
import "syscall"
const (
diskBlockSize uint64 = 512
)
// GetFileAllocSize gets the space allocated on disk for the file
// 'fname' in bytes.
func GetFileAllocSize(fname string) (uint64, error) {
var st syscall.Stat_t
err := syscall.Stat(fname, &st)
if err != nil {
return 0, err // nolint:wrapcheck
}
return uint64(st.Blocks) * diskBlockSize, nil
}
// GetBlockSize gets the disk block size of the underlying system.
func GetBlockSize(path string) (uint64, error) {
	var fsInfo syscall.Statfs_t

	if err := syscall.Statfs(path, &fsInfo); err != nil {
		return 0, err // nolint:wrapcheck
	}

	return uint64(fsInfo.Bsize), nil // nolint:unconvert,nolintlint
}

View File

@@ -0,0 +1,21 @@
//go:build windows
// +build windows
// Package stat provides a cross-platform abstraction for
// common stat commands.
package stat
import "errors"
var errNotImplemented = errors.New("not implemented")
// GetFileAllocSize gets the space allocated on disk for the file
// 'fname' in bytes.
//
// Not implemented on Windows: this stub always returns errNotImplemented,
// so callers must treat allocation-size queries as unsupported here.
func GetFileAllocSize(fname string) (uint64, error) {
	return 0, errNotImplemented
}
// GetBlockSize gets the disk block size of the underlying system.
//
// Not implemented on Windows: this stub always returns errNotImplemented,
// which is why sparse restores are disabled on this platform.
func GetBlockSize(path string) (uint64, error) {
	return 0, errNotImplemented
}

View File

@@ -48,6 +48,15 @@ func TestSkipUnlessCI(tb testing.TB, msg string, args ...interface{}) {
}
}
// TestSkipUnlessLinux skips the current test if the test environment is not Linux.
func TestSkipUnlessLinux(tb testing.TB) {
	tb.Helper()

	if runtime.GOOS == "linux" {
		return
	}

	tb.Skip("test not supported in this environment.")
}
// TestSkipOnCIUnlessLinuxAMD64 skips the current test if running on CI unless the environment is Linux/AMD64.
func TestSkipOnCIUnlessLinuxAMD64(tb testing.TB) {
tb.Helper()

View File

@@ -15,6 +15,7 @@
"github.com/kopia/kopia/fs/localfs"
"github.com/kopia/kopia/internal/atomicfile"
"github.com/kopia/kopia/internal/iocopy"
"github.com/kopia/kopia/internal/sparsefile"
"github.com/kopia/kopia/snapshot"
)
@@ -56,6 +57,9 @@ type FilesystemOutput struct {
// SkipTimes when set to true causes restore to skip restoring modification times.
SkipTimes bool `json:"skipTimes"`
// Sparse when set to true causes restore files sparsely-not writing any holes (zero regions) to disk.
Sparse bool `json:"sparse"`
}
// Parallelizable implements restore.Output interface.
@@ -146,11 +150,11 @@ func (o *FilesystemOutput) CreateSymlink(ctx context.Context, relativePath strin
path := filepath.Join(o.TargetPath, filepath.FromSlash(relativePath))
switch stat, err := os.Lstat(path); {
switch st, err := os.Lstat(path); {
case os.IsNotExist(err): // Proceed to symlink creation
case err != nil:
return errors.Wrap(err, "lstat error at symlink path")
case fileIsSymlink(stat):
case fileIsSymlink(st):
// Throw error if we are not overwriting symlinks
if !o.OverwriteSymlinks {
return errors.Errorf("will not overwrite existing symlink")
@@ -175,8 +179,8 @@ func (o *FilesystemOutput) CreateSymlink(ctx context.Context, relativePath strin
return nil
}
func fileIsSymlink(stat os.FileInfo) bool {
return stat.Mode()&os.ModeSymlink != 0
func fileIsSymlink(st os.FileInfo) bool {
return st.Mode()&os.ModeSymlink != 0
}
// SymlinkExists implements restore.Output interface.
@@ -282,13 +286,13 @@ func isWindows() bool {
}
func (o *FilesystemOutput) createDirectory(ctx context.Context, path string) error {
switch stat, err := os.Stat(path); {
switch st, err := os.Stat(path); {
case os.IsNotExist(err):
// nolint:wrapcheck
return os.MkdirAll(path, outputDirMode)
case err != nil:
return errors.Wrap(err, "failed to stat path "+path)
case stat.Mode().IsDir():
case st.Mode().IsDir():
if !o.OverwriteDirectories {
if empty, _ := isEmptyDirectory(path); !empty {
return errors.Errorf("non-empty directory already exists, not overwriting it: %q", path)
@@ -315,7 +319,8 @@ func write(targetPath string, r fs.Reader) error {
name := f.Name()
if err := iocopy.JustCopy(f, r); err != nil {
err = iocopy.JustCopy(f, r)
if err != nil {
return errors.Wrap(err, "cannot write data to file %q "+name)
}
@@ -346,13 +351,23 @@ func (o *FilesystemOutput) copyFileContent(ctx context.Context, targetPath strin
defer r.Close() //nolint:errcheck
log(ctx).Debugf("copying file contents to: %v", targetPath)
targetPath = atomicfile.MaybePrefixLongFilenameOnWindows(targetPath)
if o.WriteFilesAtomically {
// nolint:wrapcheck
return atomicfile.Write(targetPath, r)
}
return write(atomicfile.MaybePrefixLongFilenameOnWindows(targetPath), r)
if o.Sparse {
if isWindows() {
log(ctx).Infof("sparse files are not supported on Windows, restoring normally")
} else {
// nolint:wrapcheck
return sparsefile.Write(targetPath, r, f.Size())
}
}
return write(targetPath, r)
}
func isEmptyDirectory(name string) (bool, error) {

View File

@@ -3,6 +3,7 @@
import (
"archive/tar"
"archive/zip"
"bytes"
"compress/gzip"
"errors"
"fmt"
@@ -21,6 +22,8 @@
"github.com/kopia/kopia/fs/localfs"
"github.com/kopia/kopia/internal/diff"
"github.com/kopia/kopia/internal/fshasher"
"github.com/kopia/kopia/internal/iocopy"
"github.com/kopia/kopia/internal/stat"
"github.com/kopia/kopia/internal/testlogging"
"github.com/kopia/kopia/internal/testutil"
"github.com/kopia/kopia/tests/clitestutil"
@@ -488,6 +491,271 @@ func TestRestoreSnapshotOfSingleFile(t *testing.T) {
verifyFileMode(t, filepath.Join(restoreDir, "restored-5"), defaultRestoredFilePermission)
}
// TestSnapshotSparseRestore verifies that restoring with --sparse reproduces
// both the logical contents and the expected physical (allocated) size of
// files containing holes and runs of zero bytes.
func TestSnapshotSparseRestore(t *testing.T) {
	t.Parallel()

	// The behavior of the Darwin (APFS) is not published, and sparse restores
	// are not supported on Windows. As such, we cannot (reliably) test them here.
	testutil.TestSkipUnlessLinux(t)

	runner := testenv.NewInProcRunner(t)
	e := testenv.NewCLITest(t, testenv.RepoFormatNotImportant, runner)

	e.RunAndExpectSuccess(t, "repo", "create", "filesystem", "--path", e.RepoDir)

	sourceDir := testutil.TempDirectory(t)
	restoreDir := testutil.TempDirectory(t)

	bufSize := uint64(iocopy.BufSize)

	blkSize, err := stat.GetBlockSize(sourceDir)
	if err != nil {
		t.Fatalf("error getting disk block size: %v", err)
	}

	// chunk describes a run of rep copies of slice written at offset off.
	type chunk struct {
		slice []byte
		off   uint64
		rep   uint64
	}

	cases := []struct {
		name  string
		data  []chunk
		trunc uint64 // Truncate source file to this size
		sLog  uint64 // Expected logical size of source file
		sPhys uint64 // Expected physical size of source file
		rLog  uint64 // Expected logical size of restored file
		rPhys uint64 // Expected physical size of restored file
	}{
		{
			name:  "null_file",
			trunc: 0,
			sLog:  0,
			sPhys: 0,
			rLog:  0,
			rPhys: 0,
		},
		{
			name:  "empty_file",
			trunc: 3 * bufSize,
			sLog:  3 * bufSize,
			sPhys: 0,
			rLog:  3 * bufSize,
			rPhys: 0,
		},
		{
			name: "blk",
			data: []chunk{
				{slice: []byte("1"), off: 0, rep: blkSize},
			},
			sLog:  blkSize,
			sPhys: blkSize,
			rLog:  blkSize,
			rPhys: blkSize,
		},
		{
			name: "blk_real_zeros",
			data: []chunk{
				{slice: []byte{0}, off: 0, rep: blkSize},
			},
			sLog:  blkSize,
			sPhys: blkSize,
			rLog:  blkSize,
			rPhys: 0,
		},
		{
			name: "buf_real_zeros",
			data: []chunk{
				{slice: []byte{0}, off: 0, rep: bufSize},
			},
			sLog:  bufSize,
			sPhys: bufSize,
			rLog:  bufSize,
			rPhys: 0,
		},
		{
			name: "buf_full",
			data: []chunk{
				{slice: []byte("1"), off: 0, rep: bufSize},
			},
			sLog:  bufSize,
			sPhys: bufSize,
			rLog:  bufSize,
			rPhys: bufSize,
		},
		{
			name: "buf_trailing_bytes",
			data: []chunk{
				{slice: []byte("1"), off: bufSize - blkSize - 1, rep: 1},
				{slice: []byte("1"), off: bufSize - 1, rep: 1},
			},
			trunc: bufSize,
			sLog:  bufSize,
			sPhys: 2 * blkSize,
			rLog:  bufSize,
			rPhys: 2 * blkSize,
		},
		{
			name: "buf_trailing_hole",
			data: []chunk{
				{slice: []byte("1"), off: 0, rep: 1},
			},
			trunc: bufSize,
			sLog:  bufSize,
			sPhys: blkSize,
			rLog:  bufSize,
			rPhys: blkSize,
		},
		{
			name: "buf_hole_aligned",
			data: []chunk{
				{slice: []byte("1"), off: bufSize, rep: blkSize},
			},
			trunc: bufSize + blkSize,
			sLog:  bufSize + blkSize,
			sPhys: blkSize,
			rLog:  bufSize + blkSize,
			rPhys: blkSize,
		},
		{
			name: "buf_hole_on_buf_boundary",
			data: []chunk{
				{slice: []byte("1"), off: bufSize / 2, rep: bufSize},
			},
			sLog:  bufSize * 3 / 2,
			sPhys: bufSize,
			rLog:  bufSize * 3 / 2,
			rPhys: bufSize,
		},
		{
			name: "blk_hole_on_blk_boundary",
			data: []chunk{
				{slice: []byte("1"), off: blkSize / 2, rep: blkSize},
			},
			sLog:  blkSize * 3 / 2,
			sPhys: blkSize * 2,
			rLog:  blkSize * 3 / 2,
			rPhys: blkSize * 2,
		},
		{
			name: "blk_hole_on_buf_boundary",
			data: []chunk{
				{slice: []byte("1"), off: 0, rep: bufSize - (blkSize / 2)},
				{slice: []byte("1"), off: bufSize + (blkSize / 2), rep: blkSize / 2},
			},
			sLog:  bufSize + blkSize,
			sPhys: bufSize + blkSize,
			rLog:  bufSize + blkSize,
			rPhys: bufSize + blkSize,
		},
		{
			name: "blk_hole_aligned",
			data: []chunk{
				{slice: []byte("1"), off: 0, rep: bufSize},
				{slice: []byte("1"), off: bufSize + blkSize, rep: bufSize - blkSize},
			},
			trunc: 2 * bufSize,
			sLog:  2 * bufSize,
			sPhys: 2*bufSize - blkSize,
			rLog:  2 * bufSize,
			rPhys: 2*bufSize - blkSize,
		},
		{
			name: "blk_alternating_empty",
			data: []chunk{
				{slice: []byte("1"), off: 0, rep: blkSize},
				{slice: []byte("1"), off: 1 * blkSize, rep: blkSize},
				{slice: []byte("1"), off: 4 * blkSize, rep: blkSize},
				{slice: []byte("1"), off: 6 * blkSize, rep: blkSize},
				{slice: []byte("1"), off: 8 * blkSize, rep: blkSize},
			},
			sLog:  9 * blkSize,
			sPhys: 5 * blkSize,
			rLog:  9 * blkSize,
			rPhys: 5 * blkSize,
		},
		{
			name: "blk_alternating_zero",
			data: []chunk{
				{slice: []byte("1"), off: 0, rep: blkSize},
				{slice: []byte{0}, off: blkSize, rep: blkSize},
				{slice: []byte("1"), off: 2 * blkSize, rep: blkSize},
				{slice: []byte{0}, off: 3 * blkSize, rep: blkSize},
			},
			sLog:  4 * blkSize,
			sPhys: 4 * blkSize,
			rLog:  4 * blkSize,
			rPhys: 2 * blkSize,
		},
	}

	for _, c := range cases {
		sourceFile := filepath.Join(sourceDir, c.name+"_source")

		fd, err := os.Create(sourceFile)
		if err != nil {
			t.Fatal(err)
		}

		if err = fd.Truncate(int64(c.trunc)); err != nil {
			t.Fatal(err)
		}

		for _, d := range c.data {
			// BUG FIX: the WriteAt error was previously ignored.
			if _, err := fd.WriteAt(bytes.Repeat(d.slice, int(d.rep)), int64(d.off)); err != nil {
				t.Fatal(err)
			}
		}

		// BUG FIX: fd was previously never closed, leaking one file handle
		// per test case. Close before snapshotting the file.
		if err := fd.Close(); err != nil {
			t.Fatal(err)
		}

		verifyFileSize(t, sourceFile, c.sLog, c.sPhys)

		e.RunAndExpectSuccess(t, "snapshot", "create", sourceFile)

		si := clitestutil.ListSnapshotsAndExpectSuccess(t, e, sourceFile)
		if got, want := len(si), 1; got != want {
			t.Fatalf("got %v sources, wanted %v", got, want)
		}

		if got, want := len(si[0].Snapshots), 1; got != want {
			t.Fatalf("got %v snapshots, wanted %v", got, want)
		}

		snapID := si[0].Snapshots[0].SnapshotID
		restoreFile := filepath.Join(restoreDir, c.name+"_restore")

		e.RunAndExpectSuccess(t, "snapshot", "restore", snapID, "--sparse", restoreFile)
		verifyFileSize(t, restoreFile, c.rLog, c.rPhys)
	}
}
// verifyFileSize asserts that fname has the given logical size and, on
// platforms where allocation can be queried, the given physical size.
func verifyFileSize(t *testing.T, fname string, logical, physical uint64) {
	t.Helper()

	fi, err := os.Stat(fname)
	if err != nil {
		t.Fatalf("error verifying file size: %v", err)
	}

	if gotLogical := uint64(fi.Size()); gotLogical != logical {
		t.Errorf("%s logical file size incorrect: expected %d, got %d", fname, logical, gotLogical)
	}

	if runtime.GOOS == windowsOSName {
		t.Logf("getting physical file size is not supported on windows")
		return
	}

	gotPhysical, err := stat.GetFileAllocSize(fname)
	if err != nil {
		t.Fatalf("error verifying file size: %v", err)
	}

	if gotPhysical != physical {
		t.Errorf("%s physical file size incorrect: expected %d, got %d", fname, physical, gotPhysical)
	}
}
func verifyFileMode(t *testing.T, filename string, want os.FileMode) {
t.Helper()