Files
kopia/snapshot/upload.go
Jarek Kowalski 6e83ad0b9b moved policies to separate package
refactored policy management into policy.Manager that uses manifest.Manager internally
2017-11-27 18:05:32 -08:00

431 lines
11 KiB
Go

package snapshot
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"hash/fnv"
"io"
"log"
"math/rand"
"sync/atomic"
"time"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/internal/dir"
"github.com/kopia/kopia/internal/hashcache"
"github.com/kopia/kopia/object"
"github.com/kopia/kopia/policy"
"github.com/kopia/kopia/repo"
)
// hashEntryMetadata feeds the metadata fields that identify a particular
// version of a filesystem entry into w: name, modification time in
// nanoseconds, file mode, size, and owner user/group IDs. Each field is passed
// to binary.Write in little-endian order, in that sequence.
//
// Errors returned by binary.Write are discarded.
//
// NOTE(review): binary.Write only encodes fixed-size values. If e.Name is a
// plain string, that write is rejected and the name contributes nothing to the
// output. Confirm before changing: hashes produced here are stored in the hash
// cache and compared against on later uploads.
func hashEntryMetadata(w io.Writer, e *fs.EntryMetadata) {
	fields := []interface{}{
		e.Name,
		e.ModTime.UnixNano(),
		e.FileMode(),
		e.FileSize,
		e.UserID,
		e.GroupID,
	}

	for _, field := range fields {
		binary.Write(w, binary.LittleEndian, field)
	}
}
// metadataHash returns the 64-bit FNV-1a digest of the metadata fields that
// hashEntryMetadata writes for e.
func metadataHash(e *fs.EntryMetadata) uint64 {
	hasher := fnv.New64a()
	hashEntryMetadata(hasher, e)

	digest := hasher.Sum64()
	return digest
}
// errCancelled is returned by copyWithProgress when it finds the upload has
// been cancelled; uploadDirInternal compares against it to stop processing
// further entries.
var errCancelled = errors.New("cancelled")
// Uploader supports efficient uploading of files and directories to a repository.
type Uploader struct {
	// Progress receives notifications as directories, files and symlinks are
	// started, copied, served from the hash cache, and finished.
	Progress UploadProgress
	// FilesPolicy specifies criteria for including and excluding files.
	FilesPolicy policy.FilesPolicy
	// MaxUploadBytes automatically cancels the upload once the repository's
	// written byte count exceeds this value. Values <= 0 disable the limit.
	MaxUploadBytes int64
	// IgnoreFileErrors makes per-entry upload errors get logged and counted
	// instead of aborting the directory upload.
	IgnoreFileErrors bool
	// ForceHashPercentage is the probability with which hash cache entries
	// will be ignored, must be [0..100]
	// 0=always use hash cache if possible
	// 100=never use hash cache
	ForceHashPercentage int
	// Do not hash-cache files younger than this age.
	// Protects from accidentally caching incorrect hashes of files that are being modified.
	HashCacheMinAge time.Duration
	// uploadBuf is the copy buffer, allocated by copyWithProgress on first use.
	uploadBuf []byte
	// repo is the repository that objects are written to.
	repo *repo.Repository
	// cacheWriter receives hash cache entries for the directory upload in
	// progress; uploadDir sets it and resets it to nil after finalizing.
	cacheWriter hashcache.Writer
	// cacheReader provides hash cache entries from the previous snapshot;
	// Upload sets it at the start of every upload.
	cacheReader hashcache.Reader
	// hashCacheCutoff is computed by Upload as the start time minus
	// HashCacheMinAge; only entries modified before it are written to the hash cache.
	hashCacheCutoff time.Time
	// stats accumulates counters for the current upload; Upload resets it.
	stats Stats
	// cancelled is non-zero once Cancel has been called; accessed atomically.
	cancelled int32
}
// IsCancelled reports whether the upload should stop, i.e. whether
// cancelReason currently yields a non-empty reason.
func (u *Uploader) IsCancelled() bool {
	reason := u.cancelReason()
	return len(reason) > 0
}
// cancelReason returns a short description of why the upload should stop, or
// an empty string when it may continue:
//
//   - "cancelled" when the cancelled flag has been set,
//   - "limit reached" when MaxUploadBytes is positive and the repository's
//     written byte count exceeds it.
func (u *Uploader) cancelReason() string {
	if atomic.LoadInt32(&u.cancelled) != 0 {
		return "cancelled"
	}

	limit := u.MaxUploadBytes
	if limit <= 0 {
		// No byte limit configured; repository stats are not consulted.
		return ""
	}

	if u.repo.Blocks.Stats().WrittenBytes > limit {
		return "limit reached"
	}

	return ""
}
// uploadFileInternal streams the contents of f into a new repository object
// and returns the resulting directory entry together with its metadata hash.
//
// The returned entry is built from the metadata read from the opened file
// after copying has finished, with FileSize replaced by the number of bytes
// actually written. Progress.Started is reported once the file is open, and
// Progress.Finished is reported on every subsequent exit path.
func (u *Uploader) uploadFileInternal(f fs.File, relativePath string) (*dir.Entry, uint64, error) {
	src, openErr := f.Open()
	if openErr != nil {
		return nil, 0, fmt.Errorf("unable to open file: %v", openErr)
	}
	defer src.Close()

	objWriter := u.repo.Objects.NewWriter(object.WriterOptions{
		Description: "FILE:" + f.Metadata().Name,
	})
	defer objWriter.Close()

	// finish reports completion of this file with the given outcome.
	finish := func(outcome error) {
		u.Progress.Finished(relativePath, f.Metadata().FileSize, outcome)
	}

	u.Progress.Started(relativePath, f.Metadata().FileSize)

	copied, err := u.copyWithProgress(relativePath, objWriter, src, 0, f.Metadata().FileSize)
	if err != nil {
		finish(err)
		return nil, 0, err
	}

	// Re-read metadata from the open handle now that copying is done.
	md, err := src.EntryMetadata()
	if err != nil {
		finish(err)
		return nil, 0, err
	}

	oid, err := objWriter.Result()
	if err != nil {
		finish(err)
		return nil, 0, err
	}

	entry := newDirEntry(md, oid)
	entry.FileSize = copied
	finish(nil)

	return entry, metadataHash(&entry.EntryMetadata), nil
}
// uploadSymlinkInternal stores the target of symlink f as a repository object
// and returns the resulting directory entry together with its metadata hash.
//
// The returned entry carries f's metadata with FileSize replaced by the number
// of bytes written for the link target. Progress.Started is reported up front
// and every exit path, including a failed Readlink, reports Progress.Finished
// so that progress notifications stay balanced.
func (u *Uploader) uploadSymlinkInternal(f fs.Symlink, relativePath string) (*dir.Entry, uint64, error) {
	u.Progress.Started(relativePath, 1)

	target, err := f.Readlink()
	if err != nil {
		err = fmt.Errorf("unable to read symlink: %v", err)
		// Started was already reported above, so it must be paired with
		// Finished here just like on the other error paths.
		u.Progress.Finished(relativePath, f.Metadata().FileSize, err)
		return nil, 0, err
	}

	writer := u.repo.Objects.NewWriter(object.WriterOptions{
		Description: "SYMLINK:" + f.Metadata().Name,
	})
	defer writer.Close()

	written, err := u.copyWithProgress(relativePath, writer, bytes.NewBufferString(target), 0, f.Metadata().FileSize)
	if err != nil {
		u.Progress.Finished(relativePath, f.Metadata().FileSize, err)
		return nil, 0, err
	}

	r, err := writer.Result()
	if err != nil {
		u.Progress.Finished(relativePath, f.Metadata().FileSize, err)
		return nil, 0, err
	}

	de := newDirEntry(f.Metadata(), r)
	de.FileSize = written
	u.Progress.Finished(relativePath, 1, nil)

	return de, metadataHash(&de.EntryMetadata), nil
}
// copyWithProgress copies src to dst through the uploader's reusable buffer,
// reporting progress for path after every successful write.
//
// completed is the number of bytes already accounted for before this call and
// length is the expected total; length is raised whenever completed overtakes
// it. The function returns the number of bytes written by this call. When the
// upload is found to be cancelled before a read, it returns 0 and errCancelled.
// A write that accepts fewer bytes than were read yields io.ErrShortWrite.
func (u *Uploader) copyWithProgress(path string, dst io.Writer, src io.Reader, completed int64, length int64) (int64, error) {
	if u.uploadBuf == nil {
		u.uploadBuf = make([]byte, 128*1024) // 128 KB buffer
	}

	var total int64

	// The loop only falls through when cancellation is detected; every other
	// outcome returns from inside the body.
	for !u.IsCancelled() {
		n, readErr := src.Read(u.uploadBuf)

		if n > 0 {
			w, writeErr := dst.Write(u.uploadBuf[:n])
			if w > 0 {
				total += int64(w)
				completed += int64(w)
				if completed > length {
					length = completed
				}
				u.Progress.Progress(path, completed, length)
			}

			switch {
			case writeErr != nil:
				return total, writeErr
			case w != n:
				return total, io.ErrShortWrite
			}
		}

		switch readErr {
		case nil:
			// More data may follow; keep reading.
		case io.EOF:
			return total, nil
		default:
			return total, readErr
		}
	}

	return 0, errCancelled
}
// newDirEntry builds a directory entry that holds a copy of md and refers to
// the repository object oid.
func newDirEntry(md *fs.EntryMetadata, oid object.ID) *dir.Entry {
	entry := new(dir.Entry)
	entry.EntryMetadata = *md
	entry.ObjectID = oid

	return entry
}
// uploadFile uploads a single File to the repository, using the file's own
// name as its relative path, and returns the ID of the object holding its
// contents. On failure it returns object.NullID and the error.
func (u *Uploader) uploadFile(file fs.File) (object.ID, error) {
	entry, _, err := u.uploadFileInternal(file, file.Metadata().Name)
	if err == nil {
		return entry.ObjectID, nil
	}

	return object.NullID, err
}
// uploadDir uploads the specified Directory tree to the repository and writes
// a new hash cache alongside it.
//
// It returns the object ID of the root directory listing and the object ID of
// the newly written hash cache. While walking the tree, entries found through
// u.cacheReader may be reused to avoid re-hashing unchanged files. If the
// upload ends up cancelled, the remaining contents of the cache reader are
// copied into the new hash cache before it is finalized. Pending repository
// objects are flushed before returning successfully.
func (u *Uploader) uploadDir(dir fs.Directory) (object.ID, object.ID, error) {
	hashCacheObject := u.repo.Objects.NewWriter(object.WriterOptions{
		Description: "HASHCACHE:" + dir.Metadata().Name,
		PackGroup:   "HC",
	})
	defer hashCacheObject.Close()

	u.cacheWriter = hashcache.NewWriter(hashCacheObject)

	rootID, uploadErr := uploadDirInternal(u, dir, ".")

	if u.IsCancelled() {
		if copyErr := u.cacheReader.CopyTo(u.cacheWriter); copyErr != nil {
			return object.NullID, object.NullID, copyErr
		}
	}

	// The cache writer is finalized and detached even when the upload failed;
	// the upload error is only reported afterwards.
	u.cacheWriter.Finalize()
	u.cacheWriter = nil

	if uploadErr != nil {
		return object.NullID, object.NullID, uploadErr
	}

	hashCacheID, resultErr := hashCacheObject.Result()

	if flushErr := u.repo.Objects.Flush(); flushErr != nil {
		return object.NullID, object.NullID, fmt.Errorf("can't flush pending objects: %v", flushErr)
	}

	return rootID, hashCacheID, resultErr
}
// uploadDirInternal uploads the contents of directory (recursively) and writes
// a directory listing object describing its entries, returning that object's ID.
//
// relativePath is the path of the directory within the snapshot; entry paths
// are formed as relativePath + "/" + name and are used for progress reporting,
// logging and as hash cache keys.
//
// For each entry accepted by u.FilesPolicy the hash cache is consulted first:
// when a cached entry exists with the same metadata hash, its object ID is
// reused instead of uploading. Otherwise the entry is uploaded according to
// its type (directory, symlink or file). When cancellation is detected the
// loop stops early and the listing written so far is finalized and returned.
func uploadDirInternal(
	u *Uploader,
	directory fs.Directory,
	relativePath string,
) (object.ID, error) {
	u.Progress.StartedDir(relativePath)
	defer u.Progress.FinishedDir(relativePath)
	u.stats.TotalDirectoryCount++
	entries, err := directory.Readdir()
	if err != nil {
		return object.NullID, err
	}
	writer := u.repo.Objects.NewWriter(object.WriterOptions{
		Description: "DIR:" + relativePath,
		PackGroup: "DIR",
	})
	dw := dir.NewWriter(writer)
	defer writer.Close()
	for _, entry := range entries {
		// Stop picking up new entries once the upload has been cancelled.
		if u.IsCancelled() {
			break
		}
		e := entry.Metadata()
		entryRelativePath := relativePath + "/" + e.Name
		// Entries rejected by the policy are only counted, never uploaded.
		if !u.FilesPolicy.ShouldInclude(e) {
			log.Printf("ignoring %q", entryRelativePath)
			u.stats.ExcludedFileCount++
			u.stats.ExcludedTotalFileSize += e.FileSize
			continue
		}
		var de *dir.Entry
		var hash uint64
		// The cache lookup below runs for every included entry, whatever its type.
		// See if we had this name during previous pass (the lookup result may be
		// deliberately discarded, see maybeIgnoreHashCacheEntry).
		cachedEntry := u.maybeIgnoreHashCacheEntry(u.cacheReader.FindEntry(entryRelativePath))
		// ... and whether file metadata is identical to the previous one.
		computedHash := metadataHash(e)
		cacheMatches := (cachedEntry != nil) && cachedEntry.Hash == computedHash
		// File totals are counted for regular files only, cached or not.
		switch entry.(type) {
		case fs.File:
			u.stats.TotalFileCount++
			u.stats.TotalFileSize += e.FileSize
		}
		if cacheMatches {
			u.stats.CachedFiles++
			u.Progress.Cached(entryRelativePath, entry.Metadata().FileSize)
			// Avoid hashing by reusing previous object ID.
			de, hash, err = newDirEntry(e, cachedEntry.ObjectID), cachedEntry.Hash, nil
		} else {
			switch entry := entry.(type) {
			case fs.Directory:
				var oid object.ID
				oid, err = uploadDirInternal(u, entry, entryRelativePath)
				de = newDirEntry(e, oid)
				// A zero hash keeps directories out of the hash cache (see the check below).
				hash = 0
			case fs.Symlink:
				de, hash, err = u.uploadSymlinkInternal(entry, entryRelativePath)
			case fs.File:
				u.stats.NonCachedFiles++
				de, hash, err = u.uploadFileInternal(entry, entryRelativePath)
			default:
				return object.NullID, fmt.Errorf("file type %v not supported", entry.Metadata().Type)
			}
		}
		// Cancellation in the middle of an entry ends the loop without recording that entry.
		if err == errCancelled {
			break
		}
		if err != nil {
			// With IgnoreFileErrors set, the failed entry is logged, counted and
			// left out of the listing; otherwise the whole directory upload fails.
			if u.IgnoreFileErrors {
				u.stats.ReadErrors++
				log.Printf("warning: unable to hash file %q: %s, ignoring", entryRelativePath, err)
				continue
			}
			return object.NullID, fmt.Errorf("unable to hash file: %s", err)
		}
		if err := dw.WriteEntry(de); err != nil {
			return object.NullID, err
		}
		// Only non-directory entries with a non-zero hash that were last modified
		// before the cutoff time are recorded in the new hash cache.
		if de.Type != fs.EntryTypeDirectory && hash != 0 && entry.Metadata().ModTime.Before(u.hashCacheCutoff) {
			if err := u.cacheWriter.WriteEntry(hashcache.Entry{
				Name: entryRelativePath,
				Hash: hash,
				ObjectID: de.ObjectID,
			}); err != nil {
				return object.NullID, err
			}
		}
	}
	dw.Finalize()
	return writer.Result()
}
// maybeIgnoreHashCacheEntry randomly discards the given hash cache entry so
// that the corresponding file gets re-hashed. A number in [0, 100) is drawn on
// every call; the entry is dropped (nil is returned) when that number is below
// ForceHashPercentage, otherwise e is returned unchanged.
func (u *Uploader) maybeIgnoreHashCacheEntry(e *hashcache.Entry) *hashcache.Entry {
	roll := rand.Intn(100)
	if roll >= u.ForceHashPercentage {
		return e
	}

	return nil
}
// NewUploader creates a new Uploader for the given repository with default
// settings: progress notifications are discarded, files modified within the
// last hour are kept out of the hash cache, and per-file errors are ignored.
func NewUploader(r *repo.Repository) *Uploader {
	u := new(Uploader)
	u.repo = r
	u.Progress = &nullUploadProgress{}
	u.HashCacheMinAge = time.Hour
	u.IgnoreFileErrors = true

	return u
}
// Cancel requests cancellation of an upload that is in progress by atomically
// setting the cancelled flag. This will typically result in an incomplete
// snapshot.
func (u *Uploader) Cancel() {
	const requested int32 = 1
	atomic.StoreInt32(&u.cancelled, requested)
}
// Upload uploads the contents of the specified filesystem entry (file or
// directory) to the repository and returns a snapshot Manifest with statistics.
//
// When old is provided, its hash cache is opened and used to speed up the
// upload; failure to open it is not an error and simply leaves the upload
// running with an empty cache. Sources that are neither a directory nor a file
// are rejected. If the upload was cancelled or hit the byte limit, the
// returned manifest carries the reason in IncompleteReason.
func (u *Uploader) Upload(
	source fs.Entry,
	sourceInfo *SourceInfo,
	old *Manifest,
) (*Manifest, error) {
	manifest := &Manifest{Source: *sourceInfo}

	// Start from an empty hash cache and fresh statistics.
	u.cacheReader = hashcache.Open(nil)
	u.stats = Stats{}

	if old != nil {
		previous, openErr := u.repo.Objects.Open(old.HashCacheID)
		if openErr == nil {
			u.cacheReader = hashcache.Open(previous)
		}
	}

	manifest.StartTime = time.Now()
	u.hashCacheCutoff = time.Now().Add(-u.HashCacheMinAge)
	manifest.HashCacheCutoffTime = u.hashCacheCutoff

	var err error

	switch entry := source.(type) {
	case fs.Directory:
		manifest.RootObjectID, manifest.HashCacheID, err = u.uploadDir(entry)
	case fs.File:
		manifest.RootObjectID, err = u.uploadFile(entry)
	default:
		return nil, fmt.Errorf("unsupported source: %v", manifest.Source)
	}

	if err != nil {
		return nil, err
	}

	manifest.IncompleteReason = u.cancelReason()
	manifest.EndTime = time.Now()
	manifest.Stats = u.stats
	manifest.Stats.Block = u.repo.Blocks.Stats()

	return manifest, nil
}