Merge github.com:kopia/repo into import-repo

Jarek Kowalski
2019-05-27 15:41:55 -07:00
92 changed files with 11427 additions and 2 deletions

block/block_cache.go (new file, 220 lines)
@@ -0,0 +1,220 @@
package block
import (
"container/heap"
"context"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/kopia/repo/storage"
"github.com/kopia/repo/storage/filesystem"
)
const (
defaultSweepFrequency = 1 * time.Minute
defaultTouchThreshold = 10 * time.Minute
)
type blockCache struct {
st storage.Storage
cacheStorage storage.Storage
maxSizeBytes int64
hmacSecret []byte
sweepFrequency time.Duration
touchThreshold time.Duration
mu sync.Mutex
lastTotalSizeBytes int64
closed chan struct{}
}
type blockToucher interface {
TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error
}
func adjustCacheKey(cacheKey string) string {
// block IDs with odd length have a single-byte prefix.
// move the prefix to the end of cache key to make sure the top level shard is spread 256 ways.
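// e.g. an odd-length key "pff01" becomes "ff01p", so the two-character top-level shard ("ff")
// comes from hex digits rather than always being the prefix byte.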
if len(cacheKey)%2 == 1 {
return cacheKey[1:] + cacheKey[0:1]
}
return cacheKey
}
func (c *blockCache) getContentBlock(ctx context.Context, cacheKey string, physicalBlockID string, offset, length int64) ([]byte, error) {
cacheKey = adjustCacheKey(cacheKey)
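// Try the cache first (verifying the HMAC appended on write); on a miss, fall back to the
// underlying storage and populate the cache on success.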
useCache := shouldUseBlockCache(ctx) && c.cacheStorage != nil
if useCache {
if b := c.readAndVerifyCacheBlock(ctx, cacheKey); b != nil {
return b, nil
}
}
b, err := c.st.GetBlock(ctx, physicalBlockID, offset, length)
if err == storage.ErrBlockNotFound {
// not found in underlying storage
return nil, err
}
if err == nil && useCache {
if puterr := c.cacheStorage.PutBlock(ctx, cacheKey, appendHMAC(b, c.hmacSecret)); puterr != nil {
log.Warningf("unable to write cache item %v: %v", cacheKey, puterr)
}
}
return b, err
}
func (c *blockCache) readAndVerifyCacheBlock(ctx context.Context, cacheKey string) []byte {
b, err := c.cacheStorage.GetBlock(ctx, cacheKey, 0, -1)
if err == nil {
b, err = verifyAndStripHMAC(b, c.hmacSecret)
if err == nil {
if t, ok := c.cacheStorage.(blockToucher); ok {
t.TouchBlock(ctx, cacheKey, c.touchThreshold) //nolint:errcheck
}
// retrieved from cache and HMAC valid
return b
}
// ignore malformed blocks
log.Warningf("malformed block %v: %v", cacheKey, err)
return nil
}
if err != storage.ErrBlockNotFound {
log.Warningf("unable to read cache %v: %v", cacheKey, err)
}
return nil
}
func (c *blockCache) close() {
close(c.closed)
}
func (c *blockCache) sweepDirectoryPeriodically(ctx context.Context) {
for {
select {
case <-c.closed:
return
case <-time.After(c.sweepFrequency):
err := c.sweepDirectory(ctx)
if err != nil {
log.Warningf("blockCache sweep failed: %v", err)
}
}
}
}
// A blockMetadataHeap implements heap.Interface and holds storage.BlockMetadata.
type blockMetadataHeap []storage.BlockMetadata
func (h blockMetadataHeap) Len() int { return len(h) }
func (h blockMetadataHeap) Less(i, j int) bool {
return h[i].Timestamp.Before(h[j].Timestamp)
}
func (h blockMetadataHeap) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}
func (h *blockMetadataHeap) Push(x interface{}) {
*h = append(*h, x.(storage.BlockMetadata))
}
func (h *blockMetadataHeap) Pop() interface{} {
old := *h
n := len(old)
item := old[n-1]
*h = old[0 : n-1]
return item
}
func (c *blockCache) sweepDirectory(ctx context.Context) (err error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.cacheStorage == nil {
return nil
}
t0 := time.Now()
var h blockMetadataHeap
var totalRetainedSize int64
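// Scan all cached blocks while maintaining a min-heap ordered by timestamp; whenever the
// running total exceeds maxSizeBytes, evict the oldest entry.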
err = c.cacheStorage.ListBlocks(ctx, "", func(it storage.BlockMetadata) error {
heap.Push(&h, it)
totalRetainedSize += it.Length
if totalRetainedSize > c.maxSizeBytes {
oldest := heap.Pop(&h).(storage.BlockMetadata)
if delerr := c.cacheStorage.DeleteBlock(ctx, oldest.BlockID); delerr != nil {
log.Warningf("unable to remove %v: %v", oldest.BlockID, delerr)
} else {
totalRetainedSize -= oldest.Length
}
}
return nil
})
if err != nil {
return fmt.Errorf("error listing cache: %v", err)
}
log.Debugf("finished sweeping directory in %v and retained %v/%v bytes (%v %%)", time.Since(t0), totalRetainedSize, c.maxSizeBytes, 100*totalRetainedSize/c.maxSizeBytes)
c.lastTotalSizeBytes = totalRetainedSize
return nil
}
func newBlockCache(ctx context.Context, st storage.Storage, caching CachingOptions) (*blockCache, error) {
var cacheStorage storage.Storage
var err error
if caching.MaxCacheSizeBytes > 0 && caching.CacheDirectory != "" {
blockCacheDir := filepath.Join(caching.CacheDirectory, "blocks")
if _, err = os.Stat(blockCacheDir); os.IsNotExist(err) {
if err = os.MkdirAll(blockCacheDir, 0700); err != nil {
return nil, err
}
}
cacheStorage, err = filesystem.New(context.Background(), &filesystem.Options{
Path: blockCacheDir,
DirectoryShards: []int{2},
})
if err != nil {
return nil, err
}
}
return newBlockCacheWithCacheStorage(ctx, st, cacheStorage, caching, defaultTouchThreshold, defaultSweepFrequency)
}
func newBlockCacheWithCacheStorage(ctx context.Context, st, cacheStorage storage.Storage, caching CachingOptions, touchThreshold time.Duration, sweepFrequency time.Duration) (*blockCache, error) {
c := &blockCache{
st: st,
cacheStorage: cacheStorage,
maxSizeBytes: caching.MaxCacheSizeBytes,
hmacSecret: append([]byte(nil), caching.HMACSecret...),
closed: make(chan struct{}),
touchThreshold: touchThreshold,
sweepFrequency: sweepFrequency,
}
if err := c.sweepDirectory(ctx); err != nil {
return nil, err
}
go c.sweepDirectoryPeriodically(ctx)
return c, nil
}
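For orientation, a minimal usage sketch of the cache above (illustrative only, not part of the diff; assumes code in the same block package, and the directory path and size budget are made up):

func exampleBlockCacheUsage(ctx context.Context, st storage.Storage) error {
	cache, err := newBlockCache(ctx, st, CachingOptions{
		CacheDirectory:    "/tmp/kopia-cache", // illustrative path
		MaxCacheSizeBytes: 100 << 20,          // ~100 MiB budget enforced by the periodic sweep
	})
	if err != nil {
		return err
	}
	defer cache.close()
	// The first read populates the cache; later reads of the same cacheKey are served from the
	// filesystem cache after HMAC verification.
	_, err = cache.getContentBlock(ctx, "00000a", "pack-block-id", 0, -1)
	return err
}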

block/block_cache_test.go (new file, 298 lines)
@@ -0,0 +1,298 @@
package block
import (
"bytes"
"context"
"errors"
"fmt"
"io/ioutil"
"os"
"reflect"
"sort"
"strings"
"testing"
"time"
"github.com/kopia/repo/internal/storagetesting"
"github.com/kopia/repo/storage"
)
func newUnderlyingStorageForBlockCacheTesting(t *testing.T) storage.Storage {
ctx := context.Background()
data := map[string][]byte{}
st := storagetesting.NewMapStorage(data, nil, nil)
assertNoError(t, st.PutBlock(ctx, "block-1", []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}))
assertNoError(t, st.PutBlock(ctx, "block-4k", bytes.Repeat([]byte{1, 2, 3, 4}, 1000))) // 4000 bytes
return st
}
func TestCacheExpiration(t *testing.T) {
cacheData := map[string][]byte{}
cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, cacheStorage, CachingOptions{
MaxCacheSizeBytes: 10000,
}, 0, 500*time.Millisecond)
if err != nil {
t.Fatalf("err: %v", err)
}
defer cache.close()
ctx := context.Background()
_, err = cache.getContentBlock(ctx, "00000a", "block-4k", 0, -1) // 4k
assertNoError(t, err)
_, err = cache.getContentBlock(ctx, "00000b", "block-4k", 0, -1) // 4k
assertNoError(t, err)
_, err = cache.getContentBlock(ctx, "00000c", "block-4k", 0, -1) // 4k
assertNoError(t, err)
_, err = cache.getContentBlock(ctx, "00000d", "block-4k", 0, -1) // 4k
assertNoError(t, err)
// wait for a sweep
time.Sleep(2 * time.Second)
// 00000a and 00000b will be removed from the cache because they are the oldest.
// to verify, let's remove block-4k from the underlying storage and make sure we can still read
// 00000c and 00000d from the cache but not 00000a nor 00000b
assertNoError(t, underlyingStorage.DeleteBlock(ctx, "block-4k"))
cases := []struct {
block string
expectedError error
}{
{"00000a", storage.ErrBlockNotFound},
{"00000b", storage.ErrBlockNotFound},
{"00000c", nil},
{"00000d", nil},
}
for _, tc := range cases {
_, got := cache.getContentBlock(ctx, tc.block, "block-4k", 0, -1)
if want := tc.expectedError; got != want {
t.Errorf("unexpected error when getting block %v: %v wanted %v", tc.block, got, want)
} else {
t.Logf("got correct error %v when reading block %v", tc.expectedError, tc.block)
}
}
}
func TestDiskBlockCache(t *testing.T) {
ctx := context.Background()
tmpDir, err := ioutil.TempDir("", "kopia")
if err != nil {
t.Fatalf("error getting temp dir: %v", err)
}
defer os.RemoveAll(tmpDir)
cache, err := newBlockCache(ctx, newUnderlyingStorageForBlockCacheTesting(t), CachingOptions{
MaxCacheSizeBytes: 10000,
CacheDirectory: tmpDir,
})
if err != nil {
t.Fatalf("err: %v", err)
}
defer cache.close()
verifyBlockCache(t, cache)
}
func verifyBlockCache(t *testing.T, cache *blockCache) {
ctx := context.Background()
t.Run("GetContentBlock", func(t *testing.T) {
cases := []struct {
cacheKey string
physicalBlockID string
offset int64
length int64
expected []byte
err error
}{
{"xf0f0f1", "block-1", 1, 5, []byte{2, 3, 4, 5, 6}, nil},
{"xf0f0f2", "block-1", 0, -1, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil},
{"xf0f0f1", "block-1", 1, 5, []byte{2, 3, 4, 5, 6}, nil},
{"xf0f0f2", "block-1", 0, -1, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil},
{"xf0f0f3", "no-such-block", 0, -1, nil, storage.ErrBlockNotFound},
{"xf0f0f4", "no-such-block", 10, 5, nil, storage.ErrBlockNotFound},
{"f0f0f5", "block-1", 7, 3, []byte{8, 9, 10}, nil},
{"xf0f0f6", "block-1", 11, 10, nil, fmt.Errorf("invalid offset")},
{"xf0f0f6", "block-1", -1, 5, nil, fmt.Errorf("invalid offset")},
}
for _, tc := range cases {
v, err := cache.getContentBlock(ctx, tc.cacheKey, tc.physicalBlockID, tc.offset, tc.length)
if !reflect.DeepEqual(err, tc.err) {
t.Errorf("unexpected error for %v: %+v, wanted %+v", tc.cacheKey, err, tc.err)
}
if !reflect.DeepEqual(v, tc.expected) {
t.Errorf("unexpected data for %v: %x, wanted %x", tc.cacheKey, v, tc.expected)
}
}
verifyStorageBlockList(t, cache.cacheStorage, "f0f0f1x", "f0f0f2x", "f0f0f5")
})
t.Run("DataCorruption", func(t *testing.T) {
cacheKey := "f0f0f1x"
d, err := cache.cacheStorage.GetBlock(ctx, cacheKey, 0, -1)
if err != nil {
t.Fatalf("unable to retrieve data from cache: %v", err)
}
// corrupt the data and write back
d[0] ^= 1
if err := cache.cacheStorage.PutBlock(ctx, cacheKey, d); err != nil {
t.Fatalf("unable to write corrupted block: %v", err)
}
v, err := cache.getContentBlock(ctx, "xf0f0f1", "block-1", 1, 5)
if err != nil {
t.Fatalf("error in getContentBlock: %v", err)
}
if got, want := v, []byte{2, 3, 4, 5, 6}; !reflect.DeepEqual(v, want) {
t.Errorf("invalid result when reading corrupted data: %v, wanted %v", got, want)
}
})
}
func TestCacheFailureToOpen(t *testing.T) {
someError := errors.New("some error")
cacheData := map[string][]byte{}
cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
faultyCache := &storagetesting.FaultyStorage{
Base: cacheStorage,
Faults: map[string][]*storagetesting.Fault{
"ListBlocks": {
{Err: someError},
},
},
}
// Will fail because of ListBlocks failure.
_, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
MaxCacheSizeBytes: 10000,
}, 0, 5*time.Hour)
if err == nil || !strings.Contains(err.Error(), someError.Error()) {
t.Errorf("invalid error %v, wanted: %v", err, someError)
}
// ListBlocks fails only once, next time it succeeds.
cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
MaxCacheSizeBytes: 10000,
}, 0, 100*time.Millisecond)
if err != nil {
t.Fatalf("err: %v", err)
}
defer cache.close()
}
func TestCacheFailureToWrite(t *testing.T) {
someError := errors.New("some error")
cacheData := map[string][]byte{}
cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
faultyCache := &storagetesting.FaultyStorage{
Base: cacheStorage,
}
cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
MaxCacheSizeBytes: 10000,
}, 0, 5*time.Hour)
if err != nil {
t.Fatalf("err: %v", err)
}
defer cache.close()
ctx := context.Background()
faultyCache.Faults = map[string][]*storagetesting.Fault{
"PutBlock": {
{Err: someError},
},
}
v, err := cache.getContentBlock(ctx, "aa", "block-1", 0, 3)
if err != nil {
t.Errorf("write failure wasn't ignored: %v", err)
}
if got, want := v, []byte{1, 2, 3}; !reflect.DeepEqual(got, want) {
t.Errorf("unexpected value retrieved from cache: %v, want: %v", got, want)
}
all, err := storage.ListAllBlocks(ctx, cacheStorage, "")
if err != nil {
t.Errorf("error listing cache: %v", err)
}
if len(all) != 0 {
t.Errorf("invalid test - cache was written")
}
}
func TestCacheFailureToRead(t *testing.T) {
someError := errors.New("some error")
cacheData := map[string][]byte{}
cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
faultyCache := &storagetesting.FaultyStorage{
Base: cacheStorage,
}
cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
MaxCacheSizeBytes: 10000,
}, 0, 5*time.Hour)
if err != nil {
t.Fatalf("err: %v", err)
}
defer cache.close()
ctx := context.Background()
faultyCache.Faults = map[string][]*storagetesting.Fault{
"GetBlock": {
{Err: someError, Repeat: 100},
},
}
for i := 0; i < 2; i++ {
v, err := cache.getContentBlock(ctx, "aa", "block-1", 0, 3)
if err != nil {
t.Errorf("read failure wasn't ignored: %v", err)
}
if got, want := v, []byte{1, 2, 3}; !reflect.DeepEqual(got, want) {
t.Errorf("unexpected value retrieved from cache: %v, want: %v", got, want)
}
}
}
func verifyStorageBlockList(t *testing.T, st storage.Storage, expectedBlocks ...string) {
t.Helper()
var foundBlocks []string
assertNoError(t, st.ListBlocks(context.Background(), "", func(bm storage.BlockMetadata) error {
foundBlocks = append(foundBlocks, bm.BlockID)
return nil
}))
sort.Strings(foundBlocks)
if !reflect.DeepEqual(foundBlocks, expectedBlocks) {
t.Errorf("unexpected block list: %v, wanted %v", foundBlocks, expectedBlocks)
}
}
func assertNoError(t *testing.T, err error) {
t.Helper()
if err != nil {
t.Errorf("err: %v", err)
}
}

block/block_formatter.go (new file, 217 lines)
@@ -0,0 +1,217 @@
package block
import (
"crypto/aes"
"crypto/cipher"
"crypto/hmac" //nolint:gas
"crypto/sha256"
"fmt"
"hash"
"sort"
"golang.org/x/crypto/blake2b"
"golang.org/x/crypto/blake2s"
"golang.org/x/crypto/salsa20"
"golang.org/x/crypto/sha3"
)
// HashFunc computes hash of block of data using a cryptographic hash function, possibly with HMAC and/or truncation.
type HashFunc func(data []byte) []byte
// HashFuncFactory returns a hash function for given formatting options.
type HashFuncFactory func(o FormattingOptions) (HashFunc, error)
// Encryptor performs encryption and decryption of blocks of data.
type Encryptor interface {
// Encrypt returns encrypted bytes corresponding to the given plaintext. Must not clobber the input slice.
Encrypt(plainText []byte, blockID []byte) ([]byte, error)
// Decrypt returns unencrypted bytes corresponding to the given ciphertext. Must not clobber the input slice.
Decrypt(cipherText []byte, blockID []byte) ([]byte, error)
}
// EncryptorFactory creates new Encryptor for given FormattingOptions
type EncryptorFactory func(o FormattingOptions) (Encryptor, error)
var hashFunctions = map[string]HashFuncFactory{}
var encryptors = map[string]EncryptorFactory{}
// nullEncryptor implements non-encrypted format.
type nullEncryptor struct {
}
func (fi nullEncryptor) Encrypt(plainText []byte, blockID []byte) ([]byte, error) {
return cloneBytes(plainText), nil
}
func (fi nullEncryptor) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) {
return cloneBytes(cipherText), nil
}
// ctrEncryptor implements encrypted format which uses CTR mode of a block cipher with nonce==IV.
type ctrEncryptor struct {
createCipher func() (cipher.Block, error)
}
func (fi ctrEncryptor) Encrypt(plainText []byte, blockID []byte) ([]byte, error) {
return symmetricEncrypt(fi.createCipher, blockID, plainText)
}
func (fi ctrEncryptor) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) {
return symmetricEncrypt(fi.createCipher, blockID, cipherText)
}
func symmetricEncrypt(createCipher func() (cipher.Block, error), iv []byte, b []byte) ([]byte, error) {
blockCipher, err := createCipher()
if err != nil {
return nil, err
}
ctr := cipher.NewCTR(blockCipher, iv[0:blockCipher.BlockSize()])
result := make([]byte, len(b))
ctr.XORKeyStream(result, b)
return result, nil
}
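// salsaEncryptor implements encryption using the Salsa20 family of stream ciphers (Salsa20 with an
// 8-byte nonce, XSalsa20 with a 24-byte nonce), using a prefix of the block ID (content hash) as the
// nonce. Since the cipher XORs a keystream, the same operation performs both encryption and decryption.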
type salsaEncryptor struct {
nonceSize int
key *[32]byte
}
func (s salsaEncryptor) Decrypt(input []byte, blockID []byte) ([]byte, error) {
return s.encryptDecrypt(input, blockID)
}
func (s salsaEncryptor) Encrypt(input []byte, blockID []byte) ([]byte, error) {
return s.encryptDecrypt(input, blockID)
}
func (s salsaEncryptor) encryptDecrypt(input []byte, blockID []byte) ([]byte, error) {
if len(blockID) < s.nonceSize {
return nil, fmt.Errorf("hash too short, expected >=%v bytes, got %v", s.nonceSize, len(blockID))
}
result := make([]byte, len(input))
nonce := blockID[0:s.nonceSize]
salsa20.XORKeyStream(result, input, nonce, s.key)
return result, nil
}
// truncatedHMACHashFuncFactory returns a HashFuncFactory that computes HMAC(hash, secret) of a given block of bytes
// and truncates results to the given size.
func truncatedHMACHashFuncFactory(hf func() hash.Hash, truncate int) HashFuncFactory {
return func(o FormattingOptions) (HashFunc, error) {
return func(b []byte) []byte {
h := hmac.New(hf, o.HMACSecret)
h.Write(b) // nolint:errcheck
return h.Sum(nil)[0:truncate]
}, nil
}
}
// truncatedKeyedHashFuncFactory returns a HashFuncFactory that computes keyed hash of a given block of bytes
// and truncates results to the given size.
func truncatedKeyedHashFuncFactory(hf func(key []byte) (hash.Hash, error), truncate int) HashFuncFactory {
return func(o FormattingOptions) (HashFunc, error) {
if _, err := hf(o.HMACSecret); err != nil {
return nil, err
}
return func(b []byte) []byte {
h, _ := hf(o.HMACSecret)
h.Write(b) // nolint:errcheck
return h.Sum(nil)[0:truncate]
}, nil
}
}
// newCTREncryptorFactory returns new EncryptorFactory that uses CTR with symmetric encryption (such as AES) and a given key size.
func newCTREncryptorFactory(keySize int, createCipherWithKey func(key []byte) (cipher.Block, error)) EncryptorFactory {
return func(o FormattingOptions) (Encryptor, error) {
key, err := adjustKey(o.MasterKey, keySize)
if err != nil {
return nil, fmt.Errorf("unable to get encryption key: %v", err)
}
return ctrEncryptor{
createCipher: func() (cipher.Block, error) {
return createCipherWithKey(key)
},
}, nil
}
}
// RegisterHash registers a hash function with a given name.
func RegisterHash(name string, newHashFunc HashFuncFactory) {
hashFunctions[name] = newHashFunc
}
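// SupportedHashAlgorithms returns the names of all registered hash algorithms, sorted alphabetically.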
func SupportedHashAlgorithms() []string {
var result []string
for k := range hashFunctions {
result = append(result, k)
}
sort.Strings(result)
return result
}
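// SupportedEncryptionAlgorithms returns the names of all registered encryption algorithms, sorted alphabetically.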
func SupportedEncryptionAlgorithms() []string {
var result []string
for k := range encryptors {
result = append(result, k)
}
sort.Strings(result)
return result
}
// RegisterEncryption registers new encryption algorithm.
func RegisterEncryption(name string, newEncryptor EncryptorFactory) {
encryptors[name] = newEncryptor
}
// DefaultHash is the name of the default hash algorithm.
const DefaultHash = "BLAKE2B-256-128"
// DefaultEncryption is the name of the default encryption algorithm.
const DefaultEncryption = "SALSA20"
func init() {
RegisterHash("HMAC-SHA256", truncatedHMACHashFuncFactory(sha256.New, 32))
RegisterHash("HMAC-SHA256-128", truncatedHMACHashFuncFactory(sha256.New, 16))
RegisterHash("HMAC-SHA224", truncatedHMACHashFuncFactory(sha256.New224, 28))
RegisterHash("HMAC-SHA3-224", truncatedHMACHashFuncFactory(sha3.New224, 28))
RegisterHash("HMAC-SHA3-256", truncatedHMACHashFuncFactory(sha3.New256, 32))
RegisterHash("BLAKE2S-128", truncatedKeyedHashFuncFactory(blake2s.New128, 16))
RegisterHash("BLAKE2S-256", truncatedKeyedHashFuncFactory(blake2s.New256, 32))
RegisterHash("BLAKE2B-256-128", truncatedKeyedHashFuncFactory(blake2b.New256, 16))
RegisterHash("BLAKE2B-256", truncatedKeyedHashFuncFactory(blake2b.New256, 32))
RegisterEncryption("NONE", func(f FormattingOptions) (Encryptor, error) {
return nullEncryptor{}, nil
})
RegisterEncryption("AES-128-CTR", newCTREncryptorFactory(16, aes.NewCipher))
RegisterEncryption("AES-192-CTR", newCTREncryptorFactory(24, aes.NewCipher))
RegisterEncryption("AES-256-CTR", newCTREncryptorFactory(32, aes.NewCipher))
RegisterEncryption("SALSA20", func(f FormattingOptions) (Encryptor, error) {
var k [32]byte
copy(k[:], f.MasterKey[0:32])
return salsaEncryptor{8, &k}, nil
})
RegisterEncryption("XSALSA20", func(f FormattingOptions) (Encryptor, error) {
var k [32]byte
copy(k[:], f.MasterKey[0:32])
return salsaEncryptor{24, &k}, nil
})
}
func adjustKey(masterKey []byte, desiredKeySize int) ([]byte, error) {
if len(masterKey) == desiredKeySize {
return masterKey, nil
}
if desiredKeySize < len(masterKey) {
return masterKey[0:desiredKeySize], nil
}
return nil, fmt.Errorf("required key too long %v, but only have %v", desiredKeySize, len(masterKey))
}
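For orientation, a minimal sketch (illustrative only, not part of the diff) of hashing and encrypting a buffer with the default algorithms registered above, using the CreateHashAndEncryptor helper exercised by the tests below; the secret and key values are made up:

func exampleHashAndEncrypt(data []byte) ([]byte, []byte, error) {
	h, e, err := CreateHashAndEncryptor(FormattingOptions{
		Hash:       DefaultHash,       // "BLAKE2B-256-128"
		Encryption: DefaultEncryption, // "SALSA20"
		HMACSecret: []byte("example-secret"),
		MasterKey:  make([]byte, 32),
	})
	if err != nil {
		return nil, nil, err
	}
	blockID := h(data) // truncated keyed hash; also serves as the nonce/IV for encryption
	cipherText, err := e.Encrypt(data, blockID)
	return blockID, cipherText, err
}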


@@ -0,0 +1,62 @@
package block
import (
"bytes"
"crypto/sha1"
"math/rand"
"testing"
)
// combinations of hash and encryption that are not compatible.
var incompatibleAlgorithms = map[string]string{
"BLAKE2B-256-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16",
"BLAKE2S-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16",
"HMAC-RIPEMD-160/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 20",
"HMAC-SHA256-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16",
}
func TestFormatters(t *testing.T) {
secret := []byte("secret")
data := make([]byte, 100)
rand.Read(data)
h0 := sha1.Sum(data)
for _, hashAlgo := range SupportedHashAlgorithms() {
for _, encryptionAlgo := range SupportedEncryptionAlgorithms() {
h, e, err := CreateHashAndEncryptor(FormattingOptions{
HMACSecret: secret,
MasterKey: make([]byte, 32),
Hash: hashAlgo,
Encryption: encryptionAlgo,
})
if err != nil {
key := hashAlgo + "/" + encryptionAlgo
errmsg := incompatibleAlgorithms[key]
if err.Error() == errmsg {
continue
}
t.Errorf("Algorithm %v not marked as incompatible and failed with %v", key, err)
continue
}
blockID := h(data)
cipherText, err := e.Encrypt(data, blockID)
if err != nil || cipherText == nil {
t.Errorf("invalid response from Encrypt: %v %v", cipherText, err)
}
plainText, err := e.Decrypt(cipherText, blockID)
if err != nil || plainText == nil {
t.Errorf("invalid response from Decrypt: %v %v", plainText, err)
}
h1 := sha1.Sum(plainText)
if !bytes.Equal(h0[:], h1[:]) {
t.Errorf("Encrypt()/Decrypt() does not round-trip: %x %x", h0, h1)
}
}
}
}


@@ -0,0 +1,11 @@
package block
// FormattingOptions describes the rules for formatting blocks in the repository.
type FormattingOptions struct {
Version int `json:"version,omitempty"` // version number, must be 1
Hash string `json:"hash,omitempty"` // identifier of the hash algorithm used
Encryption string `json:"encryption,omitempty"` // identifier of the encryption algorithm used
HMACSecret []byte `json:"secret,omitempty"` // HMAC secret used to generate encryption keys
MasterKey []byte `json:"masterKey,omitempty"` // master encryption key (SIV-mode encryption only)
MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object
}
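For reference, a small illustrative sketch (not part of the diff; assumes encoding/json and the block package) showing how a populated options struct serializes given the tags above; the values are made up:

b, _ := json.Marshal(FormattingOptions{
	Version:     1,
	Hash:        "BLAKE2B-256-128",
	Encryption:  "SALSA20",
	MaxPackSize: 20 << 20,
})
// string(b) == `{"version":1,"hash":"BLAKE2B-256-128","encryption":"SALSA20","maxPackSize":20971520}`
// []byte fields such as HMACSecret and MasterKey are base64-encoded by encoding/json when present.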


@@ -0,0 +1,226 @@
package block
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"hash/crc32"
"reflect"
)
// RecoverIndexFromPackFile attempts to recover index block entries from a given pack file.
// Pack file length may be provided (if known) to reduce the number of bytes that are read from the storage.
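// It locates the postamble written by appendPackFileIndexRecoveryData and decrypts the local index it points to.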
func (bm *Manager) RecoverIndexFromPackFile(ctx context.Context, packFile string, packFileLength int64, commit bool) ([]Info, error) {
localIndexBytes, err := bm.readPackFileLocalIndex(ctx, packFile, packFileLength)
if err != nil {
return nil, err
}
ndx, err := openPackIndex(bytes.NewReader(localIndexBytes))
if err != nil {
return nil, fmt.Errorf("unable to open index in file %v", packFile)
}
defer ndx.Close() //nolint:errcheck
var recovered []Info
err = ndx.Iterate("", func(i Info) error {
recovered = append(recovered, i)
if commit {
bm.packIndexBuilder.Add(i)
}
return nil
})
return recovered, err
}
type packBlockPostamble struct {
localIndexIV []byte
localIndexOffset uint32
localIndexLength uint32
}
func (p *packBlockPostamble) toBytes() ([]byte, error) {
// 4 varints + IV + 4 bytes of checksum + 1 byte of postamble length
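// layout: [uvarint version=1][uvarint len(IV)][IV][uvarint offset][uvarint length]
//         [4-byte big-endian CRC32 of the preceding bytes][1-byte postamble length]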
n := 0
buf := make([]byte, 4*binary.MaxVarintLen64+len(p.localIndexIV)+4+1)
n += binary.PutUvarint(buf[n:], uint64(1)) // version flag
n += binary.PutUvarint(buf[n:], uint64(len(p.localIndexIV))) // length of local index IV
copy(buf[n:], p.localIndexIV)
n += len(p.localIndexIV)
n += binary.PutUvarint(buf[n:], uint64(p.localIndexOffset))
n += binary.PutUvarint(buf[n:], uint64(p.localIndexLength))
checksum := crc32.ChecksumIEEE(buf[0:n])
binary.BigEndian.PutUint32(buf[n:], checksum)
n += 4
if n > 255 {
return nil, fmt.Errorf("postamble too long: %v", n)
}
buf[n] = byte(n)
return buf[0 : n+1], nil
}
// findPostamble detects whether a given block of bytes contains a possibly valid postamble and returns it if so.
// NOTE: even if this function returns a postamble, it should not be trusted to be correct, since it is not
// cryptographically signed. This is to facilitate data recovery.
func findPostamble(b []byte) *packBlockPostamble {
if len(b) == 0 {
// no postamble
return nil
}
// length of postamble is the last byte
postambleLength := int(b[len(b)-1])
if postambleLength < 5 {
// too short, must be at least 5 bytes (checksum + own length)
return nil
}
postambleStart := len(b) - 1 - postambleLength
postambleEnd := len(b) - 1
if postambleStart < 0 {
// invalid last byte
return nil
}
postambleBytes := b[postambleStart:postambleEnd]
payload, checksumBytes := postambleBytes[0:len(postambleBytes)-4], postambleBytes[len(postambleBytes)-4:]
checksum := binary.BigEndian.Uint32(checksumBytes)
validChecksum := crc32.ChecksumIEEE(payload)
if checksum != validChecksum {
// invalid checksum, not a valid postamble
return nil
}
return decodePostamble(payload)
}
func decodePostamble(payload []byte) *packBlockPostamble {
flags, n := binary.Uvarint(payload)
if n <= 0 {
// invalid flags
return nil
}
if flags != 1 {
// unsupported flag
return nil
}
payload = payload[n:]
ivLength, n := binary.Uvarint(payload)
if n <= 0 {
// invalid IV length
return nil
}
payload = payload[n:]
if ivLength > uint64(len(payload)) {
// invalid IV length
return nil
}
iv := payload[0:ivLength]
payload = payload[ivLength:]
off, n := binary.Uvarint(payload)
if n <= 0 {
// invalid offset
return nil
}
payload = payload[n:]
length, n := binary.Uvarint(payload)
if n <= 0 {
// invalid length
return nil
}
return &packBlockPostamble{
localIndexIV: iv,
localIndexLength: uint32(length),
localIndexOffset: uint32(off),
}
}
func (bm *Manager) buildLocalIndex(pending packIndexBuilder) ([]byte, error) {
var buf bytes.Buffer
if err := pending.Build(&buf); err != nil {
return nil, fmt.Errorf("unable to build local index: %v", err)
}
return buf.Bytes(), nil
}
// appendPackFileIndexRecoveryData appends data designed to help with recovery of pack index in case it gets damaged or lost.
func (bm *Manager) appendPackFileIndexRecoveryData(blockData []byte, pending packIndexBuilder) ([]byte, error) {
// build, encrypt and append local index
localIndexOffset := len(blockData)
localIndex, err := bm.buildLocalIndex(pending)
if err != nil {
return nil, err
}
localIndexIV := bm.hashData(localIndex)
encryptedLocalIndex, err := bm.encryptor.Encrypt(localIndex, localIndexIV)
if err != nil {
return nil, err
}
postamble := packBlockPostamble{
localIndexIV: localIndexIV,
localIndexOffset: uint32(localIndexOffset),
localIndexLength: uint32(len(localIndex)),
}
blockData = append(blockData, encryptedLocalIndex...)
postambleBytes, err := postamble.toBytes()
if err != nil {
return nil, err
}
blockData = append(blockData, postambleBytes...)
pa2 := findPostamble(blockData)
if pa2 == nil {
log.Fatalf("invalid postamble written, that could not be immediately decoded, it's a bug")
}
if !reflect.DeepEqual(postamble, *pa2) {
log.Fatalf("postamble did not round-trip: %v %v", postamble, *pa2)
}
return blockData, nil
}
func (bm *Manager) readPackFileLocalIndex(ctx context.Context, packFile string, packFileLength int64) ([]byte, error) {
payload, err := bm.st.GetBlock(ctx, packFile, 0, -1)
if err != nil {
return nil, err
}
postamble := findPostamble(payload)
if postamble == nil {
return nil, fmt.Errorf("unable to find valid postamble in file %v", packFile)
}
if uint64(postamble.localIndexOffset+postamble.localIndexLength) > uint64(len(payload)) {
// invalid offset/length
return nil, fmt.Errorf("unable to find valid local index in file %v", packFile)
}
encryptedLocalIndexBytes := payload[postamble.localIndexOffset : postamble.localIndexOffset+postamble.localIndexLength]
if encryptedLocalIndexBytes == nil {
return nil, fmt.Errorf("unable to find valid local index in file %v", packFile)
}
localIndexBytes, err := bm.decryptAndVerify(encryptedLocalIndexBytes, postamble.localIndexIV)
if err != nil {
return nil, fmt.Errorf("unable to decrypt local index: %v", err)
}
return localIndexBytes, nil
}


@@ -0,0 +1,90 @@
package block
import (
"context"
"testing"
"time"
"github.com/kopia/repo/storage"
)
func TestBlockIndexRecovery(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
block2 := writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100))
block3 := writeBlockAndVerify(ctx, t, bm, seededRandomData(12, 100))
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
// delete all index blocks
assertNoError(t, bm.st.ListBlocks(ctx, newIndexBlockPrefix, func(bi storage.BlockMetadata) error {
log.Debugf("deleting %v", bi.BlockID)
return bm.st.DeleteBlock(ctx, bi.BlockID)
}))
// now with index blocks gone, all blocks appear to not be found
bm = newTestBlockManager(data, keyTime, nil)
verifyBlockNotFound(ctx, t, bm, block1)
verifyBlockNotFound(ctx, t, bm, block2)
verifyBlockNotFound(ctx, t, bm, block3)
totalRecovered := 0
// pass 1 - just list blocks to recover, but don't commit
err := bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error {
infos, err := bm.RecoverIndexFromPackFile(ctx, bi.BlockID, bi.Length, false)
if err != nil {
return err
}
totalRecovered += len(infos)
log.Debugf("recovered %v blocks", len(infos))
return nil
})
if err != nil {
t.Errorf("error recovering: %v", err)
}
if got, want := totalRecovered, 3; got != want {
t.Errorf("invalid # of blocks recovered: %v, want %v", got, want)
}
// blocks are still not found
verifyBlockNotFound(ctx, t, bm, block1)
verifyBlockNotFound(ctx, t, bm, block2)
verifyBlockNotFound(ctx, t, bm, block3)
// pass 2 now pass commit=true to add recovered blocks to index
totalRecovered = 0
err = bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error {
infos, err := bm.RecoverIndexFromPackFile(ctx, bi.BlockID, bi.Length, true)
if err != nil {
return err
}
totalRecovered += len(infos)
log.Debugf("recovered %v blocks", len(infos))
return nil
})
if err != nil {
t.Errorf("error recovering: %v", err)
}
if got, want := totalRecovered, 3; got != want {
t.Errorf("invalid # of blocks recovered: %v, want %v", got, want)
}
verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
verifyBlock(ctx, t, bm, block2, seededRandomData(11, 100))
verifyBlock(ctx, t, bm, block3, seededRandomData(12, 100))
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
verifyBlock(ctx, t, bm, block2, seededRandomData(11, 100))
verifyBlock(ctx, t, bm, block3, seededRandomData(12, 100))
}

block/block_manager.go (new file, 1039 lines; diff suppressed because it is too large)


@@ -0,0 +1,148 @@
package block
import (
"bytes"
"context"
"fmt"
"time"
"github.com/pkg/errors"
)
var autoCompactionOptions = CompactOptions{
MinSmallBlocks: 4 * parallelFetches,
MaxSmallBlocks: 64,
}
// CompactOptions provides options for compaction
type CompactOptions struct {
MinSmallBlocks int
MaxSmallBlocks int
AllBlocks bool
SkipDeletedOlderThan time.Duration
}
// CompactIndexes performs compaction of index blocks, ensuring that the number of small blocks stays between opt.MinSmallBlocks and opt.MaxSmallBlocks
func (bm *Manager) CompactIndexes(ctx context.Context, opt CompactOptions) error {
log.Debugf("CompactIndexes(%+v)", opt)
if opt.MaxSmallBlocks < opt.MinSmallBlocks {
return fmt.Errorf("invalid block counts")
}
indexBlocks, _, err := bm.loadPackIndexesUnlocked(ctx)
if err != nil {
return errors.Wrap(err, "error loading indexes")
}
blocksToCompact := bm.getBlocksToCompact(indexBlocks, opt)
if err := bm.compactAndDeleteIndexBlocks(ctx, blocksToCompact, opt); err != nil {
log.Warningf("error performing quick compaction: %v", err)
}
return nil
}
func (bm *Manager) getBlocksToCompact(indexBlocks []IndexInfo, opt CompactOptions) []IndexInfo {
var nonCompactedBlocks []IndexInfo
var totalSizeNonCompactedBlocks int64
var verySmallBlocks []IndexInfo
var totalSizeVerySmallBlocks int64
var mediumSizedBlocks []IndexInfo
var totalSizeMediumSizedBlocks int64
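// Classify candidates: blocks larger than maxPackSize are treated as already compacted and skipped
// (unless AllBlocks is set); blocks smaller than maxPackSize/20 count as very small, the rest as medium-sized.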
for _, b := range indexBlocks {
if b.Length > int64(bm.maxPackSize) && !opt.AllBlocks {
continue
}
nonCompactedBlocks = append(nonCompactedBlocks, b)
if b.Length < int64(bm.maxPackSize/20) {
verySmallBlocks = append(verySmallBlocks, b)
totalSizeVerySmallBlocks += b.Length
} else {
mediumSizedBlocks = append(mediumSizedBlocks, b)
totalSizeMediumSizedBlocks += b.Length
}
totalSizeNonCompactedBlocks += b.Length
}
if len(nonCompactedBlocks) < opt.MinSmallBlocks {
// current count is below min allowed - nothing to do
formatLog.Debugf("no small blocks to compact")
return nil
}
if len(verySmallBlocks) > len(nonCompactedBlocks)/2 && len(mediumSizedBlocks)+1 < opt.MinSmallBlocks {
formatLog.Debugf("compacting %v very small blocks", len(verySmallBlocks))
return verySmallBlocks
}
formatLog.Debugf("compacting all %v non-compacted blocks", len(nonCompactedBlocks))
return nonCompactedBlocks
}
func (bm *Manager) compactAndDeleteIndexBlocks(ctx context.Context, indexBlocks []IndexInfo, opt CompactOptions) error {
if len(indexBlocks) <= 1 {
return nil
}
formatLog.Debugf("compacting %v blocks", len(indexBlocks))
t0 := time.Now()
bld := make(packIndexBuilder)
for _, indexBlock := range indexBlocks {
if err := bm.addIndexBlocksToBuilder(ctx, bld, indexBlock, opt); err != nil {
return err
}
}
var buf bytes.Buffer
if err := bld.Build(&buf); err != nil {
return errors.Wrap(err, "unable to build an index")
}
compactedIndexBlock, err := bm.writePackIndexesNew(ctx, buf.Bytes())
if err != nil {
return errors.Wrap(err, "unable to write compacted indexes")
}
formatLog.Debugf("wrote compacted index (%v bytes) in %v", compactedIndexBlock, time.Since(t0))
for _, indexBlock := range indexBlocks {
if indexBlock.FileName == compactedIndexBlock {
continue
}
bm.listCache.deleteListCache(ctx)
if err := bm.st.DeleteBlock(ctx, indexBlock.FileName); err != nil {
log.Warningf("unable to delete compacted block %q: %v", indexBlock.FileName, err)
}
}
return nil
}
func (bm *Manager) addIndexBlocksToBuilder(ctx context.Context, bld packIndexBuilder, indexBlock IndexInfo, opt CompactOptions) error {
data, err := bm.getPhysicalBlockInternal(ctx, indexBlock.FileName)
if err != nil {
return err
}
index, err := openPackIndex(bytes.NewReader(data))
if err != nil {
return fmt.Errorf("unable to open index block %q: %v", indexBlock, err)
}
_ = index.Iterate("", func(i Info) error {
if i.Deleted && opt.SkipDeletedOlderThan > 0 && time.Since(i.Timestamp()) > opt.SkipDeletedOlderThan {
log.Debugf("skipping block %v deleted at %v", i.BlockID, i.Timestamp())
return nil
}
bld.Add(i)
return nil
})
return nil
}

block/block_manager_test.go (new file, 909 lines)
@@ -0,0 +1,909 @@
package block
import (
"bytes"
"context"
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"math/rand"
"reflect"
"strings"
"sync"
"testing"
"time"
"github.com/kopia/repo/internal/storagetesting"
"github.com/kopia/repo/storage"
logging "github.com/op/go-logging"
)
const (
maxPackSize = 2000
)
var fakeTime = time.Date(2017, 1, 1, 0, 0, 0, 0, time.UTC)
var hmacSecret = []byte{1, 2, 3}
func init() {
logging.SetLevel(logging.DEBUG, "")
}
func TestBlockManagerEmptyFlush(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
bm.Flush(ctx)
if got, want := len(data), 0; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
}
func TestBlockZeroBytes1(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
blockID := writeBlockAndVerify(ctx, t, bm, []byte{})
bm.Flush(ctx)
if got, want := len(data), 2; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
dumpBlockManagerData(t, data)
bm = newTestBlockManager(data, keyTime, nil)
verifyBlock(ctx, t, bm, blockID, []byte{})
}
func TestBlockZeroBytes2(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 10))
writeBlockAndVerify(ctx, t, bm, []byte{})
bm.Flush(ctx)
if got, want := len(data), 2; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
dumpBlockManagerData(t, data)
}
}
func TestBlockManagerSmallBlockWrites(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
for i := 0; i < 100; i++ {
writeBlockAndVerify(ctx, t, bm, seededRandomData(i, 10))
}
if got, want := len(data), 0; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
bm.Flush(ctx)
if got, want := len(data), 2; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
}
func TestBlockManagerDedupesPendingBlocks(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
for i := 0; i < 100; i++ {
writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 999))
}
if got, want := len(data), 0; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
bm.Flush(ctx)
if got, want := len(data), 2; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
}
func TestBlockManagerDedupesPendingAndUncommittedBlocks(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
// no storage writes yet - all of the data below fits in a single pending pack.
writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 950))
writeBlockAndVerify(ctx, t, bm, seededRandomData(1, 950))
writeBlockAndVerify(ctx, t, bm, seededRandomData(2, 10))
if got, want := len(data), 0; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
// still no storage writes - these writes dedupe against the pending blocks above.
writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 950))
writeBlockAndVerify(ctx, t, bm, seededRandomData(1, 950))
writeBlockAndVerify(ctx, t, bm, seededRandomData(2, 10))
if got, want := len(data), 0; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
bm.Flush(ctx)
// this flushes the pack block + index block
if got, want := len(data), 2; got != want {
dumpBlockManagerData(t, data)
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
}
func TestBlockManagerEmpty(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
noSuchBlockID := string(hashValue([]byte("foo")))
b, err := bm.GetBlock(ctx, noSuchBlockID)
if err != storage.ErrBlockNotFound {
t.Errorf("unexpected error when getting non-existent block: %v, %v", b, err)
}
bi, err := bm.BlockInfo(ctx, noSuchBlockID)
if err != storage.ErrBlockNotFound {
t.Errorf("unexpected error when getting non-existent block info: %v, %v", bi, err)
}
if got, want := len(data), 0; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
}
func verifyActiveIndexBlockCount(ctx context.Context, t *testing.T, bm *Manager, expected int) {
t.Helper()
blks, err := bm.IndexBlocks(ctx)
if err != nil {
t.Errorf("error listing active index blocks: %v", err)
return
}
if got, want := len(blks), expected; got != want {
t.Errorf("unexpected number of active index blocks %v, expected %v (%v)", got, want, blks)
}
}
func TestBlockManagerInternalFlush(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
for i := 0; i < 100; i++ {
b := make([]byte, 25)
rand.Read(b)
writeBlockAndVerify(ctx, t, bm, b)
}
// 1 data block written, but no index yet.
if got, want := len(data), 1; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
// do it again - should be 2 blocks + 1000 bytes pending.
for i := 0; i < 100; i++ {
b := make([]byte, 25)
rand.Read(b)
writeBlockAndVerify(ctx, t, bm, b)
}
// 2 data blocks written, but no index yet.
if got, want := len(data), 2; got != want {
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
bm.Flush(ctx)
// third block gets written, followed by index.
if got, want := len(data), 4; got != want {
dumpBlockManagerData(t, data)
t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
}
}
func TestBlockManagerWriteMultiple(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
timeFunc := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
bm := newTestBlockManager(data, keyTime, timeFunc)
var blockIDs []string
for i := 0; i < 5000; i++ {
//t.Logf("i=%v", i)
b := seededRandomData(i, i%113)
blkID, err := bm.WriteBlock(ctx, b, "")
if err != nil {
t.Errorf("err: %v", err)
}
blockIDs = append(blockIDs, blkID)
if i%17 == 0 {
//t.Logf("flushing %v", i)
if err := bm.Flush(ctx); err != nil {
t.Fatalf("error flushing: %v", err)
}
//dumpBlockManagerData(t, data)
}
if i%41 == 0 {
//t.Logf("opening new manager: %v", i)
if err := bm.Flush(ctx); err != nil {
t.Fatalf("error flushing: %v", err)
}
//t.Logf("data block count: %v", len(data))
//dumpBlockManagerData(t, data)
bm = newTestBlockManager(data, keyTime, timeFunc)
}
pos := rand.Intn(len(blockIDs))
if _, err := bm.GetBlock(ctx, blockIDs[pos]); err != nil {
dumpBlockManagerData(t, data)
t.Fatalf("can't read block %q: %v", blockIDs[pos], err)
continue
}
}
}
// This is a regression test for a bug where we would corrupt data when encryption
// was done in place and clobbered pending data in memory.
func TestBlockManagerFailedToWritePack(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
st := storagetesting.NewMapStorage(data, keyTime, nil)
faulty := &storagetesting.FaultyStorage{
Base: st,
}
st = faulty
bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{
Version: 1,
Hash: "HMAC-SHA256-128",
Encryption: "AES-256-CTR",
MaxPackSize: maxPackSize,
HMACSecret: []byte("foo"),
MasterKey: []byte("0123456789abcdef0123456789abcdef"),
}, CachingOptions{}, fakeTimeNowFrozen(fakeTime), nil)
if err != nil {
t.Fatalf("can't create bm: %v", err)
}
logging.SetLevel(logging.DEBUG, "faulty-storage")
faulty.Faults = map[string][]*storagetesting.Fault{
"PutBlock": {
{Err: errors.New("booboo")},
},
}
b1, err := bm.WriteBlock(ctx, seededRandomData(1, 10), "")
if err != nil {
t.Fatalf("can't create block: %v", err)
}
if err := bm.Flush(ctx); err != nil {
t.Logf("expected flush error: %v", err)
}
verifyBlock(ctx, t, bm, b1, seededRandomData(1, 10))
}
func TestBlockManagerConcurrency(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
preexistingBlock := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
bm.Flush(ctx)
dumpBlockManagerData(t, data)
bm1 := newTestBlockManager(data, keyTime, nil)
bm2 := newTestBlockManager(data, keyTime, nil)
bm3 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(1), 1*time.Second))
// all bm* can see pre-existing block
verifyBlock(ctx, t, bm1, preexistingBlock, seededRandomData(10, 100))
verifyBlock(ctx, t, bm2, preexistingBlock, seededRandomData(10, 100))
verifyBlock(ctx, t, bm3, preexistingBlock, seededRandomData(10, 100))
// write the same block in all managers.
sharedBlock := writeBlockAndVerify(ctx, t, bm1, seededRandomData(20, 100))
writeBlockAndVerify(ctx, t, bm2, seededRandomData(20, 100))
writeBlockAndVerify(ctx, t, bm3, seededRandomData(20, 100))
// write unique block per manager.
bm1block := writeBlockAndVerify(ctx, t, bm1, seededRandomData(31, 100))
bm2block := writeBlockAndVerify(ctx, t, bm2, seededRandomData(32, 100))
bm3block := writeBlockAndVerify(ctx, t, bm3, seededRandomData(33, 100))
// make sure they can't see each other's unflushed blocks.
verifyBlockNotFound(ctx, t, bm1, bm2block)
verifyBlockNotFound(ctx, t, bm1, bm3block)
verifyBlockNotFound(ctx, t, bm2, bm1block)
verifyBlockNotFound(ctx, t, bm2, bm3block)
verifyBlockNotFound(ctx, t, bm3, bm1block)
verifyBlockNotFound(ctx, t, bm3, bm2block)
// now flush all writers, they still can't see each others' data.
bm1.Flush(ctx)
bm2.Flush(ctx)
bm3.Flush(ctx)
verifyBlockNotFound(ctx, t, bm1, bm2block)
verifyBlockNotFound(ctx, t, bm1, bm3block)
verifyBlockNotFound(ctx, t, bm2, bm1block)
verifyBlockNotFound(ctx, t, bm2, bm3block)
verifyBlockNotFound(ctx, t, bm3, bm1block)
verifyBlockNotFound(ctx, t, bm3, bm2block)
// new block manager at this point can see all data.
bm4 := newTestBlockManager(data, keyTime, nil)
verifyBlock(ctx, t, bm4, preexistingBlock, seededRandomData(10, 100))
verifyBlock(ctx, t, bm4, sharedBlock, seededRandomData(20, 100))
verifyBlock(ctx, t, bm4, bm1block, seededRandomData(31, 100))
verifyBlock(ctx, t, bm4, bm2block, seededRandomData(32, 100))
verifyBlock(ctx, t, bm4, bm3block, seededRandomData(33, 100))
if got, want := getIndexCount(data), 4; got != want {
t.Errorf("unexpected index count before compaction: %v, wanted %v", got, want)
}
if err := bm4.CompactIndexes(ctx, CompactOptions{
MinSmallBlocks: 1,
MaxSmallBlocks: 1,
}); err != nil {
t.Errorf("compaction error: %v", err)
}
if got, want := getIndexCount(data), 1; got != want {
t.Errorf("unexpected index count after compaction: %v, wanted %v", got, want)
}
// new block manager at this point can see all data.
bm5 := newTestBlockManager(data, keyTime, nil)
verifyBlock(ctx, t, bm5, preexistingBlock, seededRandomData(10, 100))
verifyBlock(ctx, t, bm5, sharedBlock, seededRandomData(20, 100))
verifyBlock(ctx, t, bm5, bm1block, seededRandomData(31, 100))
verifyBlock(ctx, t, bm5, bm2block, seededRandomData(32, 100))
verifyBlock(ctx, t, bm5, bm3block, seededRandomData(33, 100))
if err := bm5.CompactIndexes(ctx, CompactOptions{
MinSmallBlocks: 1,
MaxSmallBlocks: 1,
}); err != nil {
t.Errorf("compaction error: %v", err)
}
}
func TestDeleteBlock(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
bm.Flush(ctx)
block2 := writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100))
if err := bm.DeleteBlock(block1); err != nil {
t.Errorf("unable to delete block: %v", block1)
}
if err := bm.DeleteBlock(block2); err != nil {
t.Errorf("unable to delete block: %v", block1)
}
verifyBlockNotFound(ctx, t, bm, block1)
verifyBlockNotFound(ctx, t, bm, block2)
bm.Flush(ctx)
log.Debugf("-----------")
bm = newTestBlockManager(data, keyTime, nil)
//dumpBlockManagerData(t, data)
verifyBlockNotFound(ctx, t, bm, block1)
verifyBlockNotFound(ctx, t, bm, block2)
}
func TestRewriteNonDeleted(t *testing.T) {
const stepBehaviors = 3
// perform a sequence WriteBlock() <action1> RewriteBlock() <action2> GetBlock()
// where actionX can be (0=flush and reopen, 1=flush, 2=nothing)
for action1 := 0; action1 < stepBehaviors; action1++ {
for action2 := 0; action2 < stepBehaviors; action2++ {
t.Run(fmt.Sprintf("case-%v-%v", action1, action2), func(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
fakeNow := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
bm := newTestBlockManager(data, keyTime, fakeNow)
applyStep := func(action int) {
switch action {
case 0:
t.Logf("flushing and reopening")
bm.Flush(ctx)
bm = newTestBlockManager(data, keyTime, fakeNow)
case 1:
t.Logf("flushing")
bm.Flush(ctx)
case 2:
t.Logf("doing nothing")
}
}
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
applyStep(action1)
assertNoError(t, bm.RewriteBlock(ctx, block1))
applyStep(action2)
verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
dumpBlockManagerData(t, data)
})
}
}
}
func TestDisableFlush(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
bm.DisableIndexFlush()
bm.DisableIndexFlush()
for i := 0; i < 500; i++ {
writeBlockAndVerify(ctx, t, bm, seededRandomData(i, 100))
}
bm.Flush(ctx) // flush will not have effect
bm.EnableIndexFlush()
bm.Flush(ctx) // flush will not have effect
bm.EnableIndexFlush()
verifyActiveIndexBlockCount(ctx, t, bm, 0)
bm.EnableIndexFlush()
verifyActiveIndexBlockCount(ctx, t, bm, 0)
bm.Flush(ctx) // flush will happen now
verifyActiveIndexBlockCount(ctx, t, bm, 1)
}
func TestRewriteDeleted(t *testing.T) {
const stepBehaviors = 3
// perform a sequence WriteBlock() <action1> Delete() <action2> RewriteBlock() <action3> GetBlock()
// where actionX can be (0=flush and reopen, 1=flush, 2=nothing)
for action1 := 0; action1 < stepBehaviors; action1++ {
for action2 := 0; action2 < stepBehaviors; action2++ {
for action3 := 0; action3 < stepBehaviors; action3++ {
t.Run(fmt.Sprintf("case-%v-%v-%v", action1, action2, action3), func(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
fakeNow := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
bm := newTestBlockManager(data, keyTime, fakeNow)
applyStep := func(action int) {
switch action {
case 0:
t.Logf("flushing and reopening")
bm.Flush(ctx)
bm = newTestBlockManager(data, keyTime, fakeNow)
case 1:
t.Logf("flushing")
bm.Flush(ctx)
case 2:
t.Logf("doing nothing")
}
}
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
applyStep(action1)
assertNoError(t, bm.DeleteBlock(block1))
applyStep(action2)
if got, want := bm.RewriteBlock(ctx, block1), storage.ErrBlockNotFound; got != want && got != nil {
t.Errorf("unexpected error %v, wanted %v", got, want)
}
applyStep(action3)
verifyBlockNotFound(ctx, t, bm, block1)
dumpBlockManagerData(t, data)
})
}
}
}
}
func TestDeleteAndRecreate(t *testing.T) {
ctx := context.Background()
// simulate race between delete/recreate and delete
// delete happens at t0+10, recreate at t0+20 and second delete time is parameterized.
// depending on it, the second delete results will be visible.
cases := []struct {
desc string
deletionTime time.Time
isVisible bool
}{
{"deleted before delete and-recreate", fakeTime.Add(5 * time.Second), true},
//{"deleted after delete and recreate", fakeTime.Add(25 * time.Second), false},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
// write a block
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
bm.Flush(ctx)
// delete it at the given timestamp but don't commit yet.
bm0 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(tc.deletionTime, 1*time.Second))
assertNoError(t, bm0.DeleteBlock(block1))
// delete it at t0+10
bm1 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(10*time.Second), 1*time.Second))
verifyBlock(ctx, t, bm1, block1, seededRandomData(10, 100))
assertNoError(t, bm1.DeleteBlock(block1))
bm1.Flush(ctx)
// recreate at t0+20
bm2 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(20*time.Second), 1*time.Second))
block2 := writeBlockAndVerify(ctx, t, bm2, seededRandomData(10, 100))
bm2.Flush(ctx)
// commit deletion from bm0 (t0+5)
bm0.Flush(ctx)
//dumpBlockManagerData(t, data)
if block1 != block2 {
t.Errorf("got invalid block %v, expected %v", block2, block1)
}
bm3 := newTestBlockManager(data, keyTime, nil)
dumpBlockManagerData(t, data)
if tc.isVisible {
verifyBlock(ctx, t, bm3, block1, seededRandomData(10, 100))
} else {
verifyBlockNotFound(ctx, t, bm3, block1)
}
})
}
}
func TestFindUnreferencedStorageFiles(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
blockID := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
if err := bm.DeleteBlock(blockID); err != nil {
t.Errorf("error deleting block: %v", blockID)
}
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
// block still present in first pack
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
assertNoError(t, bm.RewriteBlock(ctx, blockID))
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
verifyUnreferencedStorageFilesCount(ctx, t, bm, 1)
assertNoError(t, bm.RewriteBlock(ctx, blockID))
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
verifyUnreferencedStorageFilesCount(ctx, t, bm, 2)
}
func TestFindUnreferencedStorageFiles2(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, nil)
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
blockID := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100))
dumpBlocks(t, bm, "after writing")
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
dumpBlocks(t, bm, "after flush")
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
if err := bm.DeleteBlock(blockID); err != nil {
t.Errorf("error deleting block: %v", blockID)
}
dumpBlocks(t, bm, "after delete")
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
dumpBlocks(t, bm, "after flush")
// block present in first pack, original pack is still referenced
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
}
func dumpBlocks(t *testing.T, bm *Manager, caption string) {
t.Helper()
infos, err := bm.ListBlockInfos("", true)
if err != nil {
t.Errorf("error listing blocks: %v", err)
return
}
log.Infof("**** dumping %v blocks %v", len(infos), caption)
for i, bi := range infos {
log.Debugf(" bi[%v]=%#v", i, bi)
}
log.Infof("finished dumping %v blocks", len(infos))
}
func verifyUnreferencedStorageFilesCount(ctx context.Context, t *testing.T, bm *Manager, want int) {
t.Helper()
unref, err := bm.FindUnreferencedStorageFiles(ctx)
if err != nil {
t.Errorf("error in FindUnreferencedStorageFiles: %v", err)
}
log.Infof("got %v expecting %v", unref, want)
if got := len(unref); got != want {
t.Errorf("invalid number of unreferenced blocks: %v, wanted %v", got, want)
}
}
func TestBlockWriteAliasing(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
blockData := []byte{100, 0, 0}
id1 := writeBlockAndVerify(ctx, t, bm, blockData)
blockData[0] = 101
id2 := writeBlockAndVerify(ctx, t, bm, blockData)
bm.Flush(ctx)
blockData[0] = 102
id3 := writeBlockAndVerify(ctx, t, bm, blockData)
blockData[0] = 103
id4 := writeBlockAndVerify(ctx, t, bm, blockData)
verifyBlock(ctx, t, bm, id1, []byte{100, 0, 0})
verifyBlock(ctx, t, bm, id2, []byte{101, 0, 0})
verifyBlock(ctx, t, bm, id3, []byte{102, 0, 0})
verifyBlock(ctx, t, bm, id4, []byte{103, 0, 0})
}
func TestBlockReadAliasing(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
keyTime := map[string]time.Time{}
bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
blockData := []byte{100, 0, 0}
id1 := writeBlockAndVerify(ctx, t, bm, blockData)
blockData2, err := bm.GetBlock(ctx, id1)
if err != nil {
t.Fatalf("can't get block data: %v", err)
}
blockData2[0]++
verifyBlock(ctx, t, bm, id1, blockData)
bm.Flush(ctx)
verifyBlock(ctx, t, bm, id1, blockData)
}
func TestVersionCompatibility(t *testing.T) {
for writeVer := minSupportedReadVersion; writeVer <= currentWriteVersion; writeVer++ {
t.Run(fmt.Sprintf("version-%v", writeVer), func(t *testing.T) {
verifyVersionCompat(t, writeVer)
})
}
}
func verifyVersionCompat(t *testing.T, writeVersion int) {
ctx := context.Background()
// create block manager that writes 'writeVersion' and reads all versions >= minSupportedReadVersion
data := map[string][]byte{}
keyTime := map[string]time.Time{}
mgr := newTestBlockManager(data, keyTime, nil)
mgr.writeFormatVersion = int32(writeVersion)
dataSet := map[string][]byte{}
for i := 0; i < 3000000; i = (i + 1) * 2 {
data := make([]byte, i)
rand.Read(data)
cid, err := mgr.WriteBlock(ctx, data, "")
if err != nil {
t.Fatalf("unable to write %v bytes: %v", len(data), err)
}
dataSet[cid] = data
}
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
// delete 3 random items (map iteration order is random)
cnt := 0
for blockID := range dataSet {
t.Logf("deleting %v", blockID)
assertNoError(t, mgr.DeleteBlock(blockID))
delete(dataSet, blockID)
cnt++
if cnt >= 3 {
break
}
}
if err := mgr.Flush(ctx); err != nil {
t.Fatalf("failed to flush: %v", err)
}
// create new manager that reads and writes using new version.
mgr = newTestBlockManager(data, keyTime, nil)
// make sure we can read everything
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
if err := mgr.CompactIndexes(ctx, CompactOptions{
MinSmallBlocks: 1,
MaxSmallBlocks: 1,
}); err != nil {
t.Fatalf("unable to compact indexes: %v", err)
}
if err := mgr.Flush(ctx); err != nil {
t.Fatalf("failed to flush: %v", err)
}
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
// now open one more manager
mgr = newTestBlockManager(data, keyTime, nil)
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
}
func verifyBlockManagerDataSet(ctx context.Context, t *testing.T, mgr *Manager, dataSet map[string][]byte) {
for blockID, originalPayload := range dataSet {
v, err := mgr.GetBlock(ctx, blockID)
if err != nil {
t.Errorf("unable to read block %q: %v", blockID, err)
continue
}
if !reflect.DeepEqual(v, originalPayload) {
t.Errorf("payload for %q does not match original: %v", v, originalPayload)
}
}
}
func newTestBlockManager(data map[string][]byte, keyTime map[string]time.Time, timeFunc func() time.Time) *Manager {
if timeFunc == nil {
timeFunc = fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
}
st := storagetesting.NewMapStorage(data, keyTime, timeFunc)
// uncomment to log all storage calls:
// st = logging.NewWrapper(st)
bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{
Hash: "HMAC-SHA256",
Encryption: "NONE",
HMACSecret: hmacSecret,
MaxPackSize: maxPackSize,
}, CachingOptions{}, timeFunc, nil)
if err != nil {
panic("can't create block manager: " + err.Error())
}
bm.checkInvariantsOnUnlock = true
return bm
}
func getIndexCount(d map[string][]byte) int {
var cnt int
for k := range d {
if strings.HasPrefix(k, newIndexBlockPrefix) {
cnt++
}
}
return cnt
}
func fakeTimeNowFrozen(t time.Time) func() time.Time {
return fakeTimeNowWithAutoAdvance(t, 0)
}
func fakeTimeNowWithAutoAdvance(t time.Time, dt time.Duration) func() time.Time {
var mu sync.Mutex
return func() time.Time {
mu.Lock()
defer mu.Unlock()
ret := t
t = t.Add(dt)
return ret
}
}
func verifyBlockNotFound(ctx context.Context, t *testing.T, bm *Manager, blockID string) {
t.Helper()
b, err := bm.GetBlock(ctx, blockID)
if err != storage.ErrBlockNotFound {
t.Errorf("unexpected response from GetBlock(%q), got %v,%v, expected %v", blockID, b, err, storage.ErrBlockNotFound)
}
}
func verifyBlock(ctx context.Context, t *testing.T, bm *Manager, blockID string, b []byte) {
t.Helper()
b2, err := bm.GetBlock(ctx, blockID)
if err != nil {
t.Errorf("unable to read block %q: %v", blockID, err)
return
}
if got, want := b2, b; !reflect.DeepEqual(got, want) {
t.Errorf("block %q data mismatch: got %x (nil:%v), wanted %x (nil:%v)", blockID, got, got == nil, want, want == nil)
}
bi, err := bm.BlockInfo(ctx, blockID)
if err != nil {
t.Errorf("error getting block info %q: %v", blockID, err)
}
if got, want := bi.Length, uint32(len(b)); got != want {
t.Errorf("invalid block size for %q: %v, wanted %v", blockID, got, want)
}
}
func writeBlockAndVerify(ctx context.Context, t *testing.T, bm *Manager, b []byte) string {
t.Helper()
blockID, err := bm.WriteBlock(ctx, b, "")
if err != nil {
t.Errorf("err: %v", err)
}
if got, want := blockID, hashValue(b); got != want {
t.Errorf("invalid block ID for %x, got %v, want %v", b, got, want)
}
verifyBlock(ctx, t, bm, blockID, b)
return blockID
}
func seededRandomData(seed int, length int) []byte {
b := make([]byte, length)
rnd := rand.New(rand.NewSource(int64(seed)))
rnd.Read(b)
return b
}
func hashValue(b []byte) string {
h := hmac.New(sha256.New, hmacSecret)
h.Write(b) //nolint:errcheck
return hex.EncodeToString(h.Sum(nil))
}
func dumpBlockManagerData(t *testing.T, data map[string][]byte) {
t.Helper()
for k, v := range data {
if k[0] == 'n' {
ndx, err := openPackIndex(bytes.NewReader(v))
if err == nil {
t.Logf("index %v (%v bytes)", k, len(v))
assertNoError(t, ndx.Iterate("", func(i Info) error {
t.Logf(" %+v\n", i)
return nil
}))
}
} else {
t.Logf("data %v (%v bytes)\n", k, len(v))
}
}
}

147
block/builder.go Normal file
View File

@@ -0,0 +1,147 @@
package block
import (
"bufio"
"encoding/binary"
"fmt"
"io"
"sort"
)
// packIndexBuilder prepares and writes the block index.
type packIndexBuilder map[string]*Info
// Add adds a new entry to the builder or replaces an existing entry if the new timestamp is greater than or equal to the old one.
func (b packIndexBuilder) Add(i Info) {
old, ok := b[i.BlockID]
if !ok || i.TimestampSeconds >= old.TimestampSeconds {
b[i.BlockID] = &i
}
}
func (b packIndexBuilder) sortedBlocks() []*Info {
var allBlocks []*Info
for _, v := range b {
allBlocks = append(allBlocks, v)
}
sort.Slice(allBlocks, func(i, j int) bool {
return allBlocks[i].BlockID < allBlocks[j].BlockID
})
return allBlocks
}
type indexLayout struct {
packFileOffsets map[string]uint32
entryCount int
keyLength int
entryLength int
extraDataOffset uint32
}
// Build writes the pack index to the provided output.
func (b packIndexBuilder) Build(output io.Writer) error {
allBlocks := b.sortedBlocks()
layout := &indexLayout{
packFileOffsets: map[string]uint32{},
keyLength: -1,
entryLength: 20,
entryCount: len(allBlocks),
}
w := bufio.NewWriter(output)
// prepare extra data to be appended at the end of an index.
extraData := prepareExtraData(allBlocks, layout)
// write header
header := make([]byte, 8)
header[0] = 1 // version
header[1] = byte(layout.keyLength)
binary.BigEndian.PutUint16(header[2:4], uint16(layout.entryLength))
binary.BigEndian.PutUint32(header[4:8], uint32(layout.entryCount))
if _, err := w.Write(header); err != nil {
return fmt.Errorf("unable to write header: %v", err)
}
// write all sorted blocks.
entry := make([]byte, layout.entryLength)
for _, it := range allBlocks {
if err := writeEntry(w, it, layout, entry); err != nil {
return fmt.Errorf("unable to write entry: %v", err)
}
}
if _, err := w.Write(extraData); err != nil {
return fmt.Errorf("error writing extra data: %v", err)
}
return w.Flush()
}
func prepareExtraData(allBlocks []*Info, layout *indexLayout) []byte {
var extraData []byte
for i, it := range allBlocks {
if i == 0 {
layout.keyLength = len(contentIDToBytes(it.BlockID))
}
if it.PackFile != "" {
if _, ok := layout.packFileOffsets[it.PackFile]; !ok {
layout.packFileOffsets[it.PackFile] = uint32(len(extraData))
extraData = append(extraData, []byte(it.PackFile)...)
}
}
if len(it.Payload) > 0 {
panic("storing payloads in indexes is not supported")
}
}
layout.extraDataOffset = uint32(8 + layout.entryCount*(layout.keyLength+layout.entryLength))
return extraData
}
func writeEntry(w io.Writer, it *Info, layout *indexLayout, entry []byte) error {
k := contentIDToBytes(it.BlockID)
if len(k) != layout.keyLength {
return fmt.Errorf("inconsistent key length: %v vs %v", len(k), layout.keyLength)
}
if err := formatEntry(entry, it, layout); err != nil {
return fmt.Errorf("unable to format entry: %v", err)
}
if _, err := w.Write(k); err != nil {
return fmt.Errorf("error writing entry key: %v", err)
}
if _, err := w.Write(entry); err != nil {
return fmt.Errorf("error writing entry: %v", err)
}
return nil
}
func formatEntry(entry []byte, it *Info, layout *indexLayout) error {
entryTimestampAndFlags := entry[0:8]
entryPackFileOffset := entry[8:12]
entryPackedOffset := entry[12:16]
entryPackedLength := entry[16:20]
timestampAndFlags := uint64(it.TimestampSeconds) << 16
if len(it.PackFile) == 0 {
return fmt.Errorf("empty pack block ID for %v", it.BlockID)
}
binary.BigEndian.PutUint32(entryPackFileOffset, layout.extraDataOffset+layout.packFileOffsets[it.PackFile])
if it.Deleted {
binary.BigEndian.PutUint32(entryPackedOffset, it.PackOffset|0x80000000)
} else {
binary.BigEndian.PutUint32(entryPackedOffset, it.PackOffset)
}
binary.BigEndian.PutUint32(entryPackedLength, it.Length)
timestampAndFlags |= uint64(it.FormatVersion) << 8
timestampAndFlags |= uint64(len(it.PackFile))
binary.BigEndian.PutUint64(entryTimestampAndFlags, timestampAndFlags)
return nil
}
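
A minimal usage sketch of the builder together with openPackIndex from block/index.go below; it is assumed to live in package block (the identifiers are unexported), and the helper name and sample values are illustrative:

package block

import (
    "bytes"
    "fmt"
)

// buildAndReadIndex builds a tiny index with packIndexBuilder, serializes it,
// re-opens it with openPackIndex and looks up one entry.
func buildAndReadIndex() error {
    b := make(packIndexBuilder)
    b.Add(Info{BlockID: "aabbcc", TimestampSeconds: 1, PackFile: "p0", PackOffset: 10, Length: 5})
    // same block ID with a newer timestamp replaces the previous entry
    b.Add(Info{BlockID: "aabbcc", TimestampSeconds: 2, PackFile: "p1", PackOffset: 20, Length: 5})

    var buf bytes.Buffer
    if err := b.Build(&buf); err != nil {
        return err
    }

    ndx, err := openPackIndex(bytes.NewReader(buf.Bytes()))
    if err != nil {
        return err
    }
    defer ndx.Close() //nolint:errcheck

    i, err := ndx.GetInfo("aabbcc")
    if err != nil || i == nil {
        return fmt.Errorf("lookup failed: %v", err)
    }
    fmt.Println(i.PackFile, i.PackOffset) // p1 20
    return nil
}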

33
block/cache_hmac.go Normal file
View File

@@ -0,0 +1,33 @@
package block
import "crypto/hmac"
import "crypto/sha256"
import "errors"
func appendHMAC(data []byte, secret []byte) []byte {
h := hmac.New(sha256.New, secret)
h.Write(data) // nolint:errcheck
return h.Sum(data)
}
func verifyAndStripHMAC(b []byte, secret []byte) ([]byte, error) {
if len(b) < sha256.Size {
return nil, errors.New("invalid data - too short")
}
p := len(b) - sha256.Size
data := b[0:p]
signature := b[p:]
h := hmac.New(sha256.New, secret)
h.Write(data) // nolint:errcheck
validSignature := h.Sum(nil)
if len(signature) != len(validSignature) {
return nil, errors.New("invalid signature length")
}
if hmac.Equal(validSignature, signature) {
return data, nil
}
return nil, errors.New("invalid data - corrupted")
}
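
A short round-trip sketch of the two helpers above, assumed to live in package block; the helper name and sample secret are illustrative:

package block

import "fmt"

// hmacRoundTrip appends an HMAC-SHA256 suffix to a payload, then verifies and
// strips it; verification fails if either the payload or the suffix is modified.
func hmacRoundTrip() error {
    secret := []byte("example-secret")
    signed := appendHMAC([]byte("hello"), secret) // payload followed by 32-byte HMAC
    plain, err := verifyAndStripHMAC(signed, secret)
    if err != nil {
        return err
    }
    fmt.Printf("%s\n", plain) // hello
    return nil
}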

10
block/caching_options.go Normal file
View File

@@ -0,0 +1,10 @@
package block
// CachingOptions specifies configuration of local cache.
type CachingOptions struct {
CacheDirectory string `json:"cacheDirectory,omitempty"`
MaxCacheSizeBytes int64 `json:"maxCacheSize,omitempty"`
MaxListCacheDurationSec int `json:"maxListCacheDuration,omitempty"`
IgnoreListCache bool `json:"-"`
HMACSecret []byte `json:"-"`
}

View File

@@ -0,0 +1,138 @@
package block
import (
"fmt"
"path/filepath"
"sync"
"github.com/kopia/repo/storage"
)
type committedBlockIndex struct {
cache committedBlockIndexCache
mu sync.Mutex
inUse map[string]packIndex
merged mergedIndex
}
type committedBlockIndexCache interface {
hasIndexBlockID(indexBlockID string) (bool, error)
addBlockToCache(indexBlockID string, data []byte) error
openIndex(indexBlockID string) (packIndex, error)
expireUnused(used []string) error
}
func (b *committedBlockIndex) getBlock(blockID string) (Info, error) {
b.mu.Lock()
defer b.mu.Unlock()
info, err := b.merged.GetInfo(blockID)
if info != nil {
return *info, nil
}
if err == nil {
return Info{}, storage.ErrBlockNotFound
}
return Info{}, err
}
func (b *committedBlockIndex) addBlock(indexBlockID string, data []byte, use bool) error {
if err := b.cache.addBlockToCache(indexBlockID, data); err != nil {
return err
}
if !use {
return nil
}
b.mu.Lock()
defer b.mu.Unlock()
if b.inUse[indexBlockID] != nil {
return nil
}
ndx, err := b.cache.openIndex(indexBlockID)
if err != nil {
return fmt.Errorf("unable to open pack index %q: %v", indexBlockID, err)
}
b.inUse[indexBlockID] = ndx
b.merged = append(b.merged, ndx)
return nil
}
func (b *committedBlockIndex) listBlocks(prefix string, cb func(i Info) error) error {
b.mu.Lock()
m := append(mergedIndex(nil), b.merged...)
b.mu.Unlock()
return m.Iterate(prefix, cb)
}
func (b *committedBlockIndex) packFilesChanged(packFiles []string) bool {
if len(packFiles) != len(b.inUse) {
return true
}
for _, packFile := range packFiles {
if b.inUse[packFile] == nil {
return true
}
}
return false
}
func (b *committedBlockIndex) use(packFiles []string) (bool, error) {
b.mu.Lock()
defer b.mu.Unlock()
if !b.packFilesChanged(packFiles) {
return false, nil
}
log.Debugf("set of index files has changed (had %v, now %v)", len(b.inUse), len(packFiles))
var newMerged mergedIndex
newInUse := map[string]packIndex{}
defer func() {
newMerged.Close() //nolint:errcheck
}()
for _, e := range packFiles {
ndx, err := b.cache.openIndex(e)
if err != nil {
return false, fmt.Errorf("unable to open pack index %q: %v", e, err)
}
newMerged = append(newMerged, ndx)
newInUse[e] = ndx
}
b.merged = newMerged
b.inUse = newInUse
if err := b.cache.expireUnused(packFiles); err != nil {
log.Warningf("unable to expire unused block index files: %v", err)
}
newMerged = nil
return true, nil
}
func newCommittedBlockIndex(caching CachingOptions) (*committedBlockIndex, error) {
var cache committedBlockIndexCache
if caching.CacheDirectory != "" {
dirname := filepath.Join(caching.CacheDirectory, "indexes")
cache = &diskCommittedBlockIndexCache{dirname}
} else {
cache = &memoryCommittedBlockIndexCache{
blocks: map[string]packIndex{},
}
}
return &committedBlockIndex{
cache: cache,
inUse: map[string]packIndex{},
}, nil
}

View File

@@ -0,0 +1,134 @@
package block
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
"golang.org/x/exp/mmap"
)
const (
simpleIndexSuffix = ".sndx"
unusedCommittedBlockIndexCleanupTime = 1 * time.Hour // delete unused committed index blocks after 1 hour
)
type diskCommittedBlockIndexCache struct {
dirname string
}
func (c *diskCommittedBlockIndexCache) indexBlockPath(indexBlockID string) string {
return filepath.Join(c.dirname, indexBlockID+simpleIndexSuffix)
}
func (c *diskCommittedBlockIndexCache) openIndex(indexBlockID string) (packIndex, error) {
fullpath := c.indexBlockPath(indexBlockID)
f, err := mmap.Open(fullpath)
if err != nil {
return nil, err
}
return openPackIndex(f)
}
func (c *diskCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) {
_, err := os.Stat(c.indexBlockPath(indexBlockID))
if err == nil {
return true, nil
}
if os.IsNotExist(err) {
return false, nil
}
return false, err
}
func (c *diskCommittedBlockIndexCache) addBlockToCache(indexBlockID string, data []byte) error {
exists, err := c.hasIndexBlockID(indexBlockID)
if err != nil {
return err
}
if exists {
return nil
}
tmpFile, err := writeTempFileAtomic(c.dirname, data)
if err != nil {
return err
}
// rename() is atomic, so one process will succeed, but the other will fail
if err := os.Rename(tmpFile, c.indexBlockPath(indexBlockID)); err != nil {
// verify that the block exists
exists, err := c.hasIndexBlockID(indexBlockID)
if err != nil {
return err
}
if !exists {
return fmt.Errorf("unsuccessful index write of block %q", indexBlockID)
}
}
return nil
}
func writeTempFileAtomic(dirname string, data []byte) (string, error) {
// write to a temp file to avoid a race where two processes write at the same time.
tf, err := ioutil.TempFile(dirname, "tmp")
if err != nil {
if os.IsNotExist(err) {
os.MkdirAll(dirname, 0700) //nolint:errcheck
tf, err = ioutil.TempFile(dirname, "tmp")
}
}
if err != nil {
return "", fmt.Errorf("can't create tmp file: %v", err)
}
if _, err := tf.Write(data); err != nil {
return "", fmt.Errorf("can't write to temp file: %v", err)
}
if err := tf.Close(); err != nil {
return "", fmt.Errorf("can't close tmp file")
}
return tf.Name(), nil
}
func (c *diskCommittedBlockIndexCache) expireUnused(used []string) error {
entries, err := ioutil.ReadDir(c.dirname)
if err != nil {
return fmt.Errorf("can't list cache: %v", err)
}
remaining := map[string]os.FileInfo{}
for _, ent := range entries {
if strings.HasSuffix(ent.Name(), simpleIndexSuffix) {
n := strings.TrimSuffix(ent.Name(), simpleIndexSuffix)
remaining[n] = ent
}
}
for _, u := range used {
delete(remaining, u)
}
for _, rem := range remaining {
if time.Since(rem.ModTime()) > unusedCommittedBlockIndexCleanupTime {
log.Debugf("removing unused %v %v", rem.Name(), rem.ModTime())
if err := os.Remove(filepath.Join(c.dirname, rem.Name())); err != nil {
log.Warningf("unable to remove unused index file: %v", err)
}
} else {
log.Debugf("keeping unused %v because it's too new %v", rem.Name(), rem.ModTime())
}
}
return nil
}

View File

@@ -0,0 +1,48 @@
package block
import (
"bytes"
"fmt"
"sync"
)
type memoryCommittedBlockIndexCache struct {
mu sync.Mutex
blocks map[string]packIndex
}
func (m *memoryCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) {
m.mu.Lock()
defer m.mu.Unlock()
return m.blocks[indexBlockID] != nil, nil
}
func (m *memoryCommittedBlockIndexCache) addBlockToCache(indexBlockID string, data []byte) error {
m.mu.Lock()
defer m.mu.Unlock()
ndx, err := openPackIndex(bytes.NewReader(data))
if err != nil {
return err
}
m.blocks[indexBlockID] = ndx
return nil
}
func (m *memoryCommittedBlockIndexCache) openIndex(indexBlockID string) (packIndex, error) {
m.mu.Lock()
defer m.mu.Unlock()
v := m.blocks[indexBlockID]
if v == nil {
return nil, fmt.Errorf("block not found in cache: %v", indexBlockID)
}
return v, nil
}
func (m *memoryCommittedBlockIndexCache) expireUnused(used []string) error {
return nil
}

View File

@@ -0,0 +1,38 @@
package block
import (
"encoding/hex"
)
func bytesToContentID(b []byte) string {
if len(b) == 0 {
return ""
}
if b[0] == 0xff {
return string(b[1:])
}
prefix := ""
if b[0] != 0 {
prefix = string(b[0:1])
}
return prefix + hex.EncodeToString(b[1:])
}
func contentIDToBytes(c string) []byte {
var prefix []byte
var skip int
if len(c)%2 == 1 {
prefix = []byte(c[0:1])
skip = 1
} else {
prefix = []byte{0}
}
b, err := hex.DecodeString(c[skip:])
if err != nil {
return append([]byte{0xff}, []byte(c)...)
}
return append(prefix, b...)
}
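
A sketch of the encoding rules above, assumed to live in package block; the helper name is illustrative:

package block

import "fmt"

// contentIDEncodingExample shows how even-length hex IDs get a zero prefix
// byte, odd-length IDs keep their one-character prefix, non-hex IDs fall back
// to a 0xff-prefixed raw form, and the round trip restores the original string.
func contentIDEncodingExample() {
    fmt.Printf("%x\n", contentIDToBytes("aabbcc"))             // 00aabbcc
    fmt.Printf("%x\n", contentIDToBytes("xaabbcc"))            // 78aabbcc ('x' prefix byte)
    fmt.Println(bytesToContentID(contentIDToBytes("xaabbcc"))) // xaabbcc
    fmt.Println(bytesToContentID(contentIDToBytes("a1x")))     // a1x (non-hex fallback)
}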

34
block/context.go Normal file
View File

@@ -0,0 +1,34 @@
package block
import "context"
type contextKey string
var useBlockCacheContextKey contextKey = "use-block-cache"
var useListCacheContextKey contextKey = "use-list-cache"
// UsingBlockCache returns a derived context that tells the block manager whether to use the block cache.
func UsingBlockCache(ctx context.Context, enabled bool) context.Context {
return context.WithValue(ctx, useBlockCacheContextKey, enabled)
}
// UsingListCache returns a derived context that tells the block manager whether to use the list cache.
func UsingListCache(ctx context.Context, enabled bool) context.Context {
return context.WithValue(ctx, useListCacheContextKey, enabled)
}
func shouldUseBlockCache(ctx context.Context) bool {
if enabled, ok := ctx.Value(useBlockCacheContextKey).(bool); ok {
return enabled
}
return true
}
func shouldUseListCache(ctx context.Context) bool {
if enabled, ok := ctx.Value(useListCacheContextKey).(bool); ok {
return enabled
}
return true
}
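
A sketch of how callers toggle caching through the context, assumed to live in package block; the helper name is illustrative:

package block

import "context"

// cacheToggleExample shows that caching defaults to enabled and can be turned
// off for a single call chain by deriving a context.
func cacheToggleExample() {
    ctx := context.Background()
    _ = shouldUseBlockCache(ctx)                         // true (default)
    _ = shouldUseBlockCache(UsingBlockCache(ctx, false)) // false for the derived context only
    _ = shouldUseListCache(UsingListCache(ctx, false))   // same pattern for the list cache
}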

74
block/format.go Normal file
View File

@@ -0,0 +1,74 @@
package block
import (
"encoding/binary"
"fmt"
)
// Format describes the format of a single pack index. The actual structure is not used;
// it exists purely for documentation purposes.
// The struct is byte-aligned.
type Format struct {
Version byte // format version number must be 0x01
KeySize byte // size of each key in bytes
EntrySize uint16 // size of each entry in bytes, big-endian
EntryCount uint32 // number of sorted (key,value) entries that follow
Entries []struct {
Key []byte // key bytes (KeySize)
Entry entry
}
ExtraData []byte // extra data
}
type entry struct {
// big endian:
// 48 most significant bits - 48-bit timestamp in seconds since 1970/01/01 UTC
// 8 bits - format version (currently == 1)
// 8 least significant bits - length of pack block ID
timestampAndFlags uint64
packFileOffset uint32 // 4 bytes, big endian, offset within index file where pack block ID begins
packedOffset uint32 // 4 bytes, big endian, offset within pack file where the contents begin
packedLength uint32 // 4 bytes, big endian, content length
}
func (e *entry) parse(b []byte) error {
if len(b) < 20 {
return fmt.Errorf("invalid entry length: %v", len(b))
}
e.timestampAndFlags = binary.BigEndian.Uint64(b[0:8])
e.packFileOffset = binary.BigEndian.Uint32(b[8:12])
e.packedOffset = binary.BigEndian.Uint32(b[12:16])
e.packedLength = binary.BigEndian.Uint32(b[16:20])
return nil
}
func (e *entry) IsDeleted() bool {
return e.packedOffset&0x80000000 != 0
}
func (e *entry) TimestampSeconds() int64 {
return int64(e.timestampAndFlags >> 16)
}
func (e *entry) PackedFormatVersion() byte {
return byte(e.timestampAndFlags >> 8)
}
func (e *entry) PackFileLength() byte {
return byte(e.timestampAndFlags)
}
func (e *entry) PackFileOffset() uint32 {
return e.packFileOffset
}
func (e *entry) PackedOffset() uint32 {
return e.packedOffset & 0x7fffffff
}
func (e *entry) PackedLength() uint32 {
return e.packedLength
}
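
A sketch of the 20-byte entry layout documented above, assumed to live in package block; the helper name and sample values are illustrative:

package block

import (
    "encoding/binary"
    "fmt"
)

// entryLayoutExample hand-encodes one entry and parses it back with entry.parse,
// exercising the timestamp/format-version/pack-file-length packing and the
// deleted bit stored in the high bit of the packed offset.
func entryLayoutExample() error {
    raw := make([]byte, 20)
    // 48-bit timestamp | 8-bit format version | 8-bit pack file name length
    binary.BigEndian.PutUint64(raw[0:8], uint64(1559000000)<<16|uint64(1)<<8|uint64(40))
    binary.BigEndian.PutUint32(raw[8:12], 100)           // offset of pack file name within extra data
    binary.BigEndian.PutUint32(raw[12:16], 0x80000000|7) // deleted bit + packed offset 7
    binary.BigEndian.PutUint32(raw[16:20], 1234)         // packed length

    var e entry
    if err := e.parse(raw); err != nil {
        return err
    }
    fmt.Println(e.TimestampSeconds(), e.PackedFormatVersion(), e.PackFileLength()) // 1559000000 1 40
    fmt.Println(e.IsDeleted(), e.PackedOffset(), e.PackedLength())                 // true 7 1234
    return nil
}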

198
block/index.go Normal file
View File

@@ -0,0 +1,198 @@
package block
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"sort"
"strings"
"github.com/pkg/errors"
)
// packIndex is a read-only index of packed blocks.
type packIndex interface {
io.Closer
GetInfo(blockID string) (*Info, error)
Iterate(prefix string, cb func(Info) error) error
}
type index struct {
hdr headerInfo
readerAt io.ReaderAt
}
type headerInfo struct {
keySize int
valueSize int
entryCount int
}
func readHeader(readerAt io.ReaderAt) (headerInfo, error) {
var header [8]byte
if n, err := readerAt.ReadAt(header[:], 0); err != nil || n != 8 {
return headerInfo{}, errors.Wrap(err, "invalid header")
}
if header[0] != 1 {
return headerInfo{}, fmt.Errorf("invalid header format: %v", header[0])
}
hi := headerInfo{
keySize: int(header[1]),
valueSize: int(binary.BigEndian.Uint16(header[2:4])),
entryCount: int(binary.BigEndian.Uint32(header[4:8])),
}
if hi.keySize <= 1 || hi.valueSize < 0 || hi.entryCount < 0 {
return headerInfo{}, fmt.Errorf("invalid header")
}
return hi, nil
}
// Iterate invokes the provided callback function for all blocks in the index, sorted alphabetically.
// The iteration ends when the callback returns an error (which is propagated to the caller)
// or when all blocks have been visited.
func (b *index) Iterate(prefix string, cb func(Info) error) error {
startPos, err := b.findEntryPosition(prefix)
if err != nil {
return errors.Wrap(err, "could not find starting position")
}
stride := b.hdr.keySize + b.hdr.valueSize
entry := make([]byte, stride)
for i := startPos; i < b.hdr.entryCount; i++ {
n, err := b.readerAt.ReadAt(entry, int64(8+stride*i))
if err != nil || n != len(entry) {
return errors.Wrap(err, "unable to read from index")
}
key := entry[0:b.hdr.keySize]
value := entry[b.hdr.keySize:]
i, err := b.entryToInfo(bytesToContentID(key), value)
if err != nil {
return errors.Wrap(err, "invalid index data")
}
if !strings.HasPrefix(i.BlockID, prefix) {
break
}
if err := cb(i); err != nil {
return err
}
}
return nil
}
func (b *index) findEntryPosition(blockID string) (int, error) {
stride := b.hdr.keySize + b.hdr.valueSize
entryBuf := make([]byte, stride)
var readErr error
pos := sort.Search(b.hdr.entryCount, func(p int) bool {
if readErr != nil {
return false
}
_, err := b.readerAt.ReadAt(entryBuf, int64(8+stride*p))
if err != nil {
readErr = err
return false
}
return bytesToContentID(entryBuf[0:b.hdr.keySize]) >= blockID
})
return pos, readErr
}
func (b *index) findEntry(blockID string) ([]byte, error) {
key := contentIDToBytes(blockID)
if len(key) != b.hdr.keySize {
return nil, fmt.Errorf("invalid block ID: %q", blockID)
}
stride := b.hdr.keySize + b.hdr.valueSize
position, err := b.findEntryPosition(blockID)
if err != nil {
return nil, err
}
if position >= b.hdr.entryCount {
return nil, nil
}
entryBuf := make([]byte, stride)
if _, err := b.readerAt.ReadAt(entryBuf, int64(8+stride*position)); err != nil {
return nil, err
}
if bytes.Equal(entryBuf[0:len(key)], key) {
return entryBuf[len(key):], nil
}
return nil, nil
}
// GetInfo returns information about a given block. If a block is not found, nil is returned.
func (b *index) GetInfo(blockID string) (*Info, error) {
e, err := b.findEntry(blockID)
if err != nil {
return nil, err
}
if e == nil {
return nil, nil
}
i, err := b.entryToInfo(blockID, e)
if err != nil {
return nil, err
}
return &i, err
}
func (b *index) entryToInfo(blockID string, entryData []byte) (Info, error) {
if len(entryData) < 20 {
return Info{}, fmt.Errorf("invalid entry length: %v", len(entryData))
}
var e entry
if err := e.parse(entryData); err != nil {
return Info{}, err
}
packFile := make([]byte, e.PackFileLength())
n, err := b.readerAt.ReadAt(packFile, int64(e.PackFileOffset()))
if err != nil || n != int(e.PackFileLength()) {
return Info{}, errors.Wrap(err, "can't read pack block ID")
}
return Info{
BlockID: blockID,
Deleted: e.IsDeleted(),
TimestampSeconds: e.TimestampSeconds(),
FormatVersion: e.PackedFormatVersion(),
PackOffset: e.PackedOffset(),
Length: e.PackedLength(),
PackFile: string(packFile),
}, nil
}
// Close closes the index and the underlying reader.
func (b *index) Close() error {
if closer, ok := b.readerAt.(io.Closer); ok {
return closer.Close()
}
return nil
}
// openPackIndex reads an Index from a given reader. The caller must call Close() when the index is no longer used.
func openPackIndex(readerAt io.ReaderAt) (packIndex, error) {
h, err := readHeader(readerAt)
if err != nil {
return nil, errors.Wrap(err, "invalid header")
}
return &index{hdr: h, readerAt: readerAt}, nil
}
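
A sketch of prefix iteration on the reader above, assumed to live in package block; the helper name and sample IDs are illustrative:

package block

import (
    "bytes"
    "fmt"
)

// iteratePrefixExample builds a three-entry index and iterates only the block
// IDs starting with "aa"; iteration is in sorted order and stops at the first
// ID outside the prefix.
func iteratePrefixExample() error {
    b := make(packIndexBuilder)
    for _, id := range []string{"aa01", "aa02", "bb01"} {
        b.Add(Info{BlockID: id, TimestampSeconds: 1, PackFile: "p", Length: 1})
    }
    var buf bytes.Buffer
    if err := b.Build(&buf); err != nil {
        return err
    }
    ndx, err := openPackIndex(bytes.NewReader(buf.Bytes()))
    if err != nil {
        return err
    }
    defer ndx.Close() //nolint:errcheck
    return ndx.Iterate("aa", func(i Info) error {
        fmt.Println(i.BlockID) // aa01, then aa02
        return nil
    })
}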

22
block/info.go Normal file
View File

@@ -0,0 +1,22 @@
package block
import (
"time"
)
// Info is information about a single block managed by Manager.
type Info struct {
BlockID string `json:"blockID"`
Length uint32 `json:"length"`
TimestampSeconds int64 `json:"time"`
PackFile string `json:"packFile,omitempty"`
PackOffset uint32 `json:"packOffset,omitempty"`
Deleted bool `json:"deleted"`
Payload []byte `json:"payload"` // set for payloads stored inline
FormatVersion byte `json:"formatVersion"`
}
// Timestamp returns the time when a block was created or deleted.
func (i Info) Timestamp() time.Time {
return time.Unix(i.TimestampSeconds, 0)
}

123
block/list_cache.go Normal file
View File

@@ -0,0 +1,123 @@
package block
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"time"
"github.com/kopia/repo/storage"
)
type listCache struct {
st storage.Storage
cacheFile string
listCacheDuration time.Duration
hmacSecret []byte
}
func (c *listCache) listIndexBlocks(ctx context.Context) ([]IndexInfo, error) {
if c.cacheFile != "" {
ci, err := c.readBlocksFromCache(ctx)
if err == nil {
expirationTime := ci.Timestamp.Add(c.listCacheDuration)
if time.Now().Before(expirationTime) {
log.Debugf("retrieved list of index blocks from cache")
return ci.Blocks, nil
}
} else if err != storage.ErrBlockNotFound {
log.Warningf("unable to open cache file: %v", err)
}
}
blocks, err := listIndexBlocksFromStorage(ctx, c.st)
if err == nil {
c.saveListToCache(ctx, &cachedList{
Blocks: blocks,
Timestamp: time.Now(),
})
}
log.Debugf("found %v index blocks from source", len(blocks))
return blocks, err
}
func (c *listCache) saveListToCache(ctx context.Context, ci *cachedList) {
if c.cacheFile == "" {
return
}
log.Debugf("saving index blocks to cache: %v", len(ci.Blocks))
if data, err := json.Marshal(ci); err == nil {
mySuffix := fmt.Sprintf(".tmp-%v-%v", os.Getpid(), time.Now().UnixNano())
if err := ioutil.WriteFile(c.cacheFile+mySuffix, appendHMAC(data, c.hmacSecret), 0600); err != nil {
log.Warningf("unable to write list cache: %v", err)
}
os.Rename(c.cacheFile+mySuffix, c.cacheFile) //nolint:errcheck
os.Remove(c.cacheFile + mySuffix) //nolint:errcheck
}
}
func (c *listCache) deleteListCache(ctx context.Context) {
if c.cacheFile != "" {
os.Remove(c.cacheFile) //nolint:errcheck
}
}
func (c *listCache) readBlocksFromCache(ctx context.Context) (*cachedList, error) {
if !shouldUseListCache(ctx) {
return nil, storage.ErrBlockNotFound
}
ci := &cachedList{}
data, err := ioutil.ReadFile(c.cacheFile)
if err != nil {
if os.IsNotExist(err) {
return nil, storage.ErrBlockNotFound
}
return nil, err
}
data, err = verifyAndStripHMAC(data, c.hmacSecret)
if err != nil {
return nil, fmt.Errorf("invalid file %v: %v", c.cacheFile, err)
}
if err := json.Unmarshal(data, &ci); err != nil {
return nil, fmt.Errorf("can't unmarshal cached list results: %v", err)
}
return ci, nil
}
func newListCache(ctx context.Context, st storage.Storage, caching CachingOptions) (*listCache, error) {
var listCacheFile string
if caching.CacheDirectory != "" {
listCacheFile = filepath.Join(caching.CacheDirectory, "list")
if _, err := os.Stat(caching.CacheDirectory); os.IsNotExist(err) {
if err := os.MkdirAll(caching.CacheDirectory, 0700); err != nil {
return nil, err
}
}
}
c := &listCache{
st: st,
cacheFile: listCacheFile,
hmacSecret: caching.HMACSecret,
listCacheDuration: time.Duration(caching.MaxListCacheDurationSec) * time.Second,
}
if caching.IgnoreListCache {
c.deleteListCache(ctx)
}
return c, nil
}

132
block/merged.go Normal file
View File

@@ -0,0 +1,132 @@
package block
import (
"container/heap"
"errors"
)
// mergedIndex is an implementation of packIndex that transparently merges results from the underlying indexes.
type mergedIndex []packIndex
// Close closes all underlying indexes.
func (m mergedIndex) Close() error {
for _, ndx := range m {
if err := ndx.Close(); err != nil {
return err
}
}
return nil
}
// GetInfo returns information about a single block. If the block is not found, it returns (nil, nil).
func (m mergedIndex) GetInfo(contentID string) (*Info, error) {
var best *Info
for _, ndx := range m {
i, err := ndx.GetInfo(contentID)
if err != nil {
return nil, err
}
if i != nil {
if best == nil || i.TimestampSeconds > best.TimestampSeconds || (i.TimestampSeconds == best.TimestampSeconds && !i.Deleted) {
best = i
}
}
}
return best, nil
}
type nextInfo struct {
it Info
ch <-chan Info
}
type nextInfoHeap []*nextInfo
func (h nextInfoHeap) Len() int { return len(h) }
func (h nextInfoHeap) Less(i, j int) bool {
if a, b := h[i].it.BlockID, h[j].it.BlockID; a != b {
return a < b
}
if a, b := h[i].it.TimestampSeconds, h[j].it.TimestampSeconds; a != b {
return a < b
}
return !h[i].it.Deleted
}
func (h nextInfoHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *nextInfoHeap) Push(x interface{}) {
*h = append(*h, x.(*nextInfo))
}
func (h *nextInfoHeap) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
func iterateChan(prefix string, ndx packIndex, done chan bool) <-chan Info {
ch := make(chan Info)
go func() {
defer close(ch)
_ = ndx.Iterate(prefix, func(i Info) error {
select {
case <-done:
return errors.New("end of iteration")
case ch <- i:
return nil
}
})
}()
return ch
}
// Iterate invokes the provided callback for all unique block IDs in the underlying sources until
// either all blocks have been visited or the callback returns an error.
func (m mergedIndex) Iterate(prefix string, cb func(i Info) error) error {
var minHeap nextInfoHeap
done := make(chan bool)
defer close(done)
for _, ndx := range m {
ch := iterateChan(prefix, ndx, done)
it, ok := <-ch
if ok {
heap.Push(&minHeap, &nextInfo{it, ch})
}
}
var pendingItem Info
for len(minHeap) > 0 {
min := heap.Pop(&minHeap).(*nextInfo)
if pendingItem.BlockID != min.it.BlockID {
if pendingItem.BlockID != "" {
if err := cb(pendingItem); err != nil {
return err
}
}
pendingItem = min.it
} else if min.it.TimestampSeconds > pendingItem.TimestampSeconds {
pendingItem = min.it
}
it, ok := <-min.ch
if ok {
heap.Push(&minHeap, &nextInfo{it, min.ch})
}
}
if pendingItem.BlockID != "" {
return cb(pendingItem)
}
return nil
}
var _ packIndex = (*mergedIndex)(nil)

93
block/merged_test.go Normal file
View File

@@ -0,0 +1,93 @@
package block
import (
"bytes"
"reflect"
"testing"
"github.com/pkg/errors"
)
func TestMerged(t *testing.T) {
i1, err := indexWithItems(
Info{BlockID: "aabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 11},
Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
Info{BlockID: "z010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 111},
)
if err != nil {
t.Fatalf("can't create index: %v", err)
}
i2, err := indexWithItems(
Info{BlockID: "aabbcc", TimestampSeconds: 3, PackFile: "yy", PackOffset: 33},
Info{BlockID: "xaabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 222, Deleted: true},
)
if err != nil {
t.Fatalf("can't create index: %v", err)
}
i3, err := indexWithItems(
Info{BlockID: "aabbcc", TimestampSeconds: 2, PackFile: "zz", PackOffset: 22},
Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "zz", PackOffset: 222},
Info{BlockID: "k010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
Info{BlockID: "k020304", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
)
if err != nil {
t.Fatalf("can't create index: %v", err)
}
m := mergedIndex{i1, i2, i3}
i, err := m.GetInfo("aabbcc")
if err != nil || i == nil {
t.Fatalf("unable to get info: %v", err)
}
if got, want := i.PackOffset, uint32(33); got != want {
t.Errorf("invalid pack offset %v, wanted %v", got, want)
}
var inOrder []string
assertNoError(t, m.Iterate("", func(i Info) error {
inOrder = append(inOrder, i.BlockID)
if i.BlockID == "de1e1e" {
if i.Deleted {
t.Errorf("iteration preferred deleted block over non-deleted")
}
}
return nil
}))
if i, err := m.GetInfo("de1e1e"); err != nil {
t.Errorf("error getting deleted block info: %v", err)
} else if i.Deleted {
t.Errorf("GetInfo preferred deleted block over non-deleted")
}
expectedInOrder := []string{
"aabbcc",
"ddeeff",
"de1e1e",
"k010203",
"k020304",
"xaabbcc",
"z010203",
}
if !reflect.DeepEqual(inOrder, expectedInOrder) {
t.Errorf("unexpected items in order: %v, wanted %v", inOrder, expectedInOrder)
}
if err := m.Close(); err != nil {
t.Errorf("unexpected error in Close(): %v", err)
}
}
func indexWithItems(items ...Info) (packIndex, error) {
b := make(packIndexBuilder)
for _, it := range items {
b.Add(it)
}
var buf bytes.Buffer
if err := b.Build(&buf); err != nil {
return nil, errors.Wrap(err, "build error")
}
return openPackIndex(bytes.NewReader(buf.Bytes()))
}

View File

@@ -0,0 +1,26 @@
package block
import "testing"
func TestRoundTrip(t *testing.T) {
cases := []string{
"",
"x",
"aa",
"xaa",
"xaaa",
"a1x",
}
for _, tc := range cases {
b := contentIDToBytes(tc)
got := bytesToContentID(b)
if got != tc {
t.Errorf("%q did not round trip, got %q, wanted %q", tc, got, tc)
}
}
if got, want := bytesToContentID(nil), ""; got != want {
t.Errorf("unexpected content id %v, want %v", got, want)
}
}

235
block/packindex_test.go Normal file
View File

@@ -0,0 +1,235 @@
package block
import (
"bytes"
"crypto/sha1"
"encoding/hex"
"fmt"
"math/rand"
"reflect"
"strings"
"testing"
)
func TestPackIndex(t *testing.T) {
blockNumber := 0
deterministicBlockID := func(prefix string, id int) string {
h := sha1.New()
fmt.Fprintf(h, "%v%v", prefix, id)
blockNumber++
prefix2 := ""
if id%2 == 0 {
prefix2 = "x"
}
if id%7 == 0 {
prefix2 = "y"
}
if id%5 == 0 {
prefix2 = "m"
}
return string(fmt.Sprintf("%v%x", prefix2, h.Sum(nil)))
}
deterministicPackFile := func(id int) string {
h := sha1.New()
fmt.Fprintf(h, "%v", id)
blockNumber++
return string(fmt.Sprintf("%x", h.Sum(nil)))
}
deterministicPackedOffset := func(id int) uint32 {
s := rand.NewSource(int64(id + 1))
rnd := rand.New(s)
return uint32(rnd.Int31())
}
deterministicPackedLength := func(id int) uint32 {
s := rand.NewSource(int64(id + 2))
rnd := rand.New(s)
return uint32(rnd.Int31())
}
deterministicFormatVersion := func(id int) byte {
return byte(id % 100)
}
randomUnixTime := func() int64 {
return int64(rand.Int31())
}
var infos []Info
// deleted blocks with all information
for i := 0; i < 100; i++ {
infos = append(infos, Info{
TimestampSeconds: randomUnixTime(),
Deleted: true,
BlockID: deterministicBlockID("deleted-packed", i),
PackFile: deterministicPackFile(i),
PackOffset: deterministicPackedOffset(i),
Length: deterministicPackedLength(i),
FormatVersion: deterministicFormatVersion(i),
})
}
// non-deleted blocks with all information
for i := 0; i < 100; i++ {
infos = append(infos, Info{
TimestampSeconds: randomUnixTime(),
BlockID: deterministicBlockID("packed", i),
PackFile: deterministicPackFile(i),
PackOffset: deterministicPackedOffset(i),
Length: deterministicPackedLength(i),
FormatVersion: deterministicFormatVersion(i),
})
}
infoMap := map[string]Info{}
b1 := make(packIndexBuilder)
b2 := make(packIndexBuilder)
b3 := make(packIndexBuilder)
for _, info := range infos {
infoMap[info.BlockID] = info
b1.Add(info)
b2.Add(info)
b3.Add(info)
}
var buf1 bytes.Buffer
var buf2 bytes.Buffer
var buf3 bytes.Buffer
if err := b1.Build(&buf1); err != nil {
t.Errorf("unable to build: %v", err)
}
if err := b1.Build(&buf2); err != nil {
t.Errorf("unable to build: %v", err)
}
if err := b1.Build(&buf3); err != nil {
t.Errorf("unable to build: %v", err)
}
data1 := buf1.Bytes()
data2 := buf2.Bytes()
data3 := buf3.Bytes()
if !reflect.DeepEqual(data1, data2) {
t.Errorf("builder output not stable: %x vs %x", hex.Dump(data1), hex.Dump(data2))
}
if !reflect.DeepEqual(data2, data3) {
t.Errorf("builder output not stable: %x vs %x", hex.Dump(data2), hex.Dump(data3))
}
t.Run("FuzzTest", func(t *testing.T) {
fuzzTestIndexOpen(t, data1)
})
ndx, err := openPackIndex(bytes.NewReader(data1))
if err != nil {
t.Fatalf("can't open index: %v", err)
}
defer ndx.Close()
for _, info := range infos {
info2, err := ndx.GetInfo(info.BlockID)
if err != nil {
t.Errorf("unable to find %v", info.BlockID)
continue
}
if !reflect.DeepEqual(info, *info2) {
t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info)
}
}
cnt := 0
assertNoError(t, ndx.Iterate("", func(info2 Info) error {
info := infoMap[info2.BlockID]
if !reflect.DeepEqual(info, info2) {
t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info)
}
cnt++
return nil
}))
if cnt != len(infoMap) {
t.Errorf("invalid number of iterations: %v, wanted %v", cnt, len(infoMap))
}
prefixes := []string{"a", "b", "f", "0", "3", "aa", "aaa", "aab", "fff", "m", "x", "y", "m0", "ma"}
for i := 0; i < 100; i++ {
blockID := deterministicBlockID("no-such-block", i)
v, err := ndx.GetInfo(blockID)
if err != nil {
t.Errorf("unable to get block %v: %v", blockID, err)
}
if v != nil {
t.Errorf("unexpected result when getting block %v: %v", blockID, v)
}
}
for _, prefix := range prefixes {
cnt2 := 0
assertNoError(t, ndx.Iterate(prefix, func(info2 Info) error {
cnt2++
if !strings.HasPrefix(info2.BlockID, prefix) {
t.Errorf("unexpected item %v when iterating prefix %v", info2.BlockID, prefix)
}
return nil
}))
t.Logf("found %v elements with prefix %q", cnt2, prefix)
}
}
func fuzzTestIndexOpen(t *testing.T, originalData []byte) {
// use a consistent random seed so the fuzz test is reproducible
rnd := rand.New(rand.NewSource(12345))
fuzzTest(rnd, originalData, 50000, func(d []byte) {
ndx, err := openPackIndex(bytes.NewReader(d))
if err != nil {
return
}
defer ndx.Close()
cnt := 0
_ = ndx.Iterate("", func(cb Info) error {
if cnt < 10 {
_, _ = ndx.GetInfo(cb.BlockID)
}
cnt++
return nil
})
})
}
func fuzzTest(rnd *rand.Rand, originalData []byte, rounds int, callback func(d []byte)) {
for round := 0; round < rounds; round++ {
data := append([]byte(nil), originalData...)
// mutate small number of bytes
bytesToMutate := rnd.Intn(3)
for i := 0; i < bytesToMutate; i++ {
pos := rnd.Intn(len(data))
data[pos] = byte(rnd.Int())
}
sectionsToInsert := rnd.Intn(3)
for i := 0; i < sectionsToInsert; i++ {
pos := rnd.Intn(len(data))
insertedLength := rnd.Intn(20)
insertedData := make([]byte, insertedLength)
rnd.Read(insertedData)
data = append(append(append([]byte(nil), data[0:pos]...), insertedData...), data[pos:]...)
}
sectionsToDelete := rnd.Intn(3)
for i := 0; i < sectionsToDelete; i++ {
pos := rnd.Intn(len(data))
deletedLength := rnd.Intn(10)
if pos+deletedLength > len(data) {
continue
}
data = append(append([]byte(nil), data[0:pos]...), data[pos+deletedLength:]...)
}
callback(data)
}
}

25
block/stats.go Normal file
View File

@@ -0,0 +1,25 @@
package block
// Stats exposes statistics about block operations.
type Stats struct {
// Keep int64 fields first to ensure they get aligned to at least 64-bit boundaries
// which is required for atomic access on ARM and x86-32.
ReadBytes int64 `json:"readBytes,omitempty"`
WrittenBytes int64 `json:"writtenBytes,omitempty"`
DecryptedBytes int64 `json:"decryptedBytes,omitempty"`
EncryptedBytes int64 `json:"encryptedBytes,omitempty"`
HashedBytes int64 `json:"hashedBytes,omitempty"`
ReadBlocks int32 `json:"readBlocks,omitempty"`
WrittenBlocks int32 `json:"writtenBlocks,omitempty"`
CheckedBlocks int32 `json:"checkedBlocks,omitempty"`
HashedBlocks int32 `json:"hashedBlocks,omitempty"`
InvalidBlocks int32 `json:"invalidBlocks,omitempty"`
PresentBlocks int32 `json:"presentBlocks,omitempty"`
ValidBlocks int32 `json:"validBlocks,omitempty"`
}
// Reset clears all repository statistics.
func (s *Stats) Reset() {
*s = Stats{}
}
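
A sketch of the access pattern the alignment comment above implies, assumed to live in package block; how the manager actually updates Stats is not shown here, so the helper below is only an illustration:

package block

import "sync/atomic"

// recordRead bumps counters atomically; the int64 fields are declared first in
// Stats so that they are 64-bit aligned, which atomic operations require on
// 32-bit platforms and ARM.
func recordRead(s *Stats, n int64) {
    atomic.AddInt64(&s.ReadBytes, n)
    atomic.AddInt32(&s.ReadBlocks, 1)
}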

111
connect.go Normal file
View File

@@ -0,0 +1,111 @@
package repo
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"github.com/kopia/repo/block"
"github.com/kopia/repo/storage"
"github.com/pkg/errors"
)
// ConnectOptions specifies options when persisting configuration to connect to a repository.
type ConnectOptions struct {
block.CachingOptions
}
// Connect connects to the repository in the specified storage and persists the configuration and credentials in the file provided.
func Connect(ctx context.Context, configFile string, st storage.Storage, password string, opt ConnectOptions) error {
formatBytes, err := st.GetBlock(ctx, FormatBlockID, 0, -1)
if err != nil {
return errors.Wrap(err, "unable to read format block")
}
f, err := parseFormatBlock(formatBytes)
if err != nil {
return err
}
var lc LocalConfig
lc.Storage = st.ConnectionInfo()
if err = setupCaching(configFile, &lc, opt.CachingOptions, f.UniqueID); err != nil {
return errors.Wrap(err, "unable to set up caching")
}
d, err := json.MarshalIndent(&lc, "", " ")
if err != nil {
return err
}
if err = os.MkdirAll(filepath.Dir(configFile), 0700); err != nil {
return errors.Wrap(err, "unable to create config directory")
}
if err = ioutil.WriteFile(configFile, d, 0600); err != nil {
return errors.Wrap(err, "unable to write config file")
}
// now verify that the repository can be opened with the provided config file.
r, err := Open(ctx, configFile, password, nil)
if err != nil {
return err
}
return r.Close(ctx)
}
func setupCaching(configPath string, lc *LocalConfig, opt block.CachingOptions, uniqueID []byte) error {
if opt.MaxCacheSizeBytes == 0 {
lc.Caching = block.CachingOptions{}
return nil
}
if opt.CacheDirectory == "" {
cacheDir, err := os.UserCacheDir()
if err != nil {
return errors.Wrap(err, "unable to determine cache directory")
}
h := sha256.New()
h.Write(uniqueID) //nolint:errcheck
h.Write([]byte(configPath)) //nolint:errcheck
lc.Caching.CacheDirectory = filepath.Join(cacheDir, "kopia", hex.EncodeToString(h.Sum(nil))[0:16])
} else {
absCacheDir, err := filepath.Abs(opt.CacheDirectory)
if err != nil {
return err
}
lc.Caching.CacheDirectory = absCacheDir
}
lc.Caching.MaxCacheSizeBytes = opt.MaxCacheSizeBytes
lc.Caching.MaxListCacheDurationSec = opt.MaxListCacheDurationSec
log.Debugf("Creating cache directory '%v' with max size %v", lc.Caching.CacheDirectory, lc.Caching.MaxCacheSizeBytes)
if err := os.MkdirAll(lc.Caching.CacheDirectory, 0700); err != nil {
log.Warningf("unablet to create cache directory: %v", err)
}
return nil
}
// Disconnect removes the specified configuration file and any local cache directories.
func Disconnect(configFile string) error {
cfg, err := loadConfigFromFile(configFile)
if err != nil {
return err
}
if cfg.Caching.CacheDirectory != "" {
if err = os.RemoveAll(cfg.Caching.CacheDirectory); err != nil {
log.Warningf("unable to to remove cache directory: %v", err)
}
}
return os.Remove(configFile)
}

33
crypto_key_derivation.go Normal file
View File

@@ -0,0 +1,33 @@
package repo
import (
"crypto/sha256"
"fmt"
"io"
"golang.org/x/crypto/hkdf"
"golang.org/x/crypto/scrypt"
)
// defaultKeyDerivationAlgorithm is the key derivation algorithm for new configurations.
const defaultKeyDerivationAlgorithm = "scrypt-65536-8-1"
func (f formatBlock) deriveMasterKeyFromPassword(password string) ([]byte, error) {
const masterKeySize = 32
switch f.KeyDerivationAlgorithm {
case "scrypt-65536-8-1":
return scrypt.Key([]byte(password), f.UniqueID, 65536, 8, 1, masterKeySize)
default:
return nil, fmt.Errorf("unsupported key algorithm: %v", f.KeyDerivationAlgorithm)
}
}
// deriveKeyFromMasterKey computes a key for a specific purpose and length using HKDF based on the master key.
func deriveKeyFromMasterKey(masterKey, uniqueID, purpose []byte, length int) []byte {
key := make([]byte, length)
k := hkdf.New(sha256.New, masterKey, uniqueID, purpose)
io.ReadFull(k, key) //nolint:errcheck
return key
}
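
A sketch of purpose-specific key derivation, assumed to live in package repo; the helper name and sample inputs are illustrative:

package repo

import "fmt"

// keyDerivationExample derives two independent purpose-specific keys from the
// same master key and repository unique ID using deriveKeyFromMasterKey.
func keyDerivationExample() {
    masterKey := make([]byte, 32) // in practice produced by deriveMasterKeyFromPassword
    uniqueID := []byte("repository-unique-id")
    aesKey := deriveKeyFromMasterKey(masterKey, uniqueID, []byte("AES"), 32)
    authData := deriveKeyFromMasterKey(masterKey, uniqueID, []byte("CHECKSUM"), 32)
    fmt.Println(len(aesKey), len(authData)) // 32 32
}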

2
doc.go Normal file
View File

@@ -0,0 +1,2 @@
// Package repo implements content-addressable Repository on top of BLOB storage.
package repo

View File

@@ -0,0 +1,40 @@
//+build !test
// Command repository_api demonstrates the use of Kopia's Repository API.
package main
import (
"context"
"log"
"os"
"github.com/kopia/repo"
)
func main() {
ctx := context.Background()
if err := setupRepositoryAndConnect(ctx, masterPassword); err != nil {
log.Printf("unable to set up repository: %v", err)
os.Exit(1)
}
r, err := repo.Open(ctx, configFile, masterPassword, nil)
if err != nil {
log.Printf("unable to open repository: %v", err)
os.Exit(1)
}
defer r.Close(ctx) //nolint:errcheck
uploadAndDownloadObjects(ctx, r)
// Now list blocks found in the repository.
blks, err := r.Blocks.ListBlocks("")
if err != nil {
log.Printf("err: %v", err)
}
for _, b := range blks {
log.Printf("found block %v", b)
}
}

View File

@@ -0,0 +1,56 @@
//+build !test
package main
import (
"context"
"fmt"
"os"
"github.com/kopia/repo"
"github.com/kopia/repo/block"
"github.com/kopia/repo/storage/filesystem"
"github.com/kopia/repo/storage/logging"
)
const (
masterPassword = "my-password$!@#!@"
storageDir = "/tmp/kopia-example/storage"
configFile = "/tmp/kopia-example/config"
cacheDirectory = "/tmp/kopia-example/cache"
)
func setupRepositoryAndConnect(ctx context.Context, password string) error {
if err := os.MkdirAll(storageDir, 0700); err != nil {
return fmt.Errorf("unable to create directory: %v", err)
}
st, err := filesystem.New(ctx, &filesystem.Options{
Path: storageDir,
})
if err != nil {
return fmt.Errorf("unable to connect to storage: %v", err)
}
// set up logging so we can see what's going on
st = logging.NewWrapper(st)
// see if we already have the config file; if not, connect.
if _, err := os.Stat(configFile); os.IsNotExist(err) {
// initialize repository
if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{}, password); err != nil {
return fmt.Errorf("unable to initialize repository: %v", err)
}
// now establish connection to repository and create configuration file.
if err := repo.Connect(ctx, configFile, st, password, repo.ConnectOptions{
CachingOptions: block.CachingOptions{
CacheDirectory: cacheDirectory,
MaxCacheSizeBytes: 100000000,
},
}); err != nil {
return fmt.Errorf("unable to connect to repository: %v", err)
}
}
return nil
}

View File

@@ -0,0 +1,67 @@
//+build !test
package main
import (
"context"
"crypto/rand"
"io/ioutil"
"log"
"os"
"github.com/kopia/repo"
"github.com/kopia/repo/object"
)
func uploadRandomObject(ctx context.Context, r *repo.Repository, length int) (object.ID, error) {
w := r.Objects.NewWriter(ctx, object.WriterOptions{})
defer w.Close() //nolint:errcheck
buf := make([]byte, 256*1024)
for length > 0 {
todo := length
if todo > len(buf) {
todo = len(buf)
}
rand.Read(buf[0:todo]) //nolint:errcheck
if _, err := w.Write(buf[0:todo]); err != nil {
return "", err
}
length -= todo
}
return w.Result()
}
func downloadObject(ctx context.Context, r *repo.Repository, oid object.ID) ([]byte, error) {
rd, err := r.Objects.Open(ctx, oid)
if err != nil {
return nil, err
}
defer rd.Close() //nolint:errcheck
return ioutil.ReadAll(rd)
}
func uploadAndDownloadObjects(ctx context.Context, r *repo.Repository) {
var oids []object.ID
for size := 100; size < 100000000; size *= 2 {
log.Printf("uploading file with %v bytes", size)
oid, err := uploadRandomObject(ctx, r, size)
if err != nil {
log.Printf("unable to upload: %v", err)
os.Exit(1)
}
log.Printf("uploaded %v bytes as %v", size, oid)
oids = append(oids, oid)
}
for _, oid := range oids {
log.Printf("downloading %q", oid)
b, err := downloadObject(ctx, r, oid)
if err != nil {
log.Printf("unable to read object: %v", err)
}
log.Printf("downloaded %v", len(b))
}
}

263
format_block.go Normal file
View File

@@ -0,0 +1,263 @@
package repo
import (
"bytes"
"context"
"crypto/aes"
"crypto/cipher"
"crypto/hmac"
"crypto/rand"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"github.com/kopia/repo/storage"
"github.com/pkg/errors"
)
const defaultFormatEncryption = "AES256_GCM"
const (
maxChecksummedFormatBytesLength = 65000
formatBlockChecksumSize = sha256.Size
)
// formatBlockChecksumSecret is a HMAC secret used for checksumming the format block.
// It's not really a secret, but will provide positive identification of blocks that
// are repository format blocks.
var formatBlockChecksumSecret = []byte("kopia-repository")
// FormatBlockID is the identifier of a storage block that describes repository format.
const FormatBlockID = "kopia.repository"
var (
purposeAESKey = []byte("AES")
purposeAuthData = []byte("CHECKSUM")
errFormatBlockNotFound = errors.New("format block not found")
)
type formatBlock struct {
Tool string `json:"tool"`
BuildVersion string `json:"buildVersion"`
BuildInfo string `json:"buildInfo"`
UniqueID []byte `json:"uniqueID"`
KeyDerivationAlgorithm string `json:"keyAlgo"`
Version string `json:"version"`
EncryptionAlgorithm string `json:"encryption"`
EncryptedFormatBytes []byte `json:"encryptedBlockFormat,omitempty"`
UnencryptedFormat *repositoryObjectFormat `json:"blockFormat,omitempty"`
}
// encryptedRepositoryConfig contains the configuration of repository that's persisted in encrypted format.
type encryptedRepositoryConfig struct {
Format repositoryObjectFormat `json:"format"`
}
func parseFormatBlock(b []byte) (*formatBlock, error) {
f := &formatBlock{}
if err := json.Unmarshal(b, &f); err != nil {
return nil, errors.Wrap(err, "invalid format block")
}
return f, nil
}
// RecoverFormatBlock attempts to recover a format block replica from the specified file.
// The format block can be either a prefix or a suffix of the given file.
// Optionally, the length can be provided (if known) to speed up recovery.
func RecoverFormatBlock(ctx context.Context, st storage.Storage, filename string, optionalLength int64) ([]byte, error) {
if optionalLength > 0 {
return recoverFormatBlockWithLength(ctx, st, filename, optionalLength)
}
var foundMetadata storage.BlockMetadata
if err := st.ListBlocks(ctx, filename, func(bm storage.BlockMetadata) error {
if foundMetadata.BlockID != "" {
return fmt.Errorf("found multiple blocks with a given prefix: %v", filename)
}
foundMetadata = bm
return nil
}); err != nil {
return nil, errors.Wrap(err, "error")
}
if foundMetadata.BlockID == "" {
return nil, storage.ErrBlockNotFound
}
return recoverFormatBlockWithLength(ctx, st, foundMetadata.BlockID, foundMetadata.Length)
}
func recoverFormatBlockWithLength(ctx context.Context, st storage.Storage, filename string, length int64) ([]byte, error) {
chunkLength := int64(65536)
if chunkLength > length {
chunkLength = length
}
if chunkLength > 4 {
// try prefix
prefixChunk, err := st.GetBlock(ctx, filename, 0, chunkLength)
if err != nil {
return nil, err
}
if l := int(prefixChunk[0]) + int(prefixChunk[1])<<8; l <= maxChecksummedFormatBytesLength && l+2 < len(prefixChunk) {
if b, ok := verifyFormatBlockChecksum(prefixChunk[2 : 2+l]); ok {
return b, nil
}
}
// try the suffix
suffixChunk, err := st.GetBlock(ctx, filename, length-chunkLength, chunkLength)
if err != nil {
return nil, err
}
if l := int(suffixChunk[len(suffixChunk)-2]) + int(suffixChunk[len(suffixChunk)-1])<<8; l <= maxChecksummedFormatBytesLength && l+2 < len(suffixChunk) {
if b, ok := verifyFormatBlockChecksum(suffixChunk[len(suffixChunk)-2-l : len(suffixChunk)-2]); ok {
return b, nil
}
}
}
return nil, errFormatBlockNotFound
}
func verifyFormatBlockChecksum(b []byte) ([]byte, bool) {
if len(b) < formatBlockChecksumSize {
return nil, false
}
data, checksum := b[0:len(b)-formatBlockChecksumSize], b[len(b)-formatBlockChecksumSize:]
h := hmac.New(sha256.New, formatBlockChecksumSecret)
h.Write(data) //nolint:errcheck
actualChecksum := h.Sum(nil)
if !hmac.Equal(actualChecksum, checksum) {
return nil, false
}
return data, true
}
func writeFormatBlock(ctx context.Context, st storage.Storage, f *formatBlock) error {
var buf bytes.Buffer
e := json.NewEncoder(&buf)
e.SetIndent("", " ")
if err := e.Encode(f); err != nil {
return errors.Wrap(err, "unable to marshal format block")
}
if err := st.PutBlock(ctx, FormatBlockID, buf.Bytes()); err != nil {
return errors.Wrap(err, "unable to write format block")
}
return nil
}
func (f *formatBlock) decryptFormatBytes(masterKey []byte) (*repositoryObjectFormat, error) {
switch f.EncryptionAlgorithm {
case "NONE": // do nothing
return f.UnencryptedFormat, nil
case "AES256_GCM":
aead, authData, err := initCrypto(masterKey, f.UniqueID)
if err != nil {
return nil, errors.Wrap(err, "cannot initialize cipher")
}
content := append([]byte(nil), f.EncryptedFormatBytes...)
if len(content) < aead.NonceSize() {
return nil, fmt.Errorf("invalid encrypted payload, too short")
}
nonce := content[0:aead.NonceSize()]
payload := content[aead.NonceSize():]
plainText, err := aead.Open(payload[:0], nonce, payload, authData)
if err != nil {
return nil, fmt.Errorf("unable to decrypt repository format, invalid credentials?")
}
var erc encryptedRepositoryConfig
if err := json.Unmarshal(plainText, &erc); err != nil {
return nil, errors.Wrap(err, "invalid repository format")
}
return &erc.Format, nil
default:
return nil, fmt.Errorf("unknown encryption algorithm: '%v'", f.EncryptionAlgorithm)
}
}
func initCrypto(masterKey, repositoryID []byte) (cipher.AEAD, []byte, error) {
aesKey := deriveKeyFromMasterKey(masterKey, repositoryID, purposeAESKey, 32)
authData := deriveKeyFromMasterKey(masterKey, repositoryID, purposeAuthData, 32)
blk, err := aes.NewCipher(aesKey)
if err != nil {
return nil, nil, errors.Wrap(err, "cannot create cipher")
}
aead, err := cipher.NewGCM(blk)
if err != nil {
return nil, nil, errors.Wrap(err, "cannot create cipher")
}
return aead, authData, nil
}
func encryptFormatBytes(f *formatBlock, format *repositoryObjectFormat, masterKey, repositoryID []byte) error {
switch f.EncryptionAlgorithm {
case "NONE":
f.UnencryptedFormat = format
return nil
case "AES256_GCM":
content, err := json.Marshal(&encryptedRepositoryConfig{Format: *format})
if err != nil {
return errors.Wrap(err, "can't marshal format to JSON")
}
aead, authData, err := initCrypto(masterKey, repositoryID)
if err != nil {
return errors.Wrap(err, "unable to initialize crypto")
}
nonceLength := aead.NonceSize()
noncePlusContentLength := nonceLength + len(content)
cipherText := make([]byte, noncePlusContentLength+aead.Overhead())
// Store nonce at the beginning of ciphertext.
nonce := cipherText[0:nonceLength]
if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
return err
}
b := aead.Seal(cipherText[nonceLength:nonceLength], nonce, content, authData)
content = nonce[0 : nonceLength+len(b)]
f.EncryptedFormatBytes = content
return nil
default:
return fmt.Errorf("unknown encryption algorithm: '%v'", f.EncryptionAlgorithm)
}
}
func addFormatBlockChecksumAndLength(fb []byte) ([]byte, error) {
h := hmac.New(sha256.New, formatBlockChecksumSecret)
h.Write(fb) //nolint:errcheck
checksummedFormatBytes := h.Sum(fb)
l := len(checksummedFormatBytes)
if l > maxChecksummedFormatBytesLength {
return nil, fmt.Errorf("format block too big: %v", l)
}
// return <length><checksummed-bytes><length>
result := append([]byte(nil), byte(l), byte(l>>8))
result = append(result, checksummedFormatBytes...)
result = append(result, byte(l), byte(l>>8))
return result, nil
}
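
A sketch of the replica framing produced above, assumed to live in package repo; the helper name and sample payload are illustrative:

package repo

import "fmt"

// formatBlockFramingExample wraps a payload as <length><payload+HMAC><length>,
// which is the frame recoverFormatBlockWithLength looks for at either end of a
// pack file, then verifies and unwraps it again.
func formatBlockFramingExample() error {
    framed, err := addFormatBlockChecksumAndLength([]byte(`{"tool":"example"}`))
    if err != nil {
        return err
    }
    l := int(framed[0]) + int(framed[1])<<8 // little-endian length prefix
    payload, ok := verifyFormatBlockChecksum(framed[2 : 2+l])
    if !ok {
        return fmt.Errorf("checksum did not verify")
    }
    fmt.Printf("%s\n", payload) // {"tool":"example"}
    return nil
}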

79
format_block_test.go Normal file
View File

@@ -0,0 +1,79 @@
package repo
import (
"context"
"crypto/sha256"
"reflect"
"testing"
"github.com/kopia/repo/internal/storagetesting"
"github.com/kopia/repo/storage"
)
func TestFormatBlockRecovery(t *testing.T) {
data := map[string][]byte{}
st := storagetesting.NewMapStorage(data, nil, nil)
ctx := context.Background()
someDataBlock := []byte("aadsdasdas")
checksummed, err := addFormatBlockChecksumAndLength(someDataBlock)
if err != nil {
t.Errorf("error appending checksum: %v", err)
}
if got, want := len(checksummed), 2+2+sha256.Size+len(someDataBlock); got != want {
t.Errorf("unexpected checksummed length: %v, want %v", got, want)
}
assertNoError(t, st.PutBlock(ctx, "some-block-by-itself", checksummed))
assertNoError(t, st.PutBlock(ctx, "some-block-suffix", append(append([]byte(nil), 1, 2, 3), checksummed...)))
assertNoError(t, st.PutBlock(ctx, "some-block-prefix", append(append([]byte(nil), checksummed...), 1, 2, 3)))
// mess up checksum
checksummed[len(checksummed)-3] ^= 1
assertNoError(t, st.PutBlock(ctx, "bad-checksum", checksummed))
assertNoError(t, st.PutBlock(ctx, "zero-len", []byte{}))
assertNoError(t, st.PutBlock(ctx, "one-len", []byte{1}))
assertNoError(t, st.PutBlock(ctx, "two-len", []byte{1, 2}))
assertNoError(t, st.PutBlock(ctx, "three-len", []byte{1, 2, 3}))
assertNoError(t, st.PutBlock(ctx, "four-len", []byte{1, 2, 3, 4}))
assertNoError(t, st.PutBlock(ctx, "five-len", []byte{1, 2, 3, 4, 5}))
cases := []struct {
block string
err error
}{
{"some-block-by-itself", nil},
{"some-block-suffix", nil},
{"some-block-prefix", nil},
{"bad-checksum", errFormatBlockNotFound},
{"no-such-block", storage.ErrBlockNotFound},
{"zero-len", errFormatBlockNotFound},
{"one-len", errFormatBlockNotFound},
{"two-len", errFormatBlockNotFound},
{"three-len", errFormatBlockNotFound},
{"four-len", errFormatBlockNotFound},
{"five-len", errFormatBlockNotFound},
}
for _, tc := range cases {
t.Run(tc.block, func(t *testing.T) {
v, err := RecoverFormatBlock(ctx, st, tc.block, -1)
if tc.err == nil {
if !reflect.DeepEqual(v, someDataBlock) || err != nil {
t.Errorf("unexpected result or error: v=%v err=%v, expected success", v, err)
}
} else {
if v != nil || err != tc.err {
t.Errorf("unexpected result or error: v=%v err=%v, expected %v", v, err, tc.err)
}
}
})
}
}
func assertNoError(t *testing.T, err error) {
t.Helper()
if err != nil {
t.Errorf("err: %v", err)
}
}

132
initialize.go Normal file
View File

@@ -0,0 +1,132 @@
package repo
import (
"context"
"crypto/rand"
"fmt"
"io"
"github.com/kopia/repo/block"
"github.com/kopia/repo/object"
"github.com/kopia/repo/storage"
"github.com/pkg/errors"
)
// BuildInfo and BuildVersion describe the build of Kopia.
var (
BuildInfo = "unknown"
BuildVersion = "v0-unofficial"
)
// NewRepositoryOptions specifies options that apply to newly created repositories.
// All fields are optional, when not provided, reasonable defaults will be used.
type NewRepositoryOptions struct {
UniqueID []byte // force the use of particular unique ID
BlockFormat block.FormattingOptions
DisableHMAC bool
ObjectFormat object.Format // object format
}
// Initialize creates initial repository data structures in the specified storage with given credentials.
func Initialize(ctx context.Context, st storage.Storage, opt *NewRepositoryOptions, password string) error {
if opt == nil {
opt = &NewRepositoryOptions{}
}
// ensure the format block does not exist yet - expect ErrBlockNotFound
_, err := st.GetBlock(ctx, FormatBlockID, 0, -1)
if err == nil {
return fmt.Errorf("repository already initialized")
}
if err != storage.ErrBlockNotFound {
return err
}
format := formatBlockFromOptions(opt)
masterKey, err := format.deriveMasterKeyFromPassword(password)
if err != nil {
return errors.Wrap(err, "unable to derive master key")
}
if err := encryptFormatBytes(format, repositoryObjectFormatFromOptions(opt), masterKey, format.UniqueID); err != nil {
return errors.Wrap(err, "unable to encrypt format bytes")
}
if err := writeFormatBlock(ctx, st, format); err != nil {
return errors.Wrap(err, "unable to write format block")
}
return nil
}
func formatBlockFromOptions(opt *NewRepositoryOptions) *formatBlock {
f := &formatBlock{
Tool: "https://github.com/kopia/kopia",
BuildInfo: BuildInfo,
KeyDerivationAlgorithm: defaultKeyDerivationAlgorithm,
UniqueID: applyDefaultRandomBytes(opt.UniqueID, 32),
Version: "1",
EncryptionAlgorithm: defaultFormatEncryption,
}
if opt.BlockFormat.Encryption == "NONE" {
f.EncryptionAlgorithm = "NONE"
}
return f
}
func repositoryObjectFormatFromOptions(opt *NewRepositoryOptions) *repositoryObjectFormat {
f := &repositoryObjectFormat{
FormattingOptions: block.FormattingOptions{
Version: 1,
Hash: applyDefaultString(opt.BlockFormat.Hash, block.DefaultHash),
Encryption: applyDefaultString(opt.BlockFormat.Encryption, block.DefaultEncryption),
HMACSecret: applyDefaultRandomBytes(opt.BlockFormat.HMACSecret, 32),
MasterKey: applyDefaultRandomBytes(opt.BlockFormat.MasterKey, 32),
MaxPackSize: applyDefaultInt(opt.BlockFormat.MaxPackSize, applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20)), // 20 MiB
},
Format: object.Format{
Splitter: applyDefaultString(opt.ObjectFormat.Splitter, object.DefaultSplitter),
MaxBlockSize: applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20), // 20MiB
MinBlockSize: applyDefaultInt(opt.ObjectFormat.MinBlockSize, 10<<20), // 10MiB
AvgBlockSize: applyDefaultInt(opt.ObjectFormat.AvgBlockSize, 16<<20), // 16MiB
},
}
if opt.DisableHMAC {
f.HMACSecret = nil
}
return f
}
func randomBytes(n int) []byte {
b := make([]byte, n)
if _, err := io.ReadFull(rand.Reader, b); err != nil {
// a failing crypto/rand is not recoverable here; failing hard is safer than
// silently producing all-zero key material.
panic("unable to read random bytes: " + err.Error())
}
return b
}
func applyDefaultInt(v, def int) int {
if v == 0 {
return def
}
return v
}
func applyDefaultString(v, def string) string {
if v == "" {
return def
}
return v
}
func applyDefaultRandomBytes(b []byte, n int) []byte {
if b == nil {
return randomBytes(n)
}
return b
}
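A minimal sketch (not part of this change) of how Initialize is expected to be called, using the in-memory map storage from internal/storagetesting; the test name is hypothetical.
package repo

import (
	"context"
	"testing"

	"github.com/kopia/repo/internal/storagetesting"
)

func TestInitializeSketch(t *testing.T) {
	ctx := context.Background()
	st := storagetesting.NewMapStorage(map[string][]byte{}, nil, nil)
	// nil options select reasonable defaults for all formatting parameters.
	if err := Initialize(ctx, st, nil, "example-password"); err != nil {
		t.Fatalf("initialize: %v", err)
	}
	// a second attempt must fail, because the format block already exists.
	if err := Initialize(ctx, st, nil, "example-password"); err == nil {
		t.Fatal("expected error on double initialization")
	}
}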

View File

@@ -0,0 +1,9 @@
// Package repologging provides loggers.
package repologging
import "github.com/op/go-logging"
// Logger returns an instance of a logger used throughout repository codebase.
func Logger(module string) *logging.Logger {
return logging.MustGetLogger(module)
}

View File

@@ -23,6 +23,7 @@ type Environment struct {
configDir string
storageDir string
connected bool
}
// Setup sets up a test environment.
@@ -75,6 +76,8 @@ func (e *Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOption
t.Fatalf("can't connect: %v", err)
}
e.connected = true
e.Repository, err = repo.Open(ctx, e.configFile(), masterPassword, &repo.Options{})
if err != nil {
t.Fatalf("can't open: %v", err)
@@ -88,8 +91,13 @@ func (e *Environment) Close(t *testing.T) {
if err := e.Repository.Close(context.Background()); err != nil {
t.Fatalf("unable to close: %v", err)
}
if err := os.RemoveAll(e.configDir); err != nil {
if e.connected {
if err := repo.Disconnect(e.configFile()); err != nil {
t.Errorf("error disconnecting: %v", err)
}
}
if err := os.Remove(e.configDir); err != nil {
// should be empty, assuming Disconnect was successful
t.Errorf("error removing config directory: %v", err)
}
if err := os.RemoveAll(e.storageDir); err != nil {

44
internal/retry/retry.go Normal file
View File

@@ -0,0 +1,44 @@
// Package retry implements exponential retry policy.
package retry
import (
"fmt"
"time"
"github.com/kopia/repo/internal/repologging"
)
var log = repologging.Logger("repo/retry")
var (
maxAttempts = 10
retryInitialSleepAmount = 1 * time.Second
retryMaxSleepAmount = 32 * time.Second
)
// AttemptFunc performs an attempt and returns a value (optional, may be nil) and an error.
type AttemptFunc func() (interface{}, error)
// IsRetriableFunc is a function that determines whether an error is retriable.
type IsRetriableFunc func(err error) bool
// WithExponentialBackoff runs the provided attempt until it succeeds, retrying on all errors that are
// deemed retriable by the provided function. The delay between retries grows exponentially up to
// a certain limit.
func WithExponentialBackoff(desc string, attempt AttemptFunc, isRetriableError IsRetriableFunc) (interface{}, error) {
sleepAmount := retryInitialSleepAmount
for i := 0; i < maxAttempts; i++ {
v, err := attempt()
if !isRetriableError(err) {
return v, err
}
log.Debugf("got error %v when %v (#%v), sleeping for %v before retrying", err, desc, i, sleepAmount)
time.Sleep(sleepAmount)
sleepAmount *= 2
if sleepAmount > retryMaxSleepAmount {
sleepAmount = retryMaxSleepAmount
}
}
return nil, fmt.Errorf("unable to complete %v despite %v retries", desc, maxAttempts)
}
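A minimal usage sketch for WithExponentialBackoff (not part of this change); the attempt closure and the retriable error shown here are illustrative.
package retry_test

import (
	"errors"
	"fmt"

	"github.com/kopia/repo/internal/retry"
)

var errTemporarilyUnavailable = errors.New("temporarily unavailable")

func ExampleWithExponentialBackoff() {
	v, err := retry.WithExponentialBackoff("opening storage", func() (interface{}, error) {
		// a hypothetical operation that may fail transiently
		return "connection", nil
	}, func(err error) bool {
		return err == errTemporarilyUnavailable
	})
	fmt.Println(v, err)
	// Output: connection <nil>
}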

View File

@@ -0,0 +1,59 @@
package retry
import (
"errors"
"fmt"
"reflect"
"testing"
"time"
)
var (
errRetriable = errors.New("retriable")
)
func isRetriable(e error) bool {
return e == errRetriable
}
func TestRetry(t *testing.T) {
retryInitialSleepAmount = 10 * time.Millisecond
retryMaxSleepAmount = 20 * time.Millisecond
maxAttempts = 3
cnt := 0
cases := []struct {
desc string
f func() (interface{}, error)
want interface{}
wantError error
}{
{"success-nil", func() (interface{}, error) { return nil, nil }, nil, nil},
{"success", func() (interface{}, error) { return 3, nil }, 3, nil},
{"retriable-succeeds", func() (interface{}, error) {
cnt++
if cnt < 2 {
return nil, errRetriable
}
return 4, nil
}, 4, nil},
{"retriable-never-succeeds", func() (interface{}, error) { return nil, errRetriable }, nil, fmt.Errorf("unable to complete retriable-never-succeeds despite 3 retries")},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
tc := tc
t.Parallel()
got, err := WithExponentialBackoff(tc.desc, tc.f, isRetriable)
if !reflect.DeepEqual(err, tc.wantError) {
t.Errorf("invalid error %q, wanted %q", err, tc.wantError)
}
if got != tc.want {
t.Errorf("invalid value %v, wanted %v", got, tc.want)
}
})
}
}

View File

@@ -0,0 +1,110 @@
package storagetesting
import (
"bytes"
"context"
"reflect"
"sort"
"testing"
"github.com/kopia/repo/storage"
)
// AssertGetBlock asserts that the specified storage block has correct content.
func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block string, expected []byte) {
t.Helper()
b, err := s.GetBlock(ctx, block, 0, -1)
if err != nil {
t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
return
}
if !bytes.Equal(b, expected) {
t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected)
}
half := int64(len(expected) / 2)
if half == 0 {
return
}
b, err = s.GetBlock(ctx, block, 0, 0)
if err != nil {
t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
return
}
if len(b) != 0 {
t.Errorf("GetBlock(%v) returned non-zero length: %v", block, len(b))
return
}
b, err = s.GetBlock(ctx, block, 0, half)
if err != nil {
t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
return
}
if !bytes.Equal(b, expected[0:half]) {
t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected[0:half])
}
b, err = s.GetBlock(ctx, block, half, int64(len(expected))-half)
if err != nil {
t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
return
}
if !bytes.Equal(b, expected[len(expected)-int(half):]) {
t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected[len(expected)-int(half):])
}
AssertInvalidOffsetLength(ctx, t, s, block, -3, 1)
AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)), 3)
AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)-1), 3)
AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)+1), 3)
}
// AssertInvalidOffsetLength verifies that the given combination of (offset,length) fails on GetBlock()
func AssertInvalidOffsetLength(ctx context.Context, t *testing.T, s storage.Storage, block string, offset, length int64) {
if _, err := s.GetBlock(ctx, block, offset, length); err == nil {
t.Errorf("GetBlock(%v,%v,%v) did not return error for invalid offset/length", block, offset, length)
}
}
// AssertGetBlockNotFound asserts that GetBlock() for specified storage block returns ErrBlockNotFound.
func AssertGetBlockNotFound(ctx context.Context, t *testing.T, s storage.Storage, block string) {
t.Helper()
b, err := s.GetBlock(ctx, block, 0, -1)
if err != storage.ErrBlockNotFound || b != nil {
t.Errorf("GetBlock(%v) returned %v, %v but expected ErrBlockNotFound", block, b, err)
}
}
// AssertListResults asserts that the list results with given prefix return the specified list of names in order.
func AssertListResults(ctx context.Context, t *testing.T, s storage.Storage, prefix string, want ...string) {
t.Helper()
var names []string
if err := s.ListBlocks(ctx, prefix, func(e storage.BlockMetadata) error {
names = append(names, e.BlockID)
return nil
}); err != nil {
t.Fatalf("err: %v", err)
}
names = sorted(names)
want = sorted(want)
if !reflect.DeepEqual(names, want) {
t.Errorf("ListBlocks(%v) returned %v, but wanted %v", prefix, names, want)
}
}
func sorted(s []string) []string {
x := append([]string(nil), s...)
sort.Strings(x)
return x
}
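A brief sketch (not part of this change) of how these assertion helpers compose in a storage test; the test name is hypothetical.
package storagetesting

import (
	"context"
	"testing"
)

func TestAssertionHelpersSketch(t *testing.T) {
	ctx := context.Background()
	st := NewMapStorage(map[string][]byte{}, nil, nil)
	AssertGetBlockNotFound(ctx, t, st, "missing")
	if err := st.PutBlock(ctx, "blk1", []byte{1, 2, 3}); err != nil {
		t.Fatalf("put: %v", err)
	}
	AssertGetBlock(ctx, t, st, "blk1", []byte{1, 2, 3})
	AssertListResults(ctx, t, st, "", "blk1")
}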

View File

@@ -0,0 +1,2 @@
// Package storagetesting is used for testing Storage implementations.
package storagetesting

View File

@@ -0,0 +1,115 @@
package storagetesting
import (
"context"
"sync"
"time"
"github.com/kopia/repo/internal/repologging"
"github.com/kopia/repo/storage"
)
var log = repologging.Logger("faulty-storage")
// Fault describes the behavior of a single fault.
type Fault struct {
Repeat int // how many times to repeat this fault
Sleep time.Duration // sleep before returning
ErrCallback func() error // if set, invoked to produce the returned error (takes precedence over Err)
WaitFor chan struct{} // waits until the given channel is closed before returning
Err error // error to return (can be nil in combination with Sleep and WaitFor)
}
// FaultyStorage implements fault injection for Storage.
type FaultyStorage struct {
Base storage.Storage
Faults map[string][]*Fault
mu sync.Mutex
}
// GetBlock implements storage.Storage
func (s *FaultyStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) {
if err := s.getNextFault("GetBlock", id, offset, length); err != nil {
return nil, err
}
return s.Base.GetBlock(ctx, id, offset, length)
}
// PutBlock implements storage.Storage
func (s *FaultyStorage) PutBlock(ctx context.Context, id string, data []byte) error {
if err := s.getNextFault("PutBlock", id, len(data)); err != nil {
return err
}
return s.Base.PutBlock(ctx, id, data)
}
// DeleteBlock implements storage.Storage
func (s *FaultyStorage) DeleteBlock(ctx context.Context, id string) error {
if err := s.getNextFault("DeleteBlock", id); err != nil {
return err
}
return s.Base.DeleteBlock(ctx, id)
}
// ListBlocks implements storage.Storage
func (s *FaultyStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
if err := s.getNextFault("ListBlocks", prefix); err != nil {
return err
}
return s.Base.ListBlocks(ctx, prefix, func(bm storage.BlockMetadata) error {
if err := s.getNextFault("ListBlocksItem", prefix); err != nil {
return err
}
return callback(bm)
})
}
// Close implements storage.Storage
func (s *FaultyStorage) Close(ctx context.Context) error {
if err := s.getNextFault("Close"); err != nil {
return err
}
return s.Base.Close(ctx)
}
// ConnectionInfo implements storage.Storage
func (s *FaultyStorage) ConnectionInfo() storage.ConnectionInfo {
return s.Base.ConnectionInfo()
}
func (s *FaultyStorage) getNextFault(method string, args ...interface{}) error {
s.mu.Lock()
faults := s.Faults[method]
if len(faults) == 0 {
s.mu.Unlock()
log.Debugf("no faults for %v %v", method, args)
return nil
}
f := faults[0]
if f.Repeat > 0 {
f.Repeat--
log.Debugf("will repeat %v more times the fault for %v %v", f.Repeat, method, args)
} else {
s.Faults[method] = faults[1:]
}
s.mu.Unlock()
if f.WaitFor != nil {
log.Debugf("waiting for channel to be closed in %v %v", method, args)
<-f.WaitFor
}
if f.Sleep > 0 {
log.Debugf("sleeping for %v in %v %v", f.Sleep, method, args)
time.Sleep(f.Sleep)
}
if f.ErrCallback != nil {
err := f.ErrCallback()
log.Debugf("returning %v for %v %v", err, method, args)
return err
}
log.Debugf("returning %v for %v %v", f.Err, method, args)
return f.Err
}
var _ storage.Storage = (*FaultyStorage)(nil)
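A short sketch (not part of this change) showing how faults are declared: with Repeat: 1 the error is returned twice before calls pass through to Base. The test name and error are hypothetical.
package storagetesting

import (
	"context"
	"errors"
	"testing"
)

func TestFaultyStorageSketch(t *testing.T) {
	ctx := context.Background()
	base := NewMapStorage(map[string][]byte{}, nil, nil)
	if err := base.PutBlock(ctx, "blk", []byte{1, 2, 3}); err != nil {
		t.Fatalf("put: %v", err)
	}
	errOutage := errors.New("simulated outage")
	s := &FaultyStorage{
		Base:   base,
		Faults: map[string][]*Fault{"GetBlock": {{Err: errOutage, Repeat: 1}}},
	}
	// the fault fires on the first two calls, then is removed from the queue.
	for i := 0; i < 2; i++ {
		if _, err := s.GetBlock(ctx, "blk", 0, -1); err != errOutage {
			t.Fatalf("expected simulated outage on call %v, got %v", i, err)
		}
	}
	if _, err := s.GetBlock(ctx, "blk", 0, -1); err != nil {
		t.Fatalf("expected success after fault cleared, got %v", err)
	}
}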

View File

@@ -0,0 +1,133 @@
package storagetesting
import (
"context"
"errors"
"sort"
"strings"
"sync"
"time"
"github.com/kopia/repo/storage"
)
type mapStorage struct {
data map[string][]byte
keyTime map[string]time.Time
timeNow func() time.Time
mutex sync.RWMutex
}
func (s *mapStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) {
s.mutex.RLock()
defer s.mutex.RUnlock()
data, ok := s.data[id]
if ok {
data = append([]byte(nil), data...)
if length < 0 {
return data, nil
}
if int(offset) > len(data) || offset < 0 {
return nil, errors.New("invalid offset")
}
data = data[offset:]
if int(length) > len(data) {
return nil, errors.New("invalid length")
}
return data[0:length], nil
}
return nil, storage.ErrBlockNotFound
}
func (s *mapStorage) PutBlock(ctx context.Context, id string, data []byte) error {
s.mutex.Lock()
defer s.mutex.Unlock()
if _, ok := s.data[id]; ok {
return nil
}
s.keyTime[id] = s.timeNow()
s.data[id] = append([]byte{}, data...)
return nil
}
func (s *mapStorage) DeleteBlock(ctx context.Context, id string) error {
s.mutex.Lock()
defer s.mutex.Unlock()
delete(s.data, id)
delete(s.keyTime, id)
return nil
}
func (s *mapStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
s.mutex.RLock()
keys := []string{}
for k := range s.data {
if strings.HasPrefix(k, prefix) {
keys = append(keys, k)
}
}
s.mutex.RUnlock()
sort.Strings(keys)
for _, k := range keys {
s.mutex.RLock()
v, ok := s.data[k]
ts := s.keyTime[k]
s.mutex.RUnlock()
if !ok {
continue
}
if err := callback(storage.BlockMetadata{
BlockID: k,
Length: int64(len(v)),
Timestamp: ts,
}); err != nil {
return err
}
}
return nil
}
func (s *mapStorage) Close(ctx context.Context) error {
return nil
}
func (s *mapStorage) TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error {
s.mutex.Lock()
defer s.mutex.Unlock()
if v, ok := s.keyTime[blockID]; ok {
n := s.timeNow()
if n.Sub(v) >= threshold {
s.keyTime[blockID] = n
}
}
return nil
}
func (s *mapStorage) ConnectionInfo() storage.ConnectionInfo {
// unsupported
return storage.ConnectionInfo{}
}
// NewMapStorage returns an implementation of Storage backed by the contents of given map.
// Used primarily for testing.
func NewMapStorage(data map[string][]byte, keyTime map[string]time.Time, timeNow func() time.Time) storage.Storage {
if keyTime == nil {
keyTime = make(map[string]time.Time)
}
if timeNow == nil {
timeNow = time.Now
}
return &mapStorage{data: data, keyTime: keyTime, timeNow: timeNow}
}
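An illustrative sketch (not part of this change) of the TouchBlock threshold behavior; the test name is hypothetical.
package storagetesting

import (
	"context"
	"testing"
	"time"
)

func TestTouchBlockSketch(t *testing.T) {
	ctx := context.Background()
	now := time.Date(2019, 1, 1, 0, 0, 0, 0, time.UTC)
	keyTime := map[string]time.Time{}
	st := NewMapStorage(map[string][]byte{}, keyTime, func() time.Time { return now })
	if err := st.PutBlock(ctx, "b1", []byte{1}); err != nil {
		t.Fatalf("put: %v", err)
	}
	now = now.Add(time.Hour)
	// one hour has passed, which exceeds the 30-minute threshold, so the timestamp is refreshed.
	if err := st.(*mapStorage).TouchBlock(ctx, "b1", 30*time.Minute); err != nil {
		t.Fatalf("touch: %v", err)
	}
	if !keyTime["b1"].Equal(now) {
		t.Errorf("timestamp was not refreshed: %v", keyTime["b1"])
	}
}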

View File

@@ -0,0 +1,15 @@
package storagetesting
import (
"context"
"testing"
)
func TestMapStorage(t *testing.T) {
data := map[string][]byte{}
r := NewMapStorage(data, nil, nil)
if r == nil {
t.Errorf("unexpected result: %v", r)
}
VerifyStorage(context.Background(), t, r)
}

View File

@@ -0,0 +1,84 @@
package storagetesting
import (
"bytes"
"context"
"reflect"
"testing"
"github.com/kopia/repo/storage"
)
// VerifyStorage verifies the behavior of the specified storage.
func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) {
blocks := []struct {
blk string
contents []byte
}{
{blk: string("abcdbbf4f0507d054ed5a80a5b65086f602b"), contents: []byte{}},
{blk: string("zxce0e35630770c54668a8cfb4e414c6bf8f"), contents: []byte{1}},
{blk: string("abff4585856ebf0748fd989e1dd623a8963d"), contents: bytes.Repeat([]byte{1}, 1000)},
{blk: string("abgc3dca496d510f492c858a2df1eb824e62"), contents: bytes.Repeat([]byte{1}, 10000)},
{blk: string("kopia.repository"), contents: bytes.Repeat([]byte{2}, 100)},
}
// First verify that blocks don't exist.
for _, b := range blocks {
AssertGetBlockNotFound(ctx, t, r, b.blk)
}
ctx2 := storage.WithUploadProgressCallback(ctx, func(desc string, completed, total int64) {
log.Infof("progress %v: %v/%v", desc, completed, total)
})
// Now add blocks.
for _, b := range blocks {
if err := r.PutBlock(ctx2, b.blk, b.contents); err != nil {
t.Errorf("can't put block: %v", err)
}
AssertGetBlock(ctx, t, r, b.blk, b.contents)
}
AssertListResults(ctx, t, r, "", blocks[0].blk, blocks[1].blk, blocks[2].blk, blocks[3].blk, blocks[4].blk)
AssertListResults(ctx, t, r, "ab", blocks[0].blk, blocks[2].blk, blocks[3].blk)
// Overwrite blocks.
for _, b := range blocks {
if err := r.PutBlock(ctx, b.blk, b.contents); err != nil {
t.Errorf("can't put block: %v", err)
}
AssertGetBlock(ctx, t, r, b.blk, b.contents)
}
if err := r.DeleteBlock(ctx, blocks[0].blk); err != nil {
t.Errorf("unable to delete block: %v", err)
}
if err := r.DeleteBlock(ctx, blocks[0].blk); err != nil {
t.Errorf("invalid error when deleting deleted block: %v", err)
}
AssertListResults(ctx, t, r, "ab", blocks[2].blk, blocks[3].blk)
AssertListResults(ctx, t, r, "", blocks[1].blk, blocks[2].blk, blocks[3].blk, blocks[4].blk)
}
// AssertConnectionInfoRoundTrips verifies that the ConnectionInfo returned by a given storage can be used to create
// equivalent storage
func AssertConnectionInfoRoundTrips(ctx context.Context, t *testing.T, s storage.Storage) {
t.Helper()
ci := s.ConnectionInfo()
s2, err := storage.NewStorage(ctx, ci)
if err != nil {
t.Fatalf("err: %v", err)
}
ci2 := s2.ConnectionInfo()
if !reflect.DeepEqual(ci, ci2) {
t.Errorf("connection info does not round-trip: %v vs %v", ci, ci2)
}
if err := s2.Close(ctx); err != nil {
t.Errorf("unable to close storage: %v", err)
}
}

View File

@@ -0,0 +1,44 @@
package throttle
import (
"io"
"net/http"
)
type throttlerPool interface {
AddReader(io.ReadCloser) (io.ReadCloser, error)
}
type throttlingRoundTripper struct {
base http.RoundTripper
downloadPool throttlerPool
uploadPool throttlerPool
}
func (rt *throttlingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
if req.Body != nil && rt.uploadPool != nil {
var err error
req.Body, err = rt.uploadPool.AddReader(req.Body)
if err != nil {
return nil, err
}
}
resp, err := rt.base.RoundTrip(req)
if resp != nil && resp.Body != nil && rt.downloadPool != nil {
resp.Body, err = rt.downloadPool.AddReader(resp.Body)
}
return resp, err
}
// NewRoundTripper returns http.RoundTripper that throttles upload and downloads.
func NewRoundTripper(base http.RoundTripper, downloadPool throttlerPool, uploadPool throttlerPool) http.RoundTripper {
if base == nil {
base = http.DefaultTransport
}
return &throttlingRoundTripper{
base: base,
downloadPool: downloadPool,
uploadPool: uploadPool,
}
}
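A minimal sketch (not part of this change) of how the round tripper is meant to be wired into an http.Client; newThrottledClient is a hypothetical helper, and either pool may be nil.
package throttle

import "net/http"

// newThrottledClient is illustrative only: it wraps http.DefaultTransport with the
// throttling round tripper. Pass nil for either pool to disable throttling in that direction.
func newThrottledClient(downloadPool, uploadPool throttlerPool) *http.Client {
	return &http.Client{
		Transport: NewRoundTripper(http.DefaultTransport, downloadPool, uploadPool),
	}
}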

View File

@@ -0,0 +1,103 @@
package throttle
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"net/http"
"testing"
)
type baseRoundTripper struct {
responses map[*http.Request]*http.Response
}
func (rt *baseRoundTripper) add(req *http.Request, resp *http.Response) (*http.Request, *http.Response) {
rt.responses[req] = resp
return req, resp
}
func (rt *baseRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
resp := rt.responses[req]
if resp != nil {
return resp, nil
}
return nil, fmt.Errorf("error occurred")
}
type fakePool struct {
readers []io.ReadCloser
}
func (fp *fakePool) reset() {
fp.readers = nil
}
func (fp *fakePool) AddReader(r io.ReadCloser) (io.ReadCloser, error) {
fp.readers = append(fp.readers, r)
return r, nil
}
func TestRoundTripper(t *testing.T) {
downloadBody := ioutil.NopCloser(bytes.NewReader([]byte("data1")))
uploadBody := ioutil.NopCloser(bytes.NewReader([]byte("data1")))
base := &baseRoundTripper{
responses: make(map[*http.Request]*http.Response),
}
downloadPool := &fakePool{}
uploadPool := &fakePool{}
rt := NewRoundTripper(base, downloadPool, uploadPool)
// Empty request (no request, no response)
uploadPool.reset()
downloadPool.reset()
req1, resp1 := base.add(&http.Request{}, &http.Response{})
resp, err := rt.RoundTrip(req1)
if resp != resp1 || err != nil {
t.Errorf("invalid response or error: %v", err)
}
if len(downloadPool.readers) != 0 || len(uploadPool.readers) != 0 {
t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
}
// Upload request
uploadPool.reset()
downloadPool.reset()
req2, resp2 := base.add(&http.Request{
Body: uploadBody,
}, &http.Response{})
resp, err = rt.RoundTrip(req2)
if resp != resp2 || err != nil {
t.Errorf("invalid response or error: %v", err)
}
if len(downloadPool.readers) != 0 || len(uploadPool.readers) != 1 {
t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
}
// Download request
uploadPool.reset()
downloadPool.reset()
req3, resp3 := base.add(&http.Request{}, &http.Response{Body: downloadBody})
resp, err = rt.RoundTrip(req3)
if resp != resp3 || err != nil {
t.Errorf("invalid response or error: %v", err)
}
if len(downloadPool.readers) != 1 || len(uploadPool.readers) != 0 {
t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
}
// Upload/Download request
uploadPool.reset()
downloadPool.reset()
req4, resp4 := base.add(&http.Request{Body: uploadBody}, &http.Response{Body: downloadBody})
resp, err = rt.RoundTrip(req4)
if resp != resp4 || err != nil {
t.Errorf("invalid response or error: %v", err)
}
if len(downloadPool.readers) != 1 || len(uploadPool.readers) != 1 {
t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
}
}

56
local_config.go Normal file
View File

@@ -0,0 +1,56 @@
package repo
import (
"encoding/json"
"io"
"os"
"github.com/kopia/repo/block"
"github.com/kopia/repo/object"
"github.com/kopia/repo/storage"
)
// LocalConfig is a configuration of Kopia stored in a configuration file.
type LocalConfig struct {
Storage storage.ConnectionInfo `json:"storage"`
Caching block.CachingOptions `json:"caching"`
}
// repositoryObjectFormat describes the format of objects in a repository.
type repositoryObjectFormat struct {
block.FormattingOptions
object.Format
}
// Load reads local configuration from the specified reader.
func (lc *LocalConfig) Load(r io.Reader) error {
*lc = LocalConfig{}
return json.NewDecoder(r).Decode(lc)
}
// Save writes the configuration to the specified writer.
func (lc *LocalConfig) Save(w io.Writer) error {
b, err := json.MarshalIndent(lc, "", " ")
if err != nil {
return err
}
_, err = w.Write(b)
return err
}
// loadConfigFromFile reads the local configuration from the specified file.
func loadConfigFromFile(fileName string) (*LocalConfig, error) {
f, err := os.Open(fileName)
if err != nil {
return nil, err
}
defer f.Close() //nolint:errcheck
var lc LocalConfig
if err := lc.Load(f); err != nil {
return nil, err
}
return &lc, nil
}

View File

@@ -0,0 +1,12 @@
package manifest
import "time"
// EntryMetadata contains metadata about a manifest item. Each manifest item has one or more labels,
// including the required "type" label.
type EntryMetadata struct {
ID string
Length int
Labels map[string]string
ModTime time.Time
}

View File

@@ -0,0 +1,516 @@
// Package manifest implements support for managing JSON-based manifests in repository.
package manifest
import (
"bytes"
"compress/gzip"
"context"
"crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"sort"
"sync"
"time"
"github.com/kopia/repo/internal/repologging"
"github.com/kopia/repo/storage"
"github.com/pkg/errors"
)
var log = repologging.Logger("kopia/manifest")
// ErrNotFound is returned when the metadata item is not found.
var ErrNotFound = errors.New("not found")
const manifestBlockPrefix = "m"
const autoCompactionBlockCount = 16
type blockManager interface {
GetBlock(ctx context.Context, blockID string) ([]byte, error)
WriteBlock(ctx context.Context, data []byte, prefix string) (string, error)
DeleteBlock(blockID string) error
ListBlocks(prefix string) ([]string, error)
DisableIndexFlush()
EnableIndexFlush()
Flush(ctx context.Context) error
}
// Manager organizes JSON manifests of various kinds, including snapshot manifests
type Manager struct {
mu sync.Mutex
b blockManager
initialized bool
pendingEntries map[string]*manifestEntry
committedEntries map[string]*manifestEntry
committedBlockIDs map[string]bool
}
// Put serializes the provided payload to JSON and persists it. Returns unique handle that represents the object.
func (m *Manager) Put(ctx context.Context, labels map[string]string, payload interface{}) (string, error) {
if labels["type"] == "" {
return "", fmt.Errorf("'type' label is required")
}
if err := m.ensureInitialized(ctx); err != nil {
return "", err
}
m.mu.Lock()
defer m.mu.Unlock()
random := make([]byte, 16)
if _, err := rand.Read(random); err != nil {
return "", errors.Wrap(err, "can't initialize randomness")
}
b, err := json.Marshal(payload)
if err != nil {
return "", errors.Wrap(err, "marshal error")
}
e := &manifestEntry{
ID: hex.EncodeToString(random),
ModTime: time.Now().UTC(),
Labels: copyLabels(labels),
Content: b,
}
m.pendingEntries[e.ID] = e
return e.ID, nil
}
// GetMetadata returns metadata about provided manifest item or ErrNotFound if the item can't be found.
func (m *Manager) GetMetadata(ctx context.Context, id string) (*EntryMetadata, error) {
if err := m.ensureInitialized(ctx); err != nil {
return nil, err
}
m.mu.Lock()
defer m.mu.Unlock()
e := m.pendingEntries[id]
if e == nil {
e = m.committedEntries[id]
}
if e == nil || e.Deleted {
return nil, ErrNotFound
}
return &EntryMetadata{
ID: id,
ModTime: e.ModTime,
Length: len(e.Content),
Labels: copyLabels(e.Labels),
}, nil
}
// Get retrieves the contents of the provided manifest item by deserializing it as JSON to provided object.
// If the manifest is not found, returns ErrNotFound.
func (m *Manager) Get(ctx context.Context, id string, data interface{}) error {
if err := m.ensureInitialized(ctx); err != nil {
return err
}
b, err := m.GetRaw(ctx, id)
if err != nil {
return err
}
if err := json.Unmarshal(b, data); err != nil {
return fmt.Errorf("unable to unmashal %q: %v", id, err)
}
return nil
}
// GetRaw returns raw contents of the provided manifest (JSON bytes) or ErrNotFound if not found.
func (m *Manager) GetRaw(ctx context.Context, id string) ([]byte, error) {
if err := m.ensureInitialized(ctx); err != nil {
return nil, err
}
m.mu.Lock()
defer m.mu.Unlock()
e := m.pendingEntries[id]
if e == nil {
e = m.committedEntries[id]
}
if e == nil || e.Deleted {
return nil, ErrNotFound
}
return e.Content, nil
}
// Find returns the list of EntryMetadata for manifest entries matching all provided labels.
func (m *Manager) Find(ctx context.Context, labels map[string]string) ([]*EntryMetadata, error) {
if err := m.ensureInitialized(ctx); err != nil {
return nil, err
}
m.mu.Lock()
defer m.mu.Unlock()
var matches []*EntryMetadata
for _, e := range m.pendingEntries {
if matchesLabels(e.Labels, labels) {
matches = append(matches, cloneEntryMetadata(e))
}
}
for _, e := range m.committedEntries {
if m.pendingEntries[e.ID] != nil {
// ignore committed that are also in pending
continue
}
if matchesLabels(e.Labels, labels) {
matches = append(matches, cloneEntryMetadata(e))
}
}
sort.Slice(matches, func(i, j int) bool {
return matches[i].ModTime.Before(matches[j].ModTime)
})
return matches, nil
}
func cloneEntryMetadata(e *manifestEntry) *EntryMetadata {
return &EntryMetadata{
ID: e.ID,
Labels: copyLabels(e.Labels),
Length: len(e.Content),
ModTime: e.ModTime,
}
}
// matchesLabels returns true when all entries in 'b' are found in the 'a'.
func matchesLabels(a, b map[string]string) bool {
for k, v := range b {
if a[k] != v {
return false
}
}
return true
}
// Flush persists changes to manifest manager.
func (m *Manager) Flush(ctx context.Context) error {
m.mu.Lock()
defer m.mu.Unlock()
_, err := m.flushPendingEntriesLocked(ctx)
return err
}
func (m *Manager) flushPendingEntriesLocked(ctx context.Context) (string, error) {
if len(m.pendingEntries) == 0 {
return "", nil
}
man := manifest{}
for _, e := range m.pendingEntries {
man.Entries = append(man.Entries, e)
}
var buf bytes.Buffer
gz := gzip.NewWriter(&buf)
mustSucceed(json.NewEncoder(gz).Encode(man))
mustSucceed(gz.Flush())
mustSucceed(gz.Close())
blockID, err := m.b.WriteBlock(ctx, buf.Bytes(), manifestBlockPrefix)
if err != nil {
return "", err
}
for _, e := range m.pendingEntries {
m.committedEntries[e.ID] = e
delete(m.pendingEntries, e.ID)
}
m.committedBlockIDs[blockID] = true
return blockID, nil
}
func mustSucceed(e error) {
if e != nil {
panic("unexpected failure: " + e.Error())
}
}
// Delete marks the specified manifest ID for deletion.
func (m *Manager) Delete(ctx context.Context, id string) error {
if err := m.ensureInitialized(ctx); err != nil {
return err
}
m.mu.Lock()
defer m.mu.Unlock()
if m.pendingEntries[id] == nil && m.committedEntries[id] == nil {
return nil
}
m.pendingEntries[id] = &manifestEntry{
ID: id,
ModTime: time.Now().UTC(),
Deleted: true,
}
return nil
}
// Refresh updates the committed blocks from the underlying storage.
func (m *Manager) Refresh(ctx context.Context) error {
m.mu.Lock()
defer m.mu.Unlock()
return m.loadCommittedBlocksLocked(ctx)
}
func (m *Manager) loadCommittedBlocksLocked(ctx context.Context) error {
log.Debugf("listing manifest blocks")
for {
blocks, err := m.b.ListBlocks(manifestBlockPrefix)
if err != nil {
return errors.Wrap(err, "unable to list manifest blocks")
}
m.committedEntries = map[string]*manifestEntry{}
m.committedBlockIDs = map[string]bool{}
log.Debugf("found %v manifest blocks", len(blocks))
err = m.loadManifestBlocks(ctx, blocks)
if err == nil {
// success
break
}
if err == storage.ErrBlockNotFound {
// try again, lost a race with another manifest manager which just did compaction
continue
}
return errors.Wrap(err, "unable to load manifest blocks")
}
if err := m.maybeCompactLocked(ctx); err != nil {
return fmt.Errorf("error auto-compacting blocks")
}
return nil
}
func (m *Manager) loadManifestBlocks(ctx context.Context, blockIDs []string) error {
t0 := time.Now()
for _, b := range blockIDs {
m.committedBlockIDs[b] = true
}
manifests, err := m.loadBlocksInParallel(ctx, blockIDs)
if err != nil {
return err
}
for _, man := range manifests {
for _, e := range man.Entries {
m.mergeEntry(e)
}
}
// after merging, remove blocks marked as deleted.
for k, e := range m.committedEntries {
if e.Deleted {
delete(m.committedEntries, k)
}
}
log.Debugf("finished loading manifest blocks in %v.", time.Since(t0))
return nil
}
func (m *Manager) loadBlocksInParallel(ctx context.Context, blockIDs []string) ([]manifest, error) {
errCh := make(chan error, len(blockIDs))
manifests := make(chan manifest, len(blockIDs))
ch := make(chan string, len(blockIDs))
var wg sync.WaitGroup
for i := 0; i < 8; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
for blk := range ch {
t1 := time.Now()
man, err := m.loadManifestBlock(ctx, blk)
if err != nil {
errCh <- err
log.Debugf("block %v failed to be loaded by worker %v in %v: %v.", blk, workerID, time.Since(t1), err)
} else {
log.Debugf("block %v loaded by worker %v in %v.", blk, workerID, time.Since(t1))
manifests <- man
}
}
}(i)
}
// feed block IDs for goroutines
for _, b := range blockIDs {
ch <- b
}
close(ch)
// wait for workers to complete
wg.Wait()
close(errCh)
close(manifests)
// if there was any error, forward it
if err := <-errCh; err != nil {
return nil, err
}
var man []manifest
for mf := range manifests {
man = append(man, mf)
}
return man, nil
}
func (m *Manager) loadManifestBlock(ctx context.Context, blockID string) (manifest, error) {
man := manifest{}
blk, err := m.b.GetBlock(ctx, blockID)
if err != nil {
// do not wrap the error here, we want to propagate original ErrBlockNotFound
// which causes a retry if we lose list/delete race.
return man, err
}
gz, err := gzip.NewReader(bytes.NewReader(blk))
if err != nil {
return man, fmt.Errorf("unable to unpack block %q: %v", blockID, err)
}
if err := json.NewDecoder(gz).Decode(&man); err != nil {
return man, fmt.Errorf("unable to parse block %q: %v", blockID, err)
}
return man, nil
}
// Compact performs compaction of manifest blocks.
func (m *Manager) Compact(ctx context.Context) error {
m.mu.Lock()
defer m.mu.Unlock()
return m.compactLocked(ctx)
}
func (m *Manager) maybeCompactLocked(ctx context.Context) error {
if len(m.committedBlockIDs) < autoCompactionBlockCount {
return nil
}
log.Debugf("performing automatic compaction of %v blocks", len(m.committedBlockIDs))
if err := m.compactLocked(ctx); err != nil {
return errors.Wrap(err, "unable to compact manifest blocks")
}
if err := m.b.Flush(ctx); err != nil {
return errors.Wrap(err, "unable to flush blocks after auto-compaction")
}
return nil
}
func (m *Manager) compactLocked(ctx context.Context) error {
log.Debugf("compactLocked: pendingEntries=%v blockIDs=%v", len(m.pendingEntries), len(m.committedBlockIDs))
if len(m.committedBlockIDs) == 1 && len(m.pendingEntries) == 0 {
return nil
}
// compaction needs to be atomic (deletes and rewrite should show up in one index block or not show up at all)
// that's why we want to prevent index flushes while we're doing it.
m.b.DisableIndexFlush()
defer m.b.EnableIndexFlush()
for _, e := range m.committedEntries {
m.pendingEntries[e.ID] = e
}
blockID, err := m.flushPendingEntriesLocked(ctx)
if err != nil {
return err
}
// remove all previously-committed blocks; the block that was just written (if any) is kept.
for b := range m.committedBlockIDs {
if b == blockID {
// do not delete block that was just written.
continue
}
if err := m.b.DeleteBlock(b); err != nil {
return fmt.Errorf("unable to delete block %q: %v", b, err)
}
delete(m.committedBlockIDs, b)
}
return nil
}
func (m *Manager) mergeEntry(e *manifestEntry) {
prev := m.committedEntries[e.ID]
if prev == nil {
m.committedEntries[e.ID] = e
return
}
if e.ModTime.After(prev.ModTime) {
m.committedEntries[e.ID] = e
}
}
func (m *Manager) ensureInitialized(ctx context.Context) error {
m.mu.Lock()
defer m.mu.Unlock()
if m.initialized {
return nil
}
if err := m.loadCommittedBlocksLocked(ctx); err != nil {
return err
}
m.initialized = true
return nil
}
func copyLabels(m map[string]string) map[string]string {
r := map[string]string{}
for k, v := range m {
r[k] = v
}
return r
}
// NewManager returns new manifest manager for the provided block manager.
func NewManager(ctx context.Context, b blockManager) (*Manager, error) {
m := &Manager{
b: b,
pendingEntries: map[string]*manifestEntry{},
committedEntries: map[string]*manifestEntry{},
committedBlockIDs: map[string]bool{},
}
return m, nil
}
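A compact sketch (not part of this change) of the intended Put/Find/Flush flow; exampleFlow is hypothetical and bm stands for any blockManager implementation, typically *block.Manager.
package manifest

import "context"

// exampleFlow is illustrative only: add a manifest entry, look it up by label, then persist.
func exampleFlow(ctx context.Context, bm blockManager) error {
	mgr, err := NewManager(ctx, bm)
	if err != nil {
		return err
	}
	id, err := mgr.Put(ctx, map[string]string{"type": "snapshot", "host": "laptop"}, map[string]string{"path": "/home"})
	if err != nil {
		return err
	}
	if _, err := mgr.Find(ctx, map[string]string{"type": "snapshot"}); err != nil {
		return err
	}
	_ = id // the returned handle is what callers pass to GetMetadata/Get/Delete
	return mgr.Flush(ctx)
}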

View File

@@ -0,0 +1,321 @@
package manifest
import (
"context"
"reflect"
"sort"
"strings"
"testing"
"time"
"github.com/kopia/repo/block"
"github.com/kopia/repo/internal/storagetesting"
"github.com/pkg/errors"
)
func TestManifest(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
mgr, setupErr := newManagerForTesting(ctx, t, data)
if setupErr != nil {
t.Fatalf("unable to open block manager: %v", setupErr)
}
item1 := map[string]int{"foo": 1, "bar": 2}
item2 := map[string]int{"foo": 2, "bar": 3}
item3 := map[string]int{"foo": 3, "bar": 4}
labels1 := map[string]string{"type": "item", "color": "red"}
labels2 := map[string]string{"type": "item", "color": "blue", "shape": "square"}
labels3 := map[string]string{"type": "item", "shape": "square", "color": "red"}
id1 := addAndVerify(ctx, t, mgr, labels1, item1)
id2 := addAndVerify(ctx, t, mgr, labels2, item2)
id3 := addAndVerify(ctx, t, mgr, labels3, item3)
cases := []struct {
criteria map[string]string
expected []string
}{
{map[string]string{"color": "red"}, []string{id1, id3}},
{map[string]string{"color": "blue"}, []string{id2}},
{map[string]string{"color": "green"}, nil},
{map[string]string{"color": "red", "shape": "square"}, []string{id3}},
{map[string]string{"color": "blue", "shape": "square"}, []string{id2}},
{map[string]string{"color": "red", "shape": "circle"}, nil},
}
// verify before flush
for _, tc := range cases {
verifyMatches(ctx, t, mgr, tc.criteria, tc.expected)
}
verifyItem(ctx, t, mgr, id1, labels1, item1)
verifyItem(ctx, t, mgr, id2, labels2, item2)
verifyItem(ctx, t, mgr, id3, labels3, item3)
if err := mgr.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
if err := mgr.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
// verify after flush
for _, tc := range cases {
verifyMatches(ctx, t, mgr, tc.criteria, tc.expected)
}
verifyItem(ctx, t, mgr, id1, labels1, item1)
verifyItem(ctx, t, mgr, id2, labels2, item2)
verifyItem(ctx, t, mgr, id3, labels3, item3)
// flush underlying block manager and verify in new manifest manager.
mgr.b.Flush(ctx)
mgr2, setupErr := newManagerForTesting(ctx, t, data)
if setupErr != nil {
t.Fatalf("can't open block manager: %v", setupErr)
}
for _, tc := range cases {
verifyMatches(ctx, t, mgr2, tc.criteria, tc.expected)
}
verifyItem(ctx, t, mgr2, id1, labels1, item1)
verifyItem(ctx, t, mgr2, id2, labels2, item2)
verifyItem(ctx, t, mgr2, id3, labels3, item3)
if err := mgr2.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
}
// delete from one
time.Sleep(1 * time.Second)
if err := mgr.Delete(ctx, id3); err != nil {
t.Errorf("delete error: %v", err)
}
verifyItemNotFound(ctx, t, mgr, id3)
mgr.Flush(ctx)
verifyItemNotFound(ctx, t, mgr, id3)
// still found in another
verifyItem(ctx, t, mgr2, id3, labels3, item3)
if err := mgr2.loadCommittedBlocksLocked(ctx); err != nil {
t.Errorf("unable to load: %v", err)
}
if err := mgr.Compact(ctx); err != nil {
t.Errorf("can't compact: %v", err)
}
blks, err := mgr.b.ListBlocks(manifestBlockPrefix)
if err != nil {
t.Errorf("unable to list manifest blocks: %v", err)
}
if got, want := len(blks), 1; got != want {
t.Errorf("unexpected number of blocks: %v, want %v", got, want)
}
mgr.b.Flush(ctx)
mgr3, err := newManagerForTesting(ctx, t, data)
if err != nil {
t.Fatalf("can't open manager: %v", err)
}
verifyItem(ctx, t, mgr3, id1, labels1, item1)
verifyItem(ctx, t, mgr3, id2, labels2, item2)
verifyItemNotFound(ctx, t, mgr3, id3)
}
func TestManifestInitCorruptedBlock(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
st := storagetesting.NewMapStorage(data, nil, nil)
f := block.FormattingOptions{
Hash: "HMAC-SHA256-128",
Encryption: "NONE",
MaxPackSize: 100000,
}
// write some data to storage
bm, err := block.NewManager(ctx, st, f, block.CachingOptions{}, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
mgr, err := NewManager(ctx, bm)
if err != nil {
t.Fatalf("err: %v", err)
}
mgr.Put(ctx, map[string]string{"type": "foo"}, map[string]string{"some": "value"}) //nolint:errcheck
mgr.Flush(ctx)
bm.Flush(ctx)
// corrupt data at the storage level.
for k, v := range data {
if strings.HasPrefix(k, "p") {
for i := 0; i < len(v); i++ {
v[i] ^= 1
}
}
}
// make a new block manager based on corrupted data.
bm, err = block.NewManager(ctx, st, f, block.CachingOptions{}, nil)
if err != nil {
t.Fatalf("err: %v", err)
}
mgr, err = NewManager(ctx, bm)
if err != nil {
t.Fatalf("err: %v", err)
}
cases := []struct {
desc string
f func() error
}{
{"GetRaw", func() error { _, err := mgr.GetRaw(ctx, "anything"); return err }},
{"GetMetadata", func() error { _, err := mgr.GetMetadata(ctx, "anything"); return err }},
{"Get", func() error { return mgr.Get(ctx, "anything", nil) }},
{"Delete", func() error { return mgr.Delete(ctx, "anything") }},
{"Find", func() error { _, err := mgr.Find(ctx, nil); return err }},
{"Put", func() error {
_, err := mgr.Put(ctx, map[string]string{
"type": "foo",
}, map[string]string{
"some": "value",
})
return err
}},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
err := tc.f()
if err == nil || !strings.Contains(err.Error(), "invalid checksum") {
t.Errorf("invalid error when initializing malformed manifest manager: %v", err)
}
})
}
}
func addAndVerify(ctx context.Context, t *testing.T, mgr *Manager, labels map[string]string, data map[string]int) string {
t.Helper()
id, err := mgr.Put(ctx, labels, data)
if err != nil {
t.Errorf("unable to add %v (%v): %v", labels, data, err)
return ""
}
verifyItem(ctx, t, mgr, id, labels, data)
return id
}
func verifyItem(ctx context.Context, t *testing.T, mgr *Manager, id string, labels map[string]string, data map[string]int) {
t.Helper()
l, err := mgr.GetMetadata(ctx, id)
if err != nil {
t.Errorf("unable to retrieve %q: %v", id, err)
return
}
if !reflect.DeepEqual(l.Labels, labels) {
t.Errorf("invalid labels retrieved %v, wanted %v", l.Labels, labels)
}
var d2 map[string]int
if err := mgr.Get(ctx, id, &d2); err != nil {
t.Errorf("Get failed: %v", err)
}
if !reflect.DeepEqual(d2, data) {
t.Errorf("invalid data retrieved %v, wanted %v", d2, data)
}
}
func verifyItemNotFound(ctx context.Context, t *testing.T, mgr *Manager, id string) {
t.Helper()
_, err := mgr.GetMetadata(ctx, id)
if got, want := err, ErrNotFound; got != want {
t.Errorf("invalid error when getting %q %v, expected %v", id, err, ErrNotFound)
return
}
}
func verifyMatches(ctx context.Context, t *testing.T, mgr *Manager, labels map[string]string, expected []string) {
t.Helper()
var matches []string
items, err := mgr.Find(ctx, labels)
if err != nil {
t.Errorf("error in Find(): %v", err)
return
}
for _, m := range items {
matches = append(matches, m.ID)
}
sort.Strings(matches)
sort.Strings(expected)
if !reflect.DeepEqual(matches, expected) {
t.Errorf("invalid matches for %v: %v, expected %v", labels, matches, expected)
}
}
func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]byte) (*Manager, error) {
st := storagetesting.NewMapStorage(data, nil, nil)
bm, err := block.NewManager(ctx, st, block.FormattingOptions{
Hash: "HMAC-SHA256-128",
Encryption: "NONE",
MaxPackSize: 100000,
}, block.CachingOptions{}, nil)
if err != nil {
return nil, errors.Wrap(err, "can't create block manager")
}
return NewManager(ctx, bm)
}
func TestManifestInvalidPut(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
mgr, setupErr := newManagerForTesting(ctx, t, data)
if setupErr != nil {
t.Fatalf("unable to open block manager: %v", setupErr)
}
cases := []struct {
labels map[string]string
payload interface{}
expectedError string
}{
{map[string]string{"": ""}, "xxx", "'type' label is required"},
{map[string]string{"type": "blah"}, complex128(1), "marshal error"},
}
for i, tc := range cases {
_, err := mgr.Put(ctx, tc.labels, tc.payload)
if err == nil || !strings.Contains(err.Error(), tc.expectedError) {
t.Errorf("invalid error when putting case %v: %v, expected %v", i, err, tc.expectedError)
}
}
}
func TestManifestAutoCompaction(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
for i := 0; i < 100; i++ {
mgr, setupErr := newManagerForTesting(ctx, t, data)
if setupErr != nil {
t.Fatalf("unable to open block manager: %v", setupErr)
}
item1 := map[string]int{"foo": 1, "bar": 2}
labels1 := map[string]string{"type": "item", "color": "red"}
addAndVerify(ctx, t, mgr, labels1, item1)
mgr.Flush(ctx)
}
}

18
manifest/serialized.go Normal file
View File

@@ -0,0 +1,18 @@
package manifest
import (
"encoding/json"
"time"
)
type manifest struct {
Entries []*manifestEntry `json:"entries"`
}
type manifestEntry struct {
ID string `json:"id"`
Labels map[string]string `json:"labels"`
ModTime time.Time `json:"modified"`
Deleted bool `json:"deleted,omitempty"`
Content json.RawMessage `json:"data"`
}

8
object/indirect.go Normal file
View File

@@ -0,0 +1,8 @@
package object
// indirectObjectEntry represents an entry in an indirect object stream.
type indirectObjectEntry struct {
Start int64 `json:"s,omitempty"`
Length int64 `json:"l,omitempty"`
Object ID `json:"o,omitempty"`
}

245
object/object_manager.go Normal file
View File

@@ -0,0 +1,245 @@
// Package object implements repository support for content-addressable objects of arbitrary size.
package object
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"github.com/kopia/repo/block"
"github.com/pkg/errors"
)
// Reader allows reading, seeking, getting the length of and closing of a repository object.
type Reader interface {
io.Reader
io.Seeker
io.Closer
Length() int64
}
type blockManager interface {
BlockInfo(ctx context.Context, blockID string) (block.Info, error)
GetBlock(ctx context.Context, blockID string) ([]byte, error)
WriteBlock(ctx context.Context, data []byte, prefix string) (string, error)
}
// Format describes the format of objects in a repository.
type Format struct {
Splitter string `json:"splitter,omitempty"` // splitter used to break objects into storage blocks
MinBlockSize int `json:"minBlockSize,omitempty"` // minimum block size used with dynamic splitter
AvgBlockSize int `json:"avgBlockSize,omitempty"` // approximate size of storage block (used with dynamic splitter)
MaxBlockSize int `json:"maxBlockSize,omitempty"` // maximum size of storage block
}
// Manager implements a content-addressable storage on top of blob storage.
type Manager struct {
Format Format
blockMgr blockManager
trace func(message string, args ...interface{})
newSplitter func() objectSplitter
}
// NewWriter creates an ObjectWriter for writing to the repository.
func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer {
return &objectWriter{
ctx: ctx,
repo: om,
splitter: om.newSplitter(),
description: opt.Description,
prefix: opt.Prefix,
}
}
// Open creates new ObjectReader for reading given object from a repository.
func (om *Manager) Open(ctx context.Context, objectID ID) (Reader, error) {
// log.Printf("Repository::Open %v", objectID.String())
// defer log.Printf("finished Repository::Open() %v", objectID.String())
if indexObjectID, ok := objectID.IndexObjectID(); ok {
rd, err := om.Open(ctx, indexObjectID)
if err != nil {
return nil, err
}
defer rd.Close() //nolint:errcheck
seekTable, err := om.flattenListChunk(rd)
if err != nil {
return nil, err
}
totalLength := seekTable[len(seekTable)-1].endOffset()
return &objectReader{
ctx: ctx,
repo: om,
seekTable: seekTable,
totalLength: totalLength,
}, nil
}
return om.newRawReader(ctx, objectID)
}
// VerifyObject ensures that all objects backing ObjectID are present in the repository
// and returns the total length of the object and storage blocks of which it is composed.
func (om *Manager) VerifyObject(ctx context.Context, oid ID) (int64, []string, error) {
blocks := &blockTracker{}
l, err := om.verifyObjectInternal(ctx, oid, blocks)
if err != nil {
return 0, nil, err
}
return l, blocks.blockIDs(), nil
}
func (om *Manager) verifyIndirectObjectInternal(ctx context.Context, indexObjectID ID, blocks *blockTracker) (int64, error) {
if _, err := om.verifyObjectInternal(ctx, indexObjectID, blocks); err != nil {
return 0, errors.Wrap(err, "unable to read index")
}
rd, err := om.Open(ctx, indexObjectID)
if err != nil {
return 0, err
}
defer rd.Close() //nolint:errcheck
seekTable, err := om.flattenListChunk(rd)
if err != nil {
return 0, err
}
for i, m := range seekTable {
l, err := om.verifyObjectInternal(ctx, m.Object, blocks)
if err != nil {
return 0, err
}
if l != m.Length {
return 0, fmt.Errorf("unexpected length of part %#v of indirect object %q: %v %v, expected %v", i, indexObjectID, m.Object, l, m.Length)
}
}
totalLength := seekTable[len(seekTable)-1].endOffset()
return totalLength, nil
}
func (om *Manager) verifyObjectInternal(ctx context.Context, oid ID, blocks *blockTracker) (int64, error) {
if indexObjectID, ok := oid.IndexObjectID(); ok {
return om.verifyIndirectObjectInternal(ctx, indexObjectID, blocks)
}
if blockID, ok := oid.BlockID(); ok {
p, err := om.blockMgr.BlockInfo(ctx, blockID)
if err != nil {
return 0, err
}
blocks.addBlock(blockID)
return int64(p.Length), nil
}
return 0, fmt.Errorf("unrecognized object type: %v", oid)
}
func nullTrace(message string, args ...interface{}) {
}
// ManagerOptions specifies object manager options.
type ManagerOptions struct {
Trace func(message string, args ...interface{})
}
// NewObjectManager creates an ObjectManager with the specified block manager and format.
func NewObjectManager(ctx context.Context, bm blockManager, f Format, opts ManagerOptions) (*Manager, error) {
om := &Manager{
blockMgr: bm,
Format: f,
trace: nullTrace,
}
splitterID := f.Splitter
if splitterID == "" {
splitterID = "FIXED"
}
os := splitterFactories[splitterID]
if os == nil {
return nil, fmt.Errorf("unsupported splitter %q", f.Splitter)
}
om.newSplitter = func() objectSplitter {
return os(&f)
}
if opts.Trace != nil {
om.trace = opts.Trace
} else {
om.trace = nullTrace
}
return om, nil
}
/*
{"stream":"kopia:indirect","entries":[
{"l":1698099,"o":"D13ea27f9ad891ad4a2edfa983906863d"},
{"s":1698099,"l":1302081,"o":"De8ca8327cd3af5f4edbd5ed1009c525e"},
{"s":3000180,"l":4352499,"o":"D6b6eb48ca5361d06d72fe193813e42e1"},
{"s":7352679,"l":1170821,"o":"Dd14653f76b63802ed48be64a0e67fea9"},
{"s":91094118,"l":1645153,"o":"Daa55df764d881a1daadb5ea9de17abbb"}
]}
*/
type indirectObject struct {
StreamID string `json:"stream"`
Entries []indirectObjectEntry `json:"entries"`
}
func (om *Manager) flattenListChunk(rawReader io.Reader) ([]indirectObjectEntry, error) {
var ind indirectObject
if err := json.NewDecoder(rawReader).Decode(&ind); err != nil {
return nil, errors.Wrap(err, "invalid indirect object")
}
return ind.Entries, nil
}
func (om *Manager) newRawReader(ctx context.Context, objectID ID) (Reader, error) {
if blockID, ok := objectID.BlockID(); ok {
payload, err := om.blockMgr.GetBlock(ctx, blockID)
if err != nil {
return nil, err
}
return newObjectReaderWithData(payload), nil
}
return nil, fmt.Errorf("unsupported object ID: %v", objectID)
}
type readerWithData struct {
io.ReadSeeker
length int64
}
func (rwd *readerWithData) Close() error {
return nil
}
func (rwd *readerWithData) Length() int64 {
return rwd.length
}
func newObjectReaderWithData(data []byte) Reader {
return &readerWithData{
ReadSeeker: bytes.NewReader(data),
length: int64(len(data)),
}
}
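A minimal write/read round-trip sketch (not part of this change); writeAndReadBack is hypothetical and bm stands for any blockManager implementation.
package object

import (
	"context"
	"io/ioutil"
)

// writeAndReadBack is illustrative only: it writes a payload through the object manager
// and reads it back via the resulting object ID.
func writeAndReadBack(ctx context.Context, bm blockManager, payload []byte) ([]byte, error) {
	om, err := NewObjectManager(ctx, bm, Format{Splitter: "FIXED", MaxBlockSize: 1 << 20}, ManagerOptions{})
	if err != nil {
		return nil, err
	}
	w := om.NewWriter(ctx, WriterOptions{Description: "example"})
	if _, err := w.Write(payload); err != nil {
		return nil, err
	}
	oid, err := w.Result()
	if err != nil {
		return nil, err
	}
	r, err := om.Open(ctx, oid)
	if err != nil {
		return nil, err
	}
	defer r.Close() //nolint:errcheck
	return ioutil.ReadAll(r)
}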

View File

@@ -0,0 +1,344 @@
package object
import (
"bytes"
"context"
cryptorand "crypto/rand"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io/ioutil"
"math/rand"
"reflect"
"runtime/debug"
"sync"
"testing"
"github.com/kopia/repo/block"
"github.com/kopia/repo/storage"
)
type fakeBlockManager struct {
mu sync.Mutex
data map[string][]byte
}
func (f *fakeBlockManager) GetBlock(ctx context.Context, blockID string) ([]byte, error) {
f.mu.Lock()
defer f.mu.Unlock()
if d, ok := f.data[blockID]; ok {
return append([]byte(nil), d...), nil
}
return nil, storage.ErrBlockNotFound
}
func (f *fakeBlockManager) WriteBlock(ctx context.Context, data []byte, prefix string) (string, error) {
h := sha256.New()
h.Write(data) //nolint:errcheck
blockID := prefix + string(hex.EncodeToString(h.Sum(nil)))
f.mu.Lock()
defer f.mu.Unlock()
f.data[blockID] = append([]byte(nil), data...)
return blockID, nil
}
func (f *fakeBlockManager) BlockInfo(ctx context.Context, blockID string) (block.Info, error) {
f.mu.Lock()
defer f.mu.Unlock()
if d, ok := f.data[blockID]; ok {
return block.Info{BlockID: blockID, Length: uint32(len(d))}, nil
}
return block.Info{}, storage.ErrBlockNotFound
}
func (f *fakeBlockManager) Flush(ctx context.Context) error {
return nil
}
func setupTest(t *testing.T) (map[string][]byte, *Manager) {
return setupTestWithData(t, map[string][]byte{}, ManagerOptions{})
}
func setupTestWithData(t *testing.T, data map[string][]byte, opts ManagerOptions) (map[string][]byte, *Manager) {
r, err := NewObjectManager(context.Background(), &fakeBlockManager{data: data}, Format{
MaxBlockSize: 400,
Splitter: "FIXED",
}, opts)
if err != nil {
t.Fatalf("can't create object manager: %v", err)
}
return data, r
}
func TestWriters(t *testing.T) {
ctx := context.Background()
cases := []struct {
data []byte
objectID ID
}{
{
[]byte("the quick brown fox jumps over the lazy dog"),
"05c6e08f1d9fdafa03147fcb8f82f124c76d2f70e3d989dc8aadb5e7d7450bec",
},
{make([]byte, 100), "cd00e292c5970d3c5e2f0ffa5171e555bc46bfc4faddfb4a418b6840b86e79a3"}, // 100 zero bytes
}
for _, c := range cases {
data, om := setupTest(t)
writer := om.NewWriter(ctx, WriterOptions{})
if _, err := writer.Write(c.data); err != nil {
t.Errorf("write error: %v", err)
}
result, err := writer.Result()
if err != nil {
t.Errorf("error getting writer results for %v, expected: %v", c.data, c.objectID.String())
continue
}
if !objectIDsEqual(result, c.objectID) {
t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String())
}
if _, ok := c.objectID.BlockID(); !ok {
if len(data) != 0 {
t.Errorf("unexpected data written to the storage: %v", data)
}
} else {
if len(data) != 1 {
// 1 data block
t.Errorf("unexpected data written to the storage: %v", data)
}
}
}
}
func objectIDsEqual(o1 ID, o2 ID) bool {
return reflect.DeepEqual(o1, o2)
}
func TestWriterCompleteChunkInTwoWrites(t *testing.T) {
ctx := context.Background()
_, om := setupTest(t)
content := make([]byte, 100)
writer := om.NewWriter(ctx, WriterOptions{})
writer.Write(content[0:50]) //nolint:errcheck
writer.Write(content[0:50]) //nolint:errcheck
result, err := writer.Result()
if !objectIDsEqual(result, "cd00e292c5970d3c5e2f0ffa5171e555bc46bfc4faddfb4a418b6840b86e79a3") {
t.Errorf("unexpected result: %v err: %v", result, err)
}
}
func verifyIndirectBlock(ctx context.Context, t *testing.T, r *Manager, oid ID) {
for indexBlockID, isIndirect := oid.IndexObjectID(); isIndirect; indexBlockID, isIndirect = indexBlockID.IndexObjectID() {
rd, err := r.Open(ctx, indexBlockID)
if err != nil {
t.Errorf("unable to open %v: %v", oid.String(), err)
return
}
defer rd.Close()
var ind indirectObject
if err := json.NewDecoder(rd).Decode(&ind); err != nil {
t.Errorf("cannot parse indirect stream: %v", err)
}
}
}
func TestIndirection(t *testing.T) {
ctx := context.Background()
cases := []struct {
dataLength int
expectedBlockCount int
expectedIndirection int
}{
{dataLength: 200, expectedBlockCount: 1, expectedIndirection: 0},
{dataLength: 1400, expectedBlockCount: 3, expectedIndirection: 1},
{dataLength: 2000, expectedBlockCount: 4, expectedIndirection: 2},
{dataLength: 3000, expectedBlockCount: 5, expectedIndirection: 2},
{dataLength: 4000, expectedBlockCount: 5, expectedIndirection: 2},
{dataLength: 10000, expectedBlockCount: 10, expectedIndirection: 3},
}
for _, c := range cases {
data, om := setupTest(t)
contentBytes := make([]byte, c.dataLength)
writer := om.NewWriter(ctx, WriterOptions{})
if _, err := writer.Write(contentBytes); err != nil {
t.Errorf("write error: %v", err)
}
result, err := writer.Result()
if err != nil {
t.Errorf("error getting writer results: %v", err)
}
if indirectionLevel(result) != c.expectedIndirection {
t.Errorf("incorrect indirection level for size: %v: %v, expected %v", c.dataLength, indirectionLevel(result), c.expectedIndirection)
}
if got, want := len(data), c.expectedBlockCount; got != want {
t.Errorf("unexpected block count for %v: %v, expected %v", c.dataLength, got, want)
}
l, b, err := om.VerifyObject(ctx, result)
if err != nil {
t.Errorf("error verifying %q: %v", result, err)
}
if got, want := int(l), len(contentBytes); got != want {
t.Errorf("got invalid byte count for %q: %v, wanted %v", result, got, want)
}
if got, want := len(b), c.expectedBlockCount; got != want {
t.Errorf("invalid block count for %v, got %v, wanted %v", result, got, want)
}
verifyIndirectBlock(ctx, t, om, result)
}
}
func indirectionLevel(oid ID) int {
indexObjectID, ok := oid.IndexObjectID()
if !ok {
return 0
}
return 1 + indirectionLevel(indexObjectID)
}
func TestHMAC(t *testing.T) {
ctx := context.Background()
content := bytes.Repeat([]byte{0xcd}, 50)
_, om := setupTest(t)
w := om.NewWriter(ctx, WriterOptions{})
w.Write(content) //nolint:errcheck
result, err := w.Result()
if result.String() != "cad29ff89951a3c085c86cb7ed22b82b51f7bdfda24f932c7f9601f51d5975ba" {
t.Errorf("unexpected result: %v err: %v", result.String(), err)
}
}
func TestReader(t *testing.T) {
ctx := context.Background()
data, om := setupTest(t)
storedPayload := []byte("foo\nbar")
data["a76999788386641a3ec798554f1fe7e6"] = storedPayload
cases := []struct {
text string
payload []byte
}{
{"a76999788386641a3ec798554f1fe7e6", storedPayload},
}
for _, c := range cases {
objectID, err := ParseID(c.text)
if err != nil {
t.Errorf("cannot parse object ID: %v", err)
continue
}
reader, err := om.Open(ctx, objectID)
if err != nil {
t.Errorf("cannot create reader for %v: %v", objectID, err)
continue
}
d, err := ioutil.ReadAll(reader)
if err != nil {
t.Errorf("cannot read all data for %v: %v", objectID, err)
continue
}
if !bytes.Equal(d, c.payload) {
t.Errorf("incorrect payload for %v: expected: %v got: %v", objectID, c.payload, d)
continue
}
}
}
func TestReaderStoredBlockNotFound(t *testing.T) {
ctx := context.Background()
_, om := setupTest(t)
objectID, err := ParseID("deadbeef")
if err != nil {
t.Errorf("cannot parse object ID: %v", err)
}
reader, err := om.Open(ctx, objectID)
if err != storage.ErrBlockNotFound || reader != nil {
t.Errorf("unexpected result: reader: %v err: %v", reader, err)
}
}
func TestEndToEndReadAndSeek(t *testing.T) {
ctx := context.Background()
_, om := setupTest(t)
for _, size := range []int{1, 199, 200, 201, 9999, 512434} {
// Create a random data sample of the specified size.
randomData := make([]byte, size)
cryptorand.Read(randomData) //nolint:errcheck
writer := om.NewWriter(ctx, WriterOptions{})
if _, err := writer.Write(randomData); err != nil {
t.Errorf("write error: %v", err)
}
objectID, err := writer.Result()
writer.Close()
if err != nil {
t.Errorf("cannot get writer result for %v: %v", size, err)
continue
}
verify(ctx, t, om, objectID, randomData, fmt.Sprintf("%v %v", objectID, size))
}
}
func verify(ctx context.Context, t *testing.T, om *Manager, objectID ID, expectedData []byte, testCaseID string) {
t.Helper()
reader, err := om.Open(ctx, objectID)
if err != nil {
t.Errorf("cannot get reader for %v (%v): %v %v", testCaseID, objectID, err, string(debug.Stack()))
return
}
for i := 0; i < 20; i++ {
sampleSize := int(rand.Int31n(300))
seekOffset := int(rand.Int31n(int32(len(expectedData))))
if seekOffset+sampleSize > len(expectedData) {
sampleSize = len(expectedData) - seekOffset
}
if sampleSize > 0 {
got := make([]byte, sampleSize)
if offset, err := reader.Seek(int64(seekOffset), 0); err != nil || offset != int64(seekOffset) {
t.Errorf("seek error: %v offset=%v expected:%v", err, offset, seekOffset)
}
if n, err := reader.Read(got); err != nil || n != sampleSize {
t.Errorf("invalid data: n=%v, expected=%v, err:%v", n, sampleSize, err)
}
expected := expectedData[seekOffset : seekOffset+sampleSize]
if !bytes.Equal(expected, got) {
t.Errorf("incorrect data read for %v: expected: %x, got: %x", testCaseID, expected, got)
}
}
}
}

159
object/object_reader.go Normal file
View File

@@ -0,0 +1,159 @@
package object
import (
"context"
"fmt"
"io"
)
func (i *indirectObjectEntry) endOffset() int64 {
return i.Start + i.Length
}
type objectReader struct {
ctx context.Context
repo *Manager
seekTable []indirectObjectEntry
currentPosition int64 // Overall position in the objectReader
totalLength int64 // Overall length
currentChunkIndex int // Index of current chunk in the seek table
currentChunkData []byte // Current chunk data
currentChunkPosition int // Read position in the current chunk
}
func (r *objectReader) Read(buffer []byte) (int, error) {
readBytes := 0
remaining := len(buffer)
for remaining > 0 {
if r.currentChunkData != nil {
toCopy := len(r.currentChunkData) - r.currentChunkPosition
if toCopy == 0 {
// EOF on current chunk
r.closeCurrentChunk()
r.currentChunkIndex++
continue
}
if toCopy > remaining {
toCopy = remaining
}
copy(buffer[readBytes:],
r.currentChunkData[r.currentChunkPosition:r.currentChunkPosition+toCopy])
r.currentChunkPosition += toCopy
r.currentPosition += int64(toCopy)
readBytes += toCopy
remaining -= toCopy
} else if r.currentChunkIndex < len(r.seekTable) {
err := r.openCurrentChunk()
if err != nil {
return 0, err
}
} else {
break
}
}
if readBytes == 0 {
return readBytes, io.EOF
}
return readBytes, nil
}
func (r *objectReader) openCurrentChunk() error {
st := r.seekTable[r.currentChunkIndex]
blockData, err := r.repo.Open(r.ctx, st.Object)
if err != nil {
return err
}
defer blockData.Close() //nolint:errcheck
b := make([]byte, st.Length)
if _, err := io.ReadFull(blockData, b); err != nil {
return err
}
r.currentChunkData = b
r.currentChunkPosition = 0
return nil
}
func (r *objectReader) closeCurrentChunk() {
r.currentChunkData = nil
}
func (r *objectReader) findChunkIndexForOffset(offset int64) (int, error) {
left := 0
right := len(r.seekTable) - 1
for left <= right {
middle := (left + right) / 2
if offset < r.seekTable[middle].Start {
right = middle - 1
continue
}
if offset >= r.seekTable[middle].endOffset() {
left = middle + 1
continue
}
return middle, nil
}
return 0, fmt.Errorf("can't find chunk for offset %v", offset)
}
func (r *objectReader) Seek(offset int64, whence int) (int64, error) {
if whence == 1 {
return r.Seek(r.currentPosition+offset, 0)
}
if whence == 2 {
return r.Seek(r.totalLength+offset, 0)
}
if offset < 0 {
return -1, fmt.Errorf("invalid seek %v %v", offset, whence)
}
if offset > r.totalLength {
offset = r.totalLength
}
index, err := r.findChunkIndexForOffset(offset)
if err != nil {
return -1, fmt.Errorf("invalid seek %v %v: %v", offset, whence, err)
}
chunkStartOffset := r.seekTable[index].Start
if index != r.currentChunkIndex {
r.closeCurrentChunk()
r.currentChunkIndex = index
}
if r.currentChunkData == nil {
if err := r.openCurrentChunk(); err != nil {
return 0, err
}
}
r.currentChunkPosition = int(offset - chunkStartOffset)
r.currentPosition = offset
return r.currentPosition, nil
}
func (r *objectReader) Close() error {
return nil
}
func (r *objectReader) Length() int64 {
return r.totalLength
}
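
Note: the reader resolves an absolute offset to a chunk by binary-searching the seek table over [Start, Start+Length). The following standalone sketch reproduces just that lookup; chunkEntry and findChunk are hypothetical stand-ins for illustration and are not part of this package.
// Minimal standalone sketch of the seek-table lookup used by objectReader.
package main

import "fmt"

type chunkEntry struct {
	Start, Length int64
}

func (c chunkEntry) endOffset() int64 { return c.Start + c.Length }

// findChunk mirrors the binary search in findChunkIndexForOffset.
func findChunk(table []chunkEntry, offset int64) (int, bool) {
	left, right := 0, len(table)-1
	for left <= right {
		middle := (left + right) / 2
		switch {
		case offset < table[middle].Start:
			right = middle - 1
		case offset >= table[middle].endOffset():
			left = middle + 1
		default:
			return middle, true
		}
	}
	return 0, false
}

func main() {
	table := []chunkEntry{{0, 100}, {100, 250}, {350, 50}}
	for _, off := range []int64{0, 99, 100, 349, 350, 399} {
		idx, ok := findChunk(table, off)
		fmt.Println(off, idx, ok) // offsets 0-99 -> chunk 0, 100-349 -> 1, 350-399 -> 2
	}
}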

110
object/object_splitter.go Normal file
View File

@@ -0,0 +1,110 @@
package object
import (
"math"
"sort"
"github.com/silvasur/buzhash"
)
type objectSplitter interface {
add(b byte) bool
}
// SupportedSplitters is a list of supported object splitters including:
//
// NEVER - prevents objects from ever splitting
// FIXED - always splits large objects exactly at the maximum block size boundary
// DYNAMIC - dynamically splits large objects based on rolling hash of contents.
var SupportedSplitters []string
var splitterFactories = map[string]func(*Format) objectSplitter{
"NEVER": func(f *Format) objectSplitter {
return newNeverSplitter()
},
"FIXED": func(f *Format) objectSplitter {
return newFixedSplitter(f.MaxBlockSize)
},
"DYNAMIC": func(f *Format) objectSplitter {
return newRollingHashSplitter(buzhash.NewBuzHash(32), f.MinBlockSize, f.AvgBlockSize, f.MaxBlockSize)
},
}
func init() {
for k := range splitterFactories {
SupportedSplitters = append(SupportedSplitters, k)
}
sort.Strings(SupportedSplitters)
}
// DefaultSplitter is the name of the splitter used by default for new repositories.
const DefaultSplitter = "DYNAMIC"
type neverSplitter struct{}
func (s *neverSplitter) add(b byte) bool {
return false
}
func newNeverSplitter() objectSplitter {
return &neverSplitter{}
}
type fixedSplitter struct {
cur int
chunkLength int
}
func (s *fixedSplitter) add(b byte) bool {
s.cur++
if s.cur >= s.chunkLength {
s.cur = 0
return true
}
return false
}
func newFixedSplitter(chunkLength int) objectSplitter {
return &fixedSplitter{chunkLength: chunkLength}
}
type rollingHash interface {
HashByte(b byte) uint32
}
type rollingHashSplitter struct {
rh rollingHash
mask uint32
currentBlockSize int
minBlockSize int
maxBlockSize int
}
func (rs *rollingHashSplitter) add(b byte) bool {
sum := rs.rh.HashByte(b)
rs.currentBlockSize++
if rs.currentBlockSize >= rs.maxBlockSize {
rs.currentBlockSize = 0
return true
}
if sum&rs.mask == 0 && rs.currentBlockSize > rs.minBlockSize && sum != 0 {
//log.Printf("splitting %v on sum %x mask %x", rs.currentBlockSize, sum, rs.mask)
rs.currentBlockSize = 0
return true
}
return false
}
func newRollingHashSplitter(rh rollingHash, minBlockSize int, approxBlockSize int, maxBlockSize int) objectSplitter {
bits := rollingHashBits(approxBlockSize)
mask := ^(^uint32(0) << bits)
return &rollingHashSplitter{rh, mask, 0, minBlockSize, maxBlockSize}
}
func rollingHashBits(n int) uint {
e := math.Log2(float64(n))
exp := math.Floor(e + 0.5)
return uint(exp)
}
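
The DYNAMIC splitter emits a split whenever the low bits of the rolling hash are all zero (subject to the min and max block size bounds), and the number of low bits is the rounded base-2 logarithm of the requested average block size. A small standalone sketch of that mask derivation, reproducing only the arithmetic rather than the package API:
// Standalone sketch: how the rolling-hash splitter derives its bit mask.
// For an average block size of ~1024 bytes the mask keeps the low 10 bits,
// so a split is emitted roughly once every 2^10 bytes of random input.
package main

import (
	"fmt"
	"math"
)

func rollingHashBits(n int) uint {
	return uint(math.Floor(math.Log2(float64(n)) + 0.5))
}

func main() {
	for _, avg := range []int{32, 1024, 32768} {
		bits := rollingHashBits(avg)
		mask := ^(^uint32(0) << bits)
		fmt.Printf("avg=%-6d bits=%-2d mask=%#x\n", avg, bits, mask)
	}
	// avg=32     bits=5  mask=0x1f
	// avg=1024   bits=10 mask=0x3ff
	// avg=32768  bits=15 mask=0x7fff
}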

View File

@@ -0,0 +1,134 @@
package object
import (
"math"
"math/rand"
"testing"
"github.com/silvasur/buzhash"
)
func TestSplitters(t *testing.T) {
cases := []struct {
desc string
newSplitter func() objectSplitter
}{
{"rolling buzhash with 3 bits", func() objectSplitter { return newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 8, 20) }},
{"rolling buzhash with 5 bits", func() objectSplitter { return newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, 20) }},
}
for _, tc := range cases {
s1 := tc.newSplitter()
s2 := tc.newSplitter()
rnd := make([]byte, 50000000)
rand.Read(rnd)
for i, p := range rnd {
if got, want := s1.add(p), s2.add(p); got != want {
t.Errorf("incorrect add() result for %v at offset %v", tc.desc, i)
}
}
}
}
func TestSplitterStability(t *testing.T) {
r := rand.New(rand.NewSource(5))
rnd := make([]byte, 5000000)
if n, err := r.Read(rnd); n != len(rnd) || err != nil {
t.Fatalf("can't initialize random data: %v", err)
}
cases := []struct {
splitter objectSplitter
count int
avg int
minSplit int
maxSplit int
}{
{newFixedSplitter(1000), 5000, 1000, 1000, 1000},
{newFixedSplitter(10000), 500, 10000, 10000, 10000},
{newNeverSplitter(), 0, 0, math.MaxInt32, 0},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, math.MaxInt32), 156262, 31, 1, 404},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 1024, math.MaxInt32), 4933, 1013, 1, 8372},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 2048, math.MaxInt32), 2476, 2019, 1, 19454},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32768, math.MaxInt32), 185, 27027, 1, 177510},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 65536, math.MaxInt32), 99, 50505, 418, 230449},
// min and max
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, 64), 179921, 27, 1, 64},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 1024, 10000), 4933, 1013, 1, 8372},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 2048, 10000), 2490, 2008, 1, 10000},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 500, 32768, 100000), 183, 27322, 522, 100000},
{newRollingHashSplitter(buzhash.NewBuzHash(32), 500, 65536, 100000), 113, 44247, 522, 100000},
}
for _, tc := range cases {
s := tc.splitter
lastSplit := -1
maxSplit := 0
minSplit := int(math.MaxInt32)
count := 0
for i, p := range rnd {
if s.add(p) {
l := i - lastSplit
if l >= maxSplit {
maxSplit = l
}
if l < minSplit {
minSplit = l
}
count++
lastSplit = i
}
}
var avg int
if count > 0 {
avg = len(rnd) / count
}
if got, want := avg, tc.avg; got != want {
t.Errorf("invalid split average size %v, wanted %v", got, want)
}
if got, want := count, tc.count; got != want {
t.Errorf("invalid split count %v, wanted %v", got, want)
}
if got, want := minSplit, tc.minSplit; got != want {
t.Errorf("min split %v, wanted %v", got, want)
}
if got, want := maxSplit, tc.maxSplit; got != want {
t.Errorf("max split %v, wanted %v", got, want)
}
}
}
func TestRollingHashBits(t *testing.T) {
cases := []struct {
blockSize int
bits uint
}{
{256, 8},
{128, 7},
{100, 7},
{500, 9},
{700, 9},
{724, 9},
{725, 10},
{768, 10},
{1000, 10},
{1000000, 20},
{10000000, 23},
{20000000, 24},
}
for _, tc := range cases {
if got, want := rollingHashBits(tc.blockSize), tc.bits; got != want {
t.Errorf("rollingHashBits(%v) = %v, wanted %v", tc.blockSize, got, want)
}
}
}

145
object/object_writer.go Normal file
View File

@@ -0,0 +1,145 @@
package object
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"sync"
"github.com/pkg/errors"
)
// Writer allows writing content to the storage and supports automatic deduplication and encryption
// of written data.
type Writer interface {
io.WriteCloser
Result() (ID, error)
}
type blockTracker struct {
mu sync.Mutex
blocks map[string]bool
}
func (t *blockTracker) addBlock(blockID string) {
t.mu.Lock()
defer t.mu.Unlock()
if t.blocks == nil {
t.blocks = make(map[string]bool)
}
t.blocks[blockID] = true
}
func (t *blockTracker) blockIDs() []string {
t.mu.Lock()
defer t.mu.Unlock()
result := make([]string, 0, len(t.blocks))
for k := range t.blocks {
result = append(result, k)
}
return result
}
type objectWriter struct {
ctx context.Context
repo *Manager
prefix string
buffer bytes.Buffer
totalLength int64
currentPosition int64
blockIndex []indirectObjectEntry
description string
splitter objectSplitter
}
func (w *objectWriter) Close() error {
return nil
}
func (w *objectWriter) Write(data []byte) (n int, err error) {
dataLen := len(data)
w.totalLength += int64(dataLen)
for _, d := range data {
w.buffer.WriteByte(d)
if w.splitter.add(d) {
if err := w.flushBuffer(); err != nil {
return 0, err
}
}
}
return dataLen, nil
}
func (w *objectWriter) flushBuffer() error {
length := w.buffer.Len()
chunkID := len(w.blockIndex)
w.blockIndex = append(w.blockIndex, indirectObjectEntry{})
w.blockIndex[chunkID].Start = w.currentPosition
w.blockIndex[chunkID].Length = int64(length)
w.currentPosition += int64(length)
var b2 bytes.Buffer
w.buffer.WriteTo(&b2) //nolint:errcheck
w.buffer.Reset()
blockID, err := w.repo.blockMgr.WriteBlock(w.ctx, b2.Bytes(), w.prefix)
w.repo.trace("OBJECT_WRITER(%q) stored %v (%v bytes)", w.description, blockID, length)
if err != nil {
return fmt.Errorf("error when flushing chunk %d of %s: %v", chunkID, w.description, err)
}
w.blockIndex[chunkID].Object = DirectObjectID(blockID)
return nil
}
func (w *objectWriter) Result() (ID, error) {
if w.buffer.Len() > 0 || len(w.blockIndex) == 0 {
if err := w.flushBuffer(); err != nil {
return "", err
}
}
if len(w.blockIndex) == 1 {
return w.blockIndex[0].Object, nil
}
iw := &objectWriter{
ctx: w.ctx,
repo: w.repo,
description: "LIST(" + w.description + ")",
splitter: w.repo.newSplitter(),
prefix: w.prefix,
}
ind := indirectObject{
StreamID: "kopia:indirect",
Entries: w.blockIndex,
}
if err := json.NewEncoder(iw).Encode(ind); err != nil {
return "", errors.Wrap(err, "unable to write indirect block index")
}
oid, err := iw.Result()
if err != nil {
return "", err
}
return IndirectObjectID(oid), nil
}
// WriterOptions can be passed to Repository.NewWriter()
type WriterOptions struct {
Description string
Prefix string // empty string or a single-character ('g'..'z')
}
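
A hedged sketch of the write path from a caller's point of view, written as if inside package object and assuming an already-constructed *Manager (see the test setup earlier in this change); it is illustrative, not an excerpt from the package:
// Sketch: writing an object and obtaining its ID. Assumes an existing
// *Manager named om; the description string is a placeholder.
func writeExample(ctx context.Context, om *Manager, payload []byte) (ID, error) {
	w := om.NewWriter(ctx, WriterOptions{Description: "example"})
	defer w.Close() //nolint:errcheck

	if _, err := w.Write(payload); err != nil {
		return "", err
	}
	// Result flushes buffered data; when more than one chunk was written it
	// stores an indirect index block and returns an "I"-prefixed ID.
	return w.Result()
}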

94
object/objectid.go Normal file
View File

@@ -0,0 +1,94 @@
package object
import (
"encoding/hex"
"fmt"
"strings"
)
// ID is an identifier of a repository object. Repository objects can be stored:
//
// 1. In a single content block, this is the most common case for small objects.
// 2. In a series of content blocks with an indirect block pointing at them (multiple indirections are allowed).
// This is used for larger files. Object IDs using indirect blocks start with "I"
type ID string
// HasObjectID exposes the identifier of an object.
type HasObjectID interface {
ObjectID() ID
}
// String returns string representation of ObjectID that is suitable for displaying in the UI.
func (i ID) String() string {
return strings.Replace(string(i), "D", "", -1)
}
// IndexObjectID returns the object ID of the underlying index object.
func (i ID) IndexObjectID() (ID, bool) {
if strings.HasPrefix(string(i), "I") {
return i[1:], true
}
return "", false
}
// BlockID returns the block ID of the underlying content storage block.
func (i ID) BlockID() (string, bool) {
if strings.HasPrefix(string(i), "D") {
return string(i[1:]), true
}
if strings.HasPrefix(string(i), "I") {
return "", false
}
return string(i), true
}
// Validate checks the ID format for validity and reports any errors.
func (i ID) Validate() error {
if indexObjectID, ok := i.IndexObjectID(); ok {
if err := indexObjectID.Validate(); err != nil {
return fmt.Errorf("invalid indirect object ID %v: %v", i, err)
}
return nil
}
if blockID, ok := i.BlockID(); ok {
if len(blockID) < 2 {
return fmt.Errorf("missing block ID")
}
// odd length - the first character must be between 'g' and 'z'
if len(blockID)%2 == 1 {
if blockID[0] < 'g' || blockID[0] > 'z' {
return fmt.Errorf("invalid block ID prefix: %v", blockID)
}
blockID = blockID[1:]
}
if _, err := hex.DecodeString(blockID); err != nil {
return fmt.Errorf("invalid blockID suffix, must be base-16 encoded: %v", blockID)
}
return nil
}
return fmt.Errorf("invalid object ID: %v", i)
}
// DirectObjectID returns direct object ID based on the provided block ID.
func DirectObjectID(blockID string) ID {
return ID(blockID)
}
// IndirectObjectID returns indirect object ID based on the underlying index object ID.
func IndirectObjectID(indexObjectID ID) ID {
return "I" + indexObjectID
}
// ParseID converts the specified string into object ID
func ParseID(s string) (ID, error) {
i := ID(s)
return i, i.Validate()
}
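
A short standalone sketch showing how the ID helpers above compose; the example IDs mirror the shapes exercised by the test cases that follow:
// Sketch: decomposing object IDs. "I" marks an indirect object; a leading
// "D" on the underlying block ID is stripped by BlockID().
package main

import (
	"fmt"

	"github.com/kopia/repo/object"
)

func main() {
	id, err := object.ParseID("IDf0f0")
	if err != nil {
		panic(err)
	}
	if inner, ok := id.IndexObjectID(); ok {
		fmt.Println("index object:", inner) // "Df0f0"
		if blockID, ok := inner.BlockID(); ok {
			fmt.Println("underlying block:", blockID) // "f0f0"
		}
	}
}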

46
object/objectid_test.go Normal file
View File

@@ -0,0 +1,46 @@
package object
import (
"testing"
)
func TestParseObjectID(t *testing.T) {
cases := []struct {
text string
isValid bool
}{
{"Df0f0", true},
{"IDf0f0", true},
{"IDf0f0", true},
{"IIDf0f0", true},
{"Dxf0f0", true},
{"IDxf0f0", true},
{"IDxf0f0", true},
{"IIDxf0f0", true},
{"Dxf0f", false},
{"IDxf0f", false},
{"Da", false},
{"Daf0f0", false},
{"", false},
{"B!$@#$!@#$", false},
{"X", false},
{"I.", false},
{"I.x", false},
{"I.af", false},
{"Ix.ag", false},
{"Iab.", false},
{"I1", false},
{"I1,", false},
{"I-1,X", false},
{"Xsomething", false},
}
for _, tc := range cases {
_, err := ParseID(tc.text)
if err != nil && tc.isValid {
t.Errorf("error parsing %q: %v", tc.text, err)
} else if err == nil && !tc.isValid {
t.Errorf("unexpected success parsing %v", tc.text)
}
}
}

209
open.go Normal file
View File

@@ -0,0 +1,209 @@
package repo
import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"path/filepath"
"github.com/kopia/repo/block"
"github.com/kopia/repo/internal/repologging"
"github.com/kopia/repo/manifest"
"github.com/kopia/repo/object"
"github.com/kopia/repo/storage"
"github.com/kopia/repo/storage/logging"
"github.com/pkg/errors"
)
var (
log = repologging.Logger("kopia/repo")
)
// Options provides configuration parameters for connection to a repository.
type Options struct {
TraceStorage func(f string, args ...interface{}) // Logs all storage access using provided Printf-style function
ObjectManagerOptions object.ManagerOptions
}
// Open opens a Repository specified in the configuration file.
func Open(ctx context.Context, configFile string, password string, options *Options) (rep *Repository, err error) {
log.Debugf("opening repository from %v", configFile)
defer func() {
if err == nil {
log.Debugf("opened repository")
} else {
log.Errorf("failed to open repository: %v", err)
}
}()
if options == nil {
options = &Options{}
}
configFile, err = filepath.Abs(configFile)
if err != nil {
return nil, err
}
log.Debugf("loading config from file: %v", configFile)
lc, err := loadConfigFromFile(configFile)
if err != nil {
return nil, err
}
log.Debugf("opening storage: %v", lc.Storage.Type)
st, err := storage.NewStorage(ctx, lc.Storage)
if err != nil {
return nil, errors.Wrap(err, "cannot open storage")
}
if options.TraceStorage != nil {
st = logging.NewWrapper(st, logging.Prefix("[STORAGE] "), logging.Output(options.TraceStorage))
}
r, err := OpenWithConfig(ctx, st, lc, password, options, lc.Caching)
if err != nil {
st.Close(ctx) //nolint:errcheck
return nil, err
}
r.ConfigFile = configFile
return r, nil
}
// OpenWithConfig opens the repository with a given configuration, avoiding the need for a config file.
func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, password string, options *Options, caching block.CachingOptions) (*Repository, error) {
log.Debugf("reading encrypted format block")
// Read format block, potentially from cache.
fb, err := readAndCacheFormatBlockBytes(ctx, st, caching.CacheDirectory)
if err != nil {
return nil, errors.Wrap(err, "unable to read format block")
}
f, err := parseFormatBlock(fb)
if err != nil {
return nil, errors.Wrap(err, "can't parse format block")
}
fb, err = addFormatBlockChecksumAndLength(fb)
if err != nil {
return nil, fmt.Errorf("unable to add checksum")
}
masterKey, err := f.deriveMasterKeyFromPassword(password)
if err != nil {
return nil, err
}
repoConfig, err := f.decryptFormatBytes(masterKey)
if err != nil {
return nil, errors.Wrap(err, "unable to decrypt repository config")
}
caching.HMACSecret = deriveKeyFromMasterKey(masterKey, f.UniqueID, []byte("local-cache-integrity"), 16)
fo := repoConfig.FormattingOptions
if fo.MaxPackSize == 0 {
fo.MaxPackSize = repoConfig.MaxBlockSize
}
log.Debugf("initializing block manager")
bm, err := block.NewManager(ctx, st, fo, caching, fb)
if err != nil {
return nil, errors.Wrap(err, "unable to open block manager")
}
log.Debugf("initializing object manager")
om, err := object.NewObjectManager(ctx, bm, repoConfig.Format, options.ObjectManagerOptions)
if err != nil {
return nil, errors.Wrap(err, "unable to open object manager")
}
log.Debugf("initializing manifest manager")
manifests, err := manifest.NewManager(ctx, bm)
if err != nil {
return nil, errors.Wrap(err, "unable to open manifests")
}
return &Repository{
Blocks: bm,
Objects: om,
Storage: st,
Manifests: manifests,
CacheDirectory: caching.CacheDirectory,
UniqueID: f.UniqueID,
formatBlock: f,
masterKey: masterKey,
}, nil
}
// SetCachingConfig changes caching configuration for a given repository config file.
func SetCachingConfig(ctx context.Context, configFile string, opt block.CachingOptions) error {
configFile, err := filepath.Abs(configFile)
if err != nil {
return err
}
lc, err := loadConfigFromFile(configFile)
if err != nil {
return err
}
st, err := storage.NewStorage(ctx, lc.Storage)
if err != nil {
return errors.Wrap(err, "cannot open storage")
}
fb, err := readAndCacheFormatBlockBytes(ctx, st, "")
if err != nil {
return errors.Wrap(err, "can't read format block")
}
f, err := parseFormatBlock(fb)
if err != nil {
return errors.Wrap(err, "can't parse format block")
}
if err = setupCaching(configFile, lc, opt, f.UniqueID); err != nil {
return errors.Wrap(err, "unable to set up caching")
}
d, err := json.MarshalIndent(&lc, "", " ")
if err != nil {
return err
}
if err := ioutil.WriteFile(configFile, d, 0600); err != nil {
return err
}
return nil
}
func readAndCacheFormatBlockBytes(ctx context.Context, st storage.Storage, cacheDirectory string) ([]byte, error) {
cachedFile := filepath.Join(cacheDirectory, "kopia.repository")
if cacheDirectory != "" {
b, err := ioutil.ReadFile(cachedFile)
if err == nil {
// read from cache.
return b, nil
}
}
b, err := st.GetBlock(ctx, FormatBlockID, 0, -1)
if err != nil {
return nil, err
}
if cacheDirectory != "" {
if err := ioutil.WriteFile(cachedFile, b, 0600); err != nil {
log.Warningf("warning: unable to write cache: %v", err)
}
}
return b, nil
}
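
A hedged sketch of the typical open, use and close sequence from a caller; the config path and password are placeholders:
// Sketch: opening a repository from a config file and closing it cleanly.
// The path and password below are placeholders, not values from this change.
package main

import (
	"context"
	"log"

	"github.com/kopia/repo"
)

func main() {
	ctx := context.Background()
	r, err := repo.Open(ctx, "/path/to/repository.config", "password", &repo.Options{})
	if err != nil {
		log.Fatalf("open failed: %v", err)
	}
	defer r.Close(ctx) //nolint:errcheck

	// r.Objects, r.Blocks and r.Manifests are now ready for use.
}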

87
repository.go Normal file
View File

@@ -0,0 +1,87 @@
package repo
import (
"context"
"time"
"github.com/kopia/repo/block"
"github.com/kopia/repo/manifest"
"github.com/kopia/repo/object"
"github.com/kopia/repo/storage"
"github.com/pkg/errors"
)
// Repository represents storage where both content-addressable and user-addressable data is kept.
type Repository struct {
Blocks *block.Manager
Objects *object.Manager
Storage storage.Storage
Manifests *manifest.Manager
UniqueID []byte
ConfigFile string
CacheDirectory string
formatBlock *formatBlock
masterKey []byte
}
// Close closes the repository and releases all resources.
func (r *Repository) Close(ctx context.Context) error {
if err := r.Manifests.Flush(ctx); err != nil {
return errors.Wrap(err, "error flushing manifests")
}
if err := r.Blocks.Flush(ctx); err != nil {
return errors.Wrap(err, "error closing blocks")
}
if err := r.Storage.Close(ctx); err != nil {
return errors.Wrap(err, "error closing storage")
}
return nil
}
// Flush waits for all in-flight writes to complete.
func (r *Repository) Flush(ctx context.Context) error {
if err := r.Manifests.Flush(ctx); err != nil {
return err
}
return r.Blocks.Flush(ctx)
}
// Refresh periodically makes external changes visible to repository.
func (r *Repository) Refresh(ctx context.Context) error {
updated, err := r.Blocks.Refresh(ctx)
if err != nil {
return errors.Wrap(err, "error refreshing block index")
}
if !updated {
return nil
}
log.Debugf("block index refreshed")
if err := r.Manifests.Refresh(ctx); err != nil {
return errors.Wrap(err, "error reloading manifests")
}
log.Debugf("manifests refreshed")
return nil
}
// RefreshPeriodically periodically refreshes the repository to reflect the changes made by other hosts.
func (r *Repository) RefreshPeriodically(ctx context.Context, interval time.Duration) {
for {
select {
case <-ctx.Done():
return
case <-time.After(interval):
if err := r.Refresh(ctx); err != nil {
log.Warningf("error refreshing repository: %v", err)
}
}
}
}
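
A minimal sketch, written as if inside package repo, of running the periodic refresh in the background; the 15-minute interval is illustrative:
// Sketch: start a background refresh loop for a long-lived repository
// handle and return a function that stops it.
func startBackgroundRefresh(r *Repository) (stop func()) {
	ctx, cancel := context.WithCancel(context.Background())
	go r.RefreshPeriodically(ctx, 15*time.Minute)
	return cancel
}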

328
repository_test.go Normal file
View File

@@ -0,0 +1,328 @@
package repo_test
import (
"bytes"
"context"
cryptorand "crypto/rand"
"fmt"
"io/ioutil"
"math/rand"
"reflect"
"runtime/debug"
"testing"
"github.com/kopia/repo"
"github.com/kopia/repo/block"
"github.com/kopia/repo/internal/repotesting"
"github.com/kopia/repo/object"
"github.com/kopia/repo/storage"
)
func TestWriters(t *testing.T) {
cases := []struct {
data []byte
objectID object.ID
}{
{
[]byte("the quick brown fox jumps over the lazy dog"),
"345acef0bcf82f1daf8e49fab7b7fac7ec296c518501eabea3645b99345a4e08",
},
{make([]byte, 100), "1d804f1f69df08f3f59070bf962de69433e3d61ac18522a805a84d8c92741340"}, // 100 zero bytes
}
ctx := context.Background()
for _, c := range cases {
var env repotesting.Environment
defer env.Setup(t).Close(t)
writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
if _, err := writer.Write(c.data); err != nil {
t.Fatalf("write error: %v", err)
}
result, err := writer.Result()
if err != nil {
t.Errorf("error getting writer results for %v, expected: %v", c.data, c.objectID.String())
continue
}
if !objectIDsEqual(result, c.objectID) {
t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String())
}
env.Repository.Blocks.Flush(ctx)
}
}
func objectIDsEqual(o1 object.ID, o2 object.ID) bool {
return reflect.DeepEqual(o1, o2)
}
func TestWriterCompleteChunkInTwoWrites(t *testing.T) {
var env repotesting.Environment
defer env.Setup(t).Close(t)
ctx := context.Background()
bytes := make([]byte, 100)
writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
writer.Write(bytes[0:50]) //nolint:errcheck
writer.Write(bytes[0:50]) //nolint:errcheck
result, err := writer.Result()
if result != "1d804f1f69df08f3f59070bf962de69433e3d61ac18522a805a84d8c92741340" {
t.Errorf("unexpected result: %v err: %v", result, err)
}
}
func TestPackingSimple(t *testing.T) {
var env repotesting.Environment
defer env.Setup(t).Close(t)
ctx := context.Background()
content1 := "hello, how do you do?"
content2 := "hi, how are you?"
content3 := "thank you!"
oid1a := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1a")
oid1b := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1b")
oid2a := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2a")
oid2b := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2b")
oid3a := writeObject(ctx, t, env.Repository, []byte(content3), "packed-object-3a")
oid3b := writeObject(ctx, t, env.Repository, []byte(content3), "packed-object-3b")
verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
oid2c := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2c")
oid1c := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1c")
env.Repository.Blocks.Flush(ctx)
if got, want := oid1a.String(), oid1b.String(); got != want {
t.Errorf("oid1a(%q) != oid1b(%q)", got, want)
}
if got, want := oid1a.String(), oid1c.String(); got != want {
t.Errorf("oid1a(%q) != oid1c(%q)", got, want)
}
if got, want := oid2a.String(), oid2b.String(); got != want {
t.Errorf("oid2(%q)a != oidb(%q)", got, want)
}
if got, want := oid2a.String(), oid2c.String(); got != want {
t.Errorf("oid2(%q)a != oidc(%q)", got, want)
}
if got, want := oid3a.String(), oid3b.String(); got != want {
t.Errorf("oid3a(%q) != oid3b(%q)", got, want)
}
env.VerifyStorageBlockCount(t, 3)
env.MustReopen(t)
verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")
if err := env.Repository.Blocks.CompactIndexes(ctx, block.CompactOptions{MinSmallBlocks: 1, MaxSmallBlocks: 1}); err != nil {
t.Errorf("optimize error: %v", err)
}
env.MustReopen(t)
verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")
if err := env.Repository.Blocks.CompactIndexes(ctx, block.CompactOptions{MinSmallBlocks: 1, MaxSmallBlocks: 1}); err != nil {
t.Errorf("optimize error: %v", err)
}
env.MustReopen(t)
verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")
}
func TestHMAC(t *testing.T) {
var env repotesting.Environment
defer env.Setup(t).Close(t)
ctx := context.Background()
content := bytes.Repeat([]byte{0xcd}, 50)
w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
w.Write(content) //nolint:errcheck
result, err := w.Result()
if result.String() != "367352007ee6ca9fa755ce8352347d092c17a24077fd33c62f655574a8cf906d" {
t.Errorf("unexpected result: %v err: %v", result.String(), err)
}
}
func TestUpgrade(t *testing.T) {
var env repotesting.Environment
defer env.Setup(t).Close(t)
ctx := context.Background()
if err := env.Repository.Upgrade(ctx); err != nil {
t.Errorf("upgrade error: %v", err)
}
if err := env.Repository.Upgrade(ctx); err != nil {
t.Errorf("2nd upgrade error: %v", err)
}
}
func TestReaderStoredBlockNotFound(t *testing.T) {
var env repotesting.Environment
defer env.Setup(t).Close(t)
ctx := context.Background()
objectID, err := object.ParseID("Ddeadbeef")
if err != nil {
t.Errorf("cannot parse object ID: %v", err)
}
reader, err := env.Repository.Objects.Open(ctx, objectID)
if err != storage.ErrBlockNotFound || reader != nil {
t.Errorf("unexpected result: reader: %v err: %v", reader, err)
}
}
func TestEndToEndReadAndSeek(t *testing.T) {
var env repotesting.Environment
defer env.Setup(t).Close(t)
ctx := context.Background()
for _, size := range []int{1, 199, 200, 201, 9999, 512434} {
// Create a random data sample of the specified size.
randomData := make([]byte, size)
cryptorand.Read(randomData) //nolint:errcheck
writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
writer.Write(randomData) //nolint:errcheck
objectID, err := writer.Result()
writer.Close()
if err != nil {
t.Errorf("cannot get writer result for %v: %v", size, err)
continue
}
verify(ctx, t, env.Repository, objectID, randomData, fmt.Sprintf("%v %v", objectID, size))
}
}
func writeObject(ctx context.Context, t *testing.T, rep *repo.Repository, data []byte, testCaseID string) object.ID {
w := rep.Objects.NewWriter(ctx, object.WriterOptions{})
if _, err := w.Write(data); err != nil {
t.Fatalf("can't write object %q - write failed: %v", testCaseID, err)
}
oid, err := w.Result()
if err != nil {
t.Fatalf("can't write object %q - result failed: %v", testCaseID, err)
}
return oid
}
func verify(ctx context.Context, t *testing.T, rep *repo.Repository, objectID object.ID, expectedData []byte, testCaseID string) {
t.Helper()
reader, err := rep.Objects.Open(ctx, objectID)
if err != nil {
t.Errorf("cannot get reader for %v (%v): %v %v", testCaseID, objectID, err, string(debug.Stack()))
return
}
for i := 0; i < 20; i++ {
sampleSize := int(rand.Int31n(300))
seekOffset := int(rand.Int31n(int32(len(expectedData))))
if seekOffset+sampleSize > len(expectedData) {
sampleSize = len(expectedData) - seekOffset
}
if sampleSize > 0 {
got := make([]byte, sampleSize)
if offset, err := reader.Seek(int64(seekOffset), 0); err != nil || offset != int64(seekOffset) {
t.Errorf("seek error: %v offset=%v expected:%v", err, offset, seekOffset)
}
if n, err := reader.Read(got); err != nil || n != sampleSize {
t.Errorf("invalid data: n=%v, expected=%v, err:%v", n, sampleSize, err)
}
expected := expectedData[seekOffset : seekOffset+sampleSize]
if !bytes.Equal(expected, got) {
t.Errorf("incorrect data read for %v: expected: %x, got: %x", testCaseID, expected, got)
}
}
}
}
func TestFormats(t *testing.T) {
ctx := context.Background()
makeFormat := func(hash, encryption string) func(*repo.NewRepositoryOptions) {
return func(n *repo.NewRepositoryOptions) {
n.BlockFormat.Hash = hash
n.BlockFormat.Encryption = encryption
n.BlockFormat.HMACSecret = []byte("key")
n.ObjectFormat.MaxBlockSize = 10000
n.ObjectFormat.Splitter = "FIXED"
}
}
cases := []struct {
format func(*repo.NewRepositoryOptions)
oids map[string]object.ID
}{
{
format: func(n *repo.NewRepositoryOptions) {
n.ObjectFormat.MaxBlockSize = 10000
},
oids: map[string]object.ID{
"": "b613679a0814d9ec772f95d778c35fc5ff1697c493715653c6c712144292c5ad",
"The quick brown fox jumps over the lazy dog": "fb011e6154a19b9a4c767373c305275a5a69e8b68b0b4c9200c383dced19a416",
},
},
{
format: makeFormat("HMAC-SHA256", "NONE"),
oids: map[string]object.ID{
"The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8",
},
},
{
format: makeFormat("HMAC-SHA256-128", "NONE"),
oids: map[string]object.ID{
"The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143",
},
},
}
for caseIndex, c := range cases {
var env repotesting.Environment
defer env.Setup(t, c.format).Close(t)
for k, v := range c.oids {
bytesToWrite := []byte(k)
w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
w.Write(bytesToWrite) //nolint:errcheck
oid, err := w.Result()
if err != nil {
t.Errorf("error: %v", err)
}
if !objectIDsEqual(oid, v) {
t.Errorf("invalid oid for #%v\ngot:\n%#v\nexpected:\n%#v", caseIndex, oid.String(), v.String())
}
rc, err := env.Repository.Objects.Open(ctx, oid)
if err != nil {
t.Errorf("open failed: %v", err)
continue
}
bytesRead, err := ioutil.ReadAll(rc)
if err != nil {
t.Errorf("error reading: %v", err)
}
if !bytes.Equal(bytesRead, bytesToWrite) {
t.Errorf("data mismatch, read:%x vs written:%v", bytesRead, bytesToWrite)
}
}
}
}

47
storage/config.go Normal file
View File

@@ -0,0 +1,47 @@
package storage
import (
"encoding/json"
"fmt"
)
// ConnectionInfo represents JSON-serializable configuration of a blob storage.
type ConnectionInfo struct {
Type string
Config interface{}
}
// UnmarshalJSON parses the JSON-encoded data into ConnectionInfo.
func (c *ConnectionInfo) UnmarshalJSON(b []byte) error {
raw := struct {
Type string `json:"type"`
Data json.RawMessage `json:"config"`
}{}
if err := json.Unmarshal(b, &raw); err != nil {
return err
}
c.Type = raw.Type
f := factories[raw.Type]
if f == nil {
return fmt.Errorf("storage type '%v' not registered", raw.Type)
}
c.Config = f.defaultConfigFunc()
if err := json.Unmarshal(raw.Data, c.Config); err != nil {
return fmt.Errorf("unable to unmarshal config: %v", err)
}
return nil
}
// MarshalJSON returns JSON-encoded storage configuration.
func (c ConnectionInfo) MarshalJSON() ([]byte, error) {
return json.Marshal(struct {
Type string `json:"type"`
Data interface{} `json:"config"`
}{
Type: c.Type,
Data: c.Config,
})
}
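
Because UnmarshalJSON resolves the concrete config type through the factory registry, decoding only works for storage types whose provider packages have been imported (and therefore registered). A hedged round-trip sketch using the filesystem provider added later in this change; the path is a placeholder:
// Sketch: JSON round-trip of a ConnectionInfo. Importing the filesystem
// package registers its factory, which UnmarshalJSON needs in order to
// allocate the concrete config type.
package main

import (
	"encoding/json"
	"fmt"

	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ci := storage.ConnectionInfo{
		Type:   "filesystem",
		Config: &filesystem.Options{Path: "/tmp/repo"},
	}
	b, err := json.Marshal(ci)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b)) // JSON of the form {"type":"filesystem","config":{...}}

	var decoded storage.ConnectionInfo
	if err := json.Unmarshal(b, &decoded); err != nil {
		panic(err)
	}
	fmt.Println(decoded.Type, decoded.Config.(*filesystem.Options).Path)
}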

2
storage/doc.go Normal file
View File

@@ -0,0 +1,2 @@
// Package storage implements simple storage of immutable, unstructured binary large objects (BLOBs).
package storage

View File

@@ -0,0 +1,40 @@
package filesystem
import "os"
// Options defines options for Filesystem-backed storage.
type Options struct {
Path string `json:"path"`
DirectoryShards []int `json:"dirShards"`
FileMode os.FileMode `json:"fileMode,omitempty"`
DirectoryMode os.FileMode `json:"dirMode,omitempty"`
FileUID *int `json:"uid,omitempty"`
FileGID *int `json:"gid,omitempty"`
}
func (fso *Options) fileMode() os.FileMode {
if fso.FileMode == 0 {
return fsDefaultFileMode
}
return fso.FileMode
}
func (fso *Options) dirMode() os.FileMode {
if fso.DirectoryMode == 0 {
return fsDefaultDirMode
}
return fso.DirectoryMode
}
func (fso *Options) shards() []int {
if fso.DirectoryShards == nil {
return fsDefaultShards
}
return fso.DirectoryShards
}

View File

@@ -0,0 +1,248 @@
// Package filesystem implements filesystem-based Storage.
package filesystem
import (
"context"
"fmt"
"io"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"strings"
"time"
"github.com/kopia/repo/internal/repologging"
"github.com/kopia/repo/storage"
)
var log = repologging.Logger("repo/filesystem")
const (
fsStorageType = "filesystem"
fsStorageChunkSuffix = ".f"
)
var (
fsDefaultShards = []int{3, 3}
fsDefaultFileMode os.FileMode = 0600
fsDefaultDirMode os.FileMode = 0700
)
type fsStorage struct {
Options
}
func (fs *fsStorage) GetBlock(ctx context.Context, blockID string, offset, length int64) ([]byte, error) {
_, path := fs.getShardedPathAndFilePath(blockID)
f, err := os.Open(path)
if os.IsNotExist(err) {
return nil, storage.ErrBlockNotFound
}
if err != nil {
return nil, err
}
defer f.Close() //nolint:errcheck
if length < 0 {
return ioutil.ReadAll(f)
}
if _, err = f.Seek(offset, io.SeekStart); err != nil {
return nil, err
}
b, err := ioutil.ReadAll(io.LimitReader(f, length))
if err != nil {
return nil, err
}
if int64(len(b)) != length {
return nil, fmt.Errorf("invalid length")
}
return b, nil
}
func getstringFromFileName(name string) (string, bool) {
if strings.HasSuffix(name, fsStorageChunkSuffix) {
return name[0 : len(name)-len(fsStorageChunkSuffix)], true
}
return string(""), false
}
func makeFileName(blockID string) string {
return blockID + fsStorageChunkSuffix
}
func (fs *fsStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
var walkDir func(string, string) error
walkDir = func(directory string, currentPrefix string) error {
entries, err := ioutil.ReadDir(directory)
if err != nil {
return err
}
for _, e := range entries {
if e.IsDir() {
newPrefix := currentPrefix + e.Name()
var match bool
if len(prefix) > len(newPrefix) {
match = strings.HasPrefix(prefix, newPrefix)
} else {
match = strings.HasPrefix(newPrefix, prefix)
}
if match {
if err := walkDir(directory+"/"+e.Name(), currentPrefix+e.Name()); err != nil {
return err
}
}
} else if fullID, ok := getstringFromFileName(currentPrefix + e.Name()); ok {
if strings.HasPrefix(fullID, prefix) {
if err := callback(storage.BlockMetadata{
BlockID: fullID,
Length: e.Size(),
Timestamp: e.ModTime(),
}); err != nil {
return err
}
}
}
}
return nil
}
return walkDir(fs.Path, "")
}
// TouchBlock updates file modification time to current time if it's sufficiently old.
func (fs *fsStorage) TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error {
_, path := fs.getShardedPathAndFilePath(blockID)
st, err := os.Stat(path)
if err != nil {
return err
}
n := time.Now()
age := n.Sub(st.ModTime())
if age < threshold {
return nil
}
log.Debugf("updating timestamp on %v to %v", path, n)
return os.Chtimes(path, n, n)
}
func (fs *fsStorage) PutBlock(ctx context.Context, blockID string, data []byte) error {
_, path := fs.getShardedPathAndFilePath(blockID)
tempFile := fmt.Sprintf("%s.tmp.%d", path, rand.Int())
f, err := fs.createTempFileAndDir(tempFile)
if err != nil {
return fmt.Errorf("cannot create temporary file: %v", err)
}
if _, err = f.Write(data); err != nil {
return fmt.Errorf("can't write temporary file: %v", err)
}
if err = f.Close(); err != nil {
return fmt.Errorf("can't close temporary file: %v", err)
}
err = os.Rename(tempFile, path)
if err != nil {
if removeErr := os.Remove(tempFile); removeErr != nil {
log.Warningf("can't remove temp file: %v", removeErr)
}
return err
}
if fs.FileUID != nil && fs.FileGID != nil && os.Geteuid() == 0 {
if chownErr := os.Chown(path, *fs.FileUID, *fs.FileGID); chownErr != nil {
log.Warningf("can't change file permissions: %v", chownErr)
}
}
return nil
}
func (fs *fsStorage) createTempFileAndDir(tempFile string) (*os.File, error) {
flags := os.O_CREATE | os.O_WRONLY | os.O_EXCL
f, err := os.OpenFile(tempFile, flags, fs.fileMode())
if os.IsNotExist(err) {
if err = os.MkdirAll(filepath.Dir(tempFile), fs.dirMode()); err != nil {
return nil, fmt.Errorf("cannot create directory: %v", err)
}
return os.OpenFile(tempFile, flags, fs.fileMode())
}
return f, err
}
func (fs *fsStorage) DeleteBlock(ctx context.Context, blockID string) error {
_, path := fs.getShardedPathAndFilePath(blockID)
err := os.Remove(path)
if err == nil || os.IsNotExist(err) {
return nil
}
return err
}
func (fs *fsStorage) getShardDirectory(blockID string) (string, string) {
shardPath := fs.Path
if len(blockID) < 20 {
return shardPath, blockID
}
for _, size := range fs.shards() {
shardPath = filepath.Join(shardPath, blockID[0:size])
blockID = blockID[size:]
}
return shardPath, blockID
}
func (fs *fsStorage) getShardedPathAndFilePath(blockID string) (string, string) {
shardPath, blockID := fs.getShardDirectory(blockID)
result := filepath.Join(shardPath, makeFileName(blockID))
return shardPath, result
}
func (fs *fsStorage) ConnectionInfo() storage.ConnectionInfo {
return storage.ConnectionInfo{
Type: fsStorageType,
Config: &fs.Options,
}
}
func (fs *fsStorage) Close(ctx context.Context) error {
return nil
}
// New creates new filesystem-backed storage in a specified directory.
func New(ctx context.Context, opts *Options) (storage.Storage, error) {
var err error
if _, err = os.Stat(opts.Path); err != nil {
return nil, fmt.Errorf("cannot access storage path: %v", err)
}
r := &fsStorage{
Options: *opts,
}
return r, nil
}
func init() {
storage.AddSupportedStorage(
fsStorageType,
func() interface{} { return &Options{} },
func(ctx context.Context, o interface{}) (storage.Storage, error) {
return New(ctx, o.(*Options))
})
}
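
With the default shard layout {3, 3}, a sufficiently long block ID is stored two directory levels deep, so a block 0123456789... lands at <path>/012/345/6789....f. A hedged usage sketch; the temp directory and block ID are placeholders:
// Sketch: exercising the filesystem storage directly.
package main

import (
	"context"
	"fmt"
	"io/ioutil"
	"os"

	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ctx := context.Background()
	dir, err := ioutil.TempDir("", "kopia-fs")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	st, err := filesystem.New(ctx, &filesystem.Options{Path: dir})
	if err != nil {
		panic(err)
	}
	defer st.Close(ctx) //nolint:errcheck

	blockID := "0123456789abcdef0123456789abcdef" // long enough to be sharded
	if err := st.PutBlock(ctx, blockID, []byte("hello")); err != nil {
		panic(err)
	}
	b, err := st.GetBlock(ctx, blockID, 0, -1)
	fmt.Println(string(b), err) // "hello" <nil>
}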

View File

@@ -0,0 +1,120 @@
package filesystem
import (
"context"
"io/ioutil"
"os"
"reflect"
"sort"
"testing"
"time"
"github.com/kopia/repo/storage"
"github.com/kopia/repo/internal/storagetesting"
)
func TestFileStorage(t *testing.T) {
t.Parallel()
ctx := context.Background()
// Test various shard configurations.
for _, shardSpec := range [][]int{
{0},
{1},
{3, 3},
{2},
{1, 1},
{1, 2},
{2, 2, 2},
} {
path, _ := ioutil.TempDir("", "r-fs")
defer os.RemoveAll(path)
r, err := New(ctx, &Options{
Path: path,
DirectoryShards: shardSpec,
})
if r == nil || err != nil {
t.Errorf("unexpected result: %v %v", r, err)
}
storagetesting.VerifyStorage(ctx, t, r)
storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r)
if err := r.Close(ctx); err != nil {
t.Fatalf("err: %v", err)
}
}
}
func TestFileStorageTouch(t *testing.T) {
t.Parallel()
ctx := context.Background()
t1 := "392ee1bc299db9f235e046a62625afb84902"
t2 := "2a7ff4f29eddbcd4c18fa9e73fec20bbb71f"
t3 := "0dae5918f83e6a24c8b3e274ca1026e43f24"
path, _ := ioutil.TempDir("", "r-fs")
defer os.RemoveAll(path)
r, err := New(ctx, &Options{
Path: path,
})
if r == nil || err != nil {
t.Errorf("unexpected result: %v %v", r, err)
}
fs := r.(*fsStorage)
assertNoError(t, fs.PutBlock(ctx, t1, []byte{1}))
time.Sleep(1 * time.Second) // sleep a bit to accommodate Apple filesystems with low timestamp resolution
assertNoError(t, fs.PutBlock(ctx, t2, []byte{1}))
time.Sleep(1 * time.Second)
assertNoError(t, fs.PutBlock(ctx, t3, []byte{1}))
verifyBlockTimestampOrder(t, fs, t1, t2, t3)
assertNoError(t, fs.TouchBlock(ctx, t2, 1*time.Hour)) // has no effect, all timestamps are very new
verifyBlockTimestampOrder(t, fs, t1, t2, t3)
assertNoError(t, fs.TouchBlock(ctx, t1, 0)) // moves t1 to the top of the pile
verifyBlockTimestampOrder(t, fs, t2, t3, t1)
time.Sleep(1 * time.Second)
assertNoError(t, fs.TouchBlock(ctx, t2, 0)) // moves t2 to the top of the pile
verifyBlockTimestampOrder(t, fs, t3, t1, t2)
time.Sleep(1 * time.Second)
assertNoError(t, fs.TouchBlock(ctx, t1, 0)) // moves t1 to the top of the pile
verifyBlockTimestampOrder(t, fs, t3, t2, t1)
}
func verifyBlockTimestampOrder(t *testing.T, st storage.Storage, want ...string) {
blocks, err := storage.ListAllBlocks(context.Background(), st, "")
if err != nil {
t.Errorf("error listing blocks: %v", err)
return
}
sort.Slice(blocks, func(i, j int) bool {
return blocks[i].Timestamp.Before(blocks[j].Timestamp)
})
var got []string
for _, b := range blocks {
got = append(got, b.BlockID)
}
if !reflect.DeepEqual(got, want) {
t.Errorf("incorrect block order: %v, wanted %v", blocks, want)
}
}
func assertNoError(t *testing.T, err error) {
t.Helper()
if err != nil {
t.Errorf("err: %v", err)
}
}

View File

@@ -0,0 +1,20 @@
package gcs
// Options defines options for Google Cloud Storage-backed storage.
type Options struct {
// BucketName is the name of the GCS bucket where data is stored.
BucketName string `json:"bucket"`
// Prefix specifies additional string to prepend to all objects.
Prefix string `json:"prefix,omitempty"`
// ServiceAccountCredentials specifies the name of the file with GCS credentials.
ServiceAccountCredentials string `json:"credentialsFile,omitempty"`
// ReadOnly causes GCS connection to be opened with read-only scope to prevent accidental mutations.
ReadOnly bool `json:"readOnly,omitempty"`
MaxUploadSpeedBytesPerSecond int `json:"maxUploadSpeedBytesPerSecond,omitempty"`
MaxDownloadSpeedBytesPerSecond int `json:"maxDownloadSpeedBytesPerSecond,omitempty"`
}

270
storage/gcs/gcs_storage.go Normal file
View File

@@ -0,0 +1,270 @@
// Package gcs implements Storage based on Google Cloud Storage bucket.
package gcs
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"google.golang.org/api/googleapi"
"github.com/efarrer/iothrottler"
"github.com/kopia/repo/internal/retry"
"github.com/kopia/repo/internal/throttle"
"github.com/kopia/repo/storage"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
gcsclient "cloud.google.com/go/storage"
)
const (
gcsStorageType = "gcs"
)
type gcsStorage struct {
Options
ctx context.Context
storageClient *gcsclient.Client
bucket *gcsclient.BucketHandle
downloadThrottler *iothrottler.IOThrottlerPool
uploadThrottler *iothrottler.IOThrottlerPool
}
func (gcs *gcsStorage) GetBlock(ctx context.Context, b string, offset, length int64) ([]byte, error) {
if offset < 0 {
return nil, fmt.Errorf("invalid offset")
}
attempt := func() (interface{}, error) {
reader, err := gcs.bucket.Object(gcs.getObjectNameString(b)).NewRangeReader(gcs.ctx, offset, length)
if err != nil {
return nil, err
}
defer reader.Close() //nolint:errcheck
return ioutil.ReadAll(reader)
}
v, err := exponentialBackoff(fmt.Sprintf("GetBlock(%q,%v,%v)", b, offset, length), attempt)
if err != nil {
return nil, translateError(err)
}
fetched := v.([]byte)
if len(fetched) != int(length) && length >= 0 {
return nil, fmt.Errorf("invalid offset/length")
}
return fetched, nil
}
func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error) {
return retry.WithExponentialBackoff(desc, att, isRetriableError)
}
func isRetriableError(err error) bool {
if apiError, ok := err.(*googleapi.Error); ok {
return apiError.Code >= 500
}
switch err {
case nil:
return false
case gcsclient.ErrObjectNotExist:
return false
case gcsclient.ErrBucketNotExist:
return false
default:
return true
}
}
func translateError(err error) error {
switch err {
case nil:
return nil
case gcsclient.ErrObjectNotExist:
return storage.ErrBlockNotFound
case gcsclient.ErrBucketNotExist:
return storage.ErrBlockNotFound
default:
return fmt.Errorf("unexpected GCS error: %v", err)
}
}
func (gcs *gcsStorage) PutBlock(ctx context.Context, b string, data []byte) error {
ctx, cancel := context.WithCancel(ctx)
obj := gcs.bucket.Object(gcs.getObjectNameString(b))
writer := obj.NewWriter(ctx)
writer.ChunkSize = 1 << 20
writer.ContentType = "application/x-kopia"
progressCallback := storage.ProgressCallback(ctx)
if progressCallback != nil {
progressCallback(b, 0, int64(len(data)))
defer progressCallback(b, int64(len(data)), int64(len(data)))
writer.ProgressFunc = func(completed int64) {
if completed != int64(len(data)) {
progressCallback(b, completed, int64(len(data)))
}
}
}
_, err := io.Copy(writer, bytes.NewReader(data))
if err != nil {
// cancelling the context before closing the writer causes it to abandon the upload.
cancel()
writer.Close() //nolint:errcheck
return translateError(err)
}
defer cancel()
// calling Close() before cancel() causes the writer to commit the upload.
return translateError(writer.Close())
}
func (gcs *gcsStorage) DeleteBlock(ctx context.Context, b string) error {
attempt := func() (interface{}, error) {
return nil, gcs.bucket.Object(gcs.getObjectNameString(b)).Delete(gcs.ctx)
}
_, err := exponentialBackoff(fmt.Sprintf("DeleteBlock(%q)", b), attempt)
err = translateError(err)
if err == storage.ErrBlockNotFound {
return nil
}
return err
}
func (gcs *gcsStorage) getObjectNameString(blockID string) string {
return gcs.Prefix + blockID
}
func (gcs *gcsStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
lst := gcs.bucket.Objects(gcs.ctx, &gcsclient.Query{
Prefix: gcs.getObjectNameString(prefix),
})
oa, err := lst.Next()
for err == nil {
if err = callback(storage.BlockMetadata{
BlockID: oa.Name[len(gcs.Prefix):],
Length: oa.Size,
Timestamp: oa.Created,
}); err != nil {
return err
}
oa, err = lst.Next()
}
if err != iterator.Done {
return err
}
return nil
}
func (gcs *gcsStorage) ConnectionInfo() storage.ConnectionInfo {
return storage.ConnectionInfo{
Type: gcsStorageType,
Config: &gcs.Options,
}
}
func (gcs *gcsStorage) Close(ctx context.Context) error {
gcs.storageClient.Close() //nolint:errcheck
return nil
}
func toBandwidth(bytesPerSecond int) iothrottler.Bandwidth {
if bytesPerSecond <= 0 {
return iothrottler.Unlimited
}
return iothrottler.Bandwidth(bytesPerSecond) * iothrottler.BytesPerSecond
}
func tokenSourceFromCredentialsFile(ctx context.Context, fn string, scopes ...string) (oauth2.TokenSource, error) {
data, err := ioutil.ReadFile(fn)
if err != nil {
return nil, err
}
cfg, err := google.JWTConfigFromJSON(data, scopes...)
if err != nil {
return nil, fmt.Errorf("google.JWTConfigFromJSON: %v", err)
}
return cfg.TokenSource(ctx), nil
}
// New creates new Google Cloud Storage-backed storage with specified options:
//
// - the 'BucketName' field is required and all other parameters are optional.
//
// By default the connection reuses credentials managed by the Google Cloud SDK (https://cloud.google.com/sdk/),
// but this can be disabled by setting IgnoreDefaultCredentials to true.
func New(ctx context.Context, opt *Options) (storage.Storage, error) {
var ts oauth2.TokenSource
var err error
scope := gcsclient.ScopeReadWrite
if opt.ReadOnly {
scope = gcsclient.ScopeReadOnly
}
if sa := opt.ServiceAccountCredentials; sa != "" {
ts, err = tokenSourceFromCredentialsFile(ctx, sa, scope)
} else {
ts, err = google.DefaultTokenSource(ctx, scope)
}
if err != nil {
return nil, err
}
downloadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxDownloadSpeedBytesPerSecond))
uploadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxUploadSpeedBytesPerSecond))
hc := oauth2.NewClient(ctx, ts)
hc.Transport = throttle.NewRoundTripper(hc.Transport, downloadThrottler, uploadThrottler)
cli, err := gcsclient.NewClient(ctx, option.WithHTTPClient(hc))
if err != nil {
return nil, err
}
if opt.BucketName == "" {
return nil, errors.New("bucket name must be specified")
}
return &gcsStorage{
Options: *opt,
ctx: ctx,
storageClient: cli,
bucket: cli.Bucket(opt.BucketName),
downloadThrottler: downloadThrottler,
uploadThrottler: uploadThrottler,
}, nil
}
func init() {
storage.AddSupportedStorage(
gcsStorageType,
func() interface{} {
return &Options{}
},
func(ctx context.Context, o interface{}) (storage.Storage, error) {
return New(ctx, o.(*Options))
})
}
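
A hedged connection sketch; the bucket name, credentials file and block ID are placeholders, and omitting ServiceAccountCredentials falls back to the default token source as in New above:
// Sketch: connecting to a GCS bucket with a throttled upload speed.
package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage/gcs"
)

func main() {
	ctx := context.Background()
	st, err := gcs.New(ctx, &gcs.Options{
		BucketName:                   "my-kopia-bucket",
		ServiceAccountCredentials:    "/path/to/service-account.json",
		MaxUploadSpeedBytesPerSecond: 1 << 20, // ~1 MB/s
	})
	if err != nil {
		log.Fatalf("unable to connect to GCS: %v", err)
	}
	defer st.Close(ctx) //nolint:errcheck

	if err := st.PutBlock(ctx, "example-block", []byte("payload")); err != nil {
		log.Fatalf("put failed: %v", err)
	}
}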

View File

@@ -0,0 +1,75 @@
package gcs_test
import (
"context"
"os"
"testing"
"github.com/kopia/repo/internal/storagetesting"
"github.com/kopia/repo/storage"
"github.com/kopia/repo/storage/gcs"
)
func TestGCSStorage(t *testing.T) {
bucket := os.Getenv("KOPIA_GCS_TEST_BUCKET")
if bucket == "" {
t.Skip("KOPIA_GCS_TEST_BUCKET not provided")
}
credsFile := os.Getenv("KOPIA_GCS_CREDENTIALS_FILE")
if _, err := os.Stat(credsFile); err != nil {
t.Skip("skipping test because GCS credentials file can't be opened")
}
ctx := context.Background()
st, err := gcs.New(ctx, &gcs.Options{
BucketName: bucket,
ServiceAccountCredentials: credsFile,
})
if err != nil {
t.Fatalf("unable to connect to GCS: %v", err)
}
if err := st.ListBlocks(ctx, "", func(bm storage.BlockMetadata) error {
return st.DeleteBlock(ctx, bm.BlockID)
}); err != nil {
t.Fatalf("unable to clear GCS bucket: %v", err)
}
storagetesting.VerifyStorage(ctx, t, st)
storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st)
// delete everything again
if err := st.ListBlocks(ctx, "", func(bm storage.BlockMetadata) error {
return st.DeleteBlock(ctx, bm.BlockID)
}); err != nil {
t.Fatalf("unable to clear GCS bucket: %v", err)
}
if err := st.Close(ctx); err != nil {
t.Fatalf("err: %v", err)
}
}
func TestGCSStorageInvalid(t *testing.T) {
bucket := os.Getenv("KOPIA_GCS_TEST_BUCKET")
if bucket == "" {
t.Skip("KOPIA_GCS_TEST_BUCKET not provided")
}
ctx := context.Background()
st, err := gcs.New(ctx, &gcs.Options{
BucketName: bucket + "-no-such-bucket",
ServiceAccountCredentials: os.Getenv("KOPIA_GCS_CREDENTIALS_FILE"),
})
if err != nil {
t.Fatalf("unable to connect to GCS: %v", err)
}
defer st.Close(ctx)
if err := st.PutBlock(ctx, "xxx", []byte{1, 2, 3}); err == nil {
t.Errorf("unexpecte success when adding to non-existent bucket")
}
}

View File

@@ -0,0 +1,96 @@
// Package logging implements wrapper around Storage that logs all activity.
package logging
import (
"context"
"time"
"github.com/kopia/repo/internal/repologging"
"github.com/kopia/repo/storage"
)
var log = repologging.Logger("repo/storage")
type loggingStorage struct {
base storage.Storage
printf func(string, ...interface{})
prefix string
}
func (s *loggingStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) {
t0 := time.Now()
result, err := s.base.GetBlock(ctx, id, offset, length)
dt := time.Since(t0)
if len(result) < 20 {
s.printf(s.prefix+"GetBlock(%q,%v,%v)=(%#v, %#v) took %v", id, offset, length, result, err, dt)
} else {
s.printf(s.prefix+"GetBlock(%q,%v,%v)=({%#v bytes}, %#v) took %v", id, offset, length, len(result), err, dt)
}
return result, err
}
func (s *loggingStorage) PutBlock(ctx context.Context, id string, data []byte) error {
t0 := time.Now()
err := s.base.PutBlock(ctx, id, data)
dt := time.Since(t0)
s.printf(s.prefix+"PutBlock(%q,len=%v)=%#v took %v", id, len(data), err, dt)
return err
}
func (s *loggingStorage) DeleteBlock(ctx context.Context, id string) error {
t0 := time.Now()
err := s.base.DeleteBlock(ctx, id)
dt := time.Since(t0)
s.printf(s.prefix+"DeleteBlock(%q)=%#v took %v", id, err, dt)
return err
}
func (s *loggingStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
t0 := time.Now()
cnt := 0
err := s.base.ListBlocks(ctx, prefix, func(bi storage.BlockMetadata) error {
cnt++
return callback(bi)
})
s.printf(s.prefix+"ListBlocks(%q)=%v returned %v items and took %v", prefix, err, cnt, time.Since(t0))
return err
}
func (s *loggingStorage) Close(ctx context.Context) error {
t0 := time.Now()
err := s.base.Close(ctx)
dt := time.Since(t0)
s.printf(s.prefix+"Close()=%#v took %v", err, dt)
return err
}
func (s *loggingStorage) ConnectionInfo() storage.ConnectionInfo {
return s.base.ConnectionInfo()
}
// Option modifies the behavior of logging storage wrapper.
type Option func(s *loggingStorage)
// NewWrapper returns a Storage wrapper that logs all storage commands.
func NewWrapper(wrapped storage.Storage, options ...Option) storage.Storage {
s := &loggingStorage{base: wrapped, printf: log.Debugf}
for _, o := range options {
o(s)
}
return s
}
// Output is a logging storage option that causes all output to be sent to a given function instead of the default logger.
func Output(outputFunc func(fmt string, args ...interface{})) Option {
return func(s *loggingStorage) {
s.printf = outputFunc
}
}
// Prefix specifies a prefix to be prepended to all log output.
func Prefix(prefix string) Option {
return func(s *loggingStorage) {
s.prefix = prefix
}
}
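
A short usage sketch for the wrapper, assuming a local filesystem backend; the directory path and prefix below are arbitrary placeholders:

package main

import (
	"context"
	"log"
	"os"

	"github.com/kopia/repo/storage/filesystem"
	"github.com/kopia/repo/storage/logging"
)

func main() {
	ctx := context.Background()

	if err := os.MkdirAll("/tmp/kopia-example-storage", 0700); err != nil {
		log.Fatalf("mkdir: %v", err)
	}
	base, err := filesystem.New(ctx, &filesystem.Options{Path: "/tmp/kopia-example-storage"})
	if err != nil {
		log.Fatalf("unable to open storage: %v", err)
	}

	// Wrap the storage so every call is logged through log.Printf with a
	// custom prefix instead of the package's default logger.
	st := logging.NewWrapper(base, logging.Output(log.Printf), logging.Prefix("[fs] "))

	if err := st.PutBlock(ctx, "example-block", []byte{1, 2, 3}); err != nil {
		log.Fatalf("put failed: %v", err)
	}
	_ = st.Close(ctx)
}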


@@ -0,0 +1,39 @@
package logging
import (
"context"
"strings"
"testing"
"github.com/kopia/repo/internal/storagetesting"
)
func TestLoggingStorage(t *testing.T) {
var outputCount int
myPrefix := "myprefix"
myOutput := func(msg string, args ...interface{}) {
if !strings.HasPrefix(msg, myPrefix) {
t.Errorf("unexpected prefix %v", msg)
}
outputCount++
}
data := map[string][]byte{}
underlying := storagetesting.NewMapStorage(data, nil, nil)
st := NewWrapper(underlying, Output(myOutput), Prefix(myPrefix))
if st == nil {
t.Fatalf("unexpected result: %v", st)
}
ctx := context.Background()
storagetesting.VerifyStorage(ctx, t, st)
if err := st.Close(ctx); err != nil {
t.Fatalf("err: %v", err)
}
if outputCount == 0 {
t.Errorf("did not write any output!")
}
if got, want := st.ConnectionInfo().Type, underlying.ConnectionInfo().Type; got != want {
t.Errorf("unexpected connection infor %v, want %v", got, want)
}
}

21
storage/progress.go Normal file

@@ -0,0 +1,21 @@
package storage
import "context"
type contextKey string
var progressCallbackContextKey contextKey = "progress-callback"
// ProgressFunc is used to report progress of a long-running storage operation.
type ProgressFunc func(desc string, completed, total int64)
// WithUploadProgressCallback returns a context that carries a callback function used to report storage upload progress.
func WithUploadProgressCallback(ctx context.Context, callback ProgressFunc) context.Context {
return context.WithValue(ctx, progressCallbackContextKey, callback)
}
// ProgressCallback gets the progress callback function from the context.
func ProgressCallback(ctx context.Context) ProgressFunc {
pf, _ := ctx.Value(progressCallbackContextKey).(ProgressFunc)
return pf
}
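
A brief sketch of how a caller might attach an upload-progress callback to a context; the second half shows how a storage implementation retrieves it (the S3 backend below does exactly this):

package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage"
)

func main() {
	// Attach a progress callback to the context passed to storage operations.
	ctx := storage.WithUploadProgressCallback(context.Background(), func(desc string, completed, total int64) {
		log.Printf("uploading %v: %v/%v bytes", desc, completed, total)
	})

	// Inside a storage implementation, the callback is retrieved from the
	// same context and invoked as the upload makes progress.
	if cb := storage.ProgressCallback(ctx); cb != nil {
		cb("example-block", 0, 100)
	}
}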


@@ -0,0 +1,8 @@
// Package providers registers all storage providers that are included as part of Kopia.
package providers
import (
// Register well-known blob storage providers
_ "github.com/kopia/repo/storage/filesystem"
_ "github.com/kopia/repo/storage/gcs"
)

39
storage/registry.go Normal file

@@ -0,0 +1,39 @@
package storage
import (
"context"
"fmt"
)
var (
factories = map[string]*storageFactory{}
)
// storageFactory allows creation of storage backends in a generic way.
type storageFactory struct {
defaultConfigFunc func() interface{}
createStorageFunc func(context.Context, interface{}) (Storage, error)
}
// AddSupportedStorage registers a factory function to create storage with a given type name.
func AddSupportedStorage(
urlScheme string,
defaultConfigFunc func() interface{},
createStorageFunc func(context.Context, interface{}) (Storage, error)) {
f := &storageFactory{
defaultConfigFunc: defaultConfigFunc,
createStorageFunc: createStorageFunc,
}
factories[urlScheme] = f
}
// NewStorage creates new storage based on ConnectionInfo.
// The storage type must be previously registered using AddSupportedStorage.
func NewStorage(ctx context.Context, cfg ConnectionInfo) (Storage, error) {
if factory, ok := factories[cfg.Type]; ok {
return factory.createStorageFunc(ctx, cfg.Config)
}
return nil, fmt.Errorf("unknown storage type: %s", cfg.Type)
}
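
A usage sketch for the registry, assuming a filesystem backend whose init() registers the "filesystem" type (the providers package below blank-imports backends for the same reason); the path is a placeholder:

package main

import (
	"context"
	"log"
	"os"

	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ctx := context.Background()

	if err := os.MkdirAll("/tmp/kopia-example-storage", 0700); err != nil {
		log.Fatalf("mkdir: %v", err)
	}

	// Importing the filesystem package registers the "filesystem" type via init().
	st, err := filesystem.New(ctx, &filesystem.Options{Path: "/tmp/kopia-example-storage"})
	if err != nil {
		log.Fatalf("unable to open storage: %v", err)
	}

	// ConnectionInfo round-trips through the registry: the same storage
	// can be re-created later from its persisted ConnectionInfo.
	ci := st.ConnectionInfo()
	st2, err := storage.NewStorage(ctx, ci)
	if err != nil {
		log.Fatalf("unable to reopen storage: %v", err)
	}
	defer st2.Close(ctx) //nolint:errcheck
	_ = st.Close(ctx)

	log.Printf("reopened storage of type %v", ci.Type)
}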

20
storage/s3/s3_options.go Normal file

@@ -0,0 +1,20 @@
package s3
// Options defines options for S3-based storage.
type Options struct {
// BucketName is the name of the bucket where data is stored.
BucketName string `json:"bucket"`
// Prefix specifies an additional string to prepend to all object names.
Prefix string `json:"prefix,omitempty"`
Endpoint string `json:"endpoint"`
DoNotUseTLS bool `json:"doNotUseTLS,omitempty"`
AccessKeyID string `json:"accessKeyID"`
SecretAccessKey string `json:"secretAccessKey" kopia:"sensitive"`
MaxUploadSpeedBytesPerSecond int `json:"maxUploadSpeedBytesPerSecond,omitempty"`
MaxDownloadSpeedBytesPerSecond int `json:"maxDownloadSpeedBytesPerSecond,omitempty"`
}

244
storage/s3/s3_storage.go Normal file

@@ -0,0 +1,244 @@
// Package s3 implements Storage based on an S3 bucket.
package s3
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"github.com/efarrer/iothrottler"
"github.com/kopia/repo/internal/retry"
"github.com/kopia/repo/storage"
"github.com/minio/minio-go"
)
const (
s3storageType = "s3"
)
type s3Storage struct {
Options
ctx context.Context
cli *minio.Client
downloadThrottler *iothrottler.IOThrottlerPool
uploadThrottler *iothrottler.IOThrottlerPool
}
func (s *s3Storage) GetBlock(ctx context.Context, b string, offset, length int64) ([]byte, error) {
attempt := func() (interface{}, error) {
var opt minio.GetObjectOptions
if length > 0 {
if err := opt.SetRange(offset, offset+length-1); err != nil {
return nil, fmt.Errorf("unable to set range: %v", err)
}
}
o, err := s.cli.GetObject(s.BucketName, s.getObjectNameString(b), opt)
if err != nil {
return nil, err
}
defer o.Close() //nolint:errcheck
throttled, err := s.downloadThrottler.AddReader(o)
if err != nil {
return nil, err
}
b, err := ioutil.ReadAll(throttled)
if err != nil {
return nil, err
}
if len(b) != int(length) && length > 0 {
return nil, fmt.Errorf("invalid length, got %v bytes, but expected %v", len(b), length)
}
if length == 0 {
return []byte{}, nil
}
return b, nil
}
v, err := exponentialBackoff(fmt.Sprintf("GetBlock(%q,%v,%v)", b, offset, length), attempt)
if err != nil {
return nil, translateError(err)
}
return v.([]byte), nil
}
func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error) {
return retry.WithExponentialBackoff(desc, att, isRetriableError)
}
func isRetriableError(err error) bool {
if me, ok := err.(minio.ErrorResponse); ok {
// retry on server errors, not on client errors
return me.StatusCode >= 500
}
return false
}
func translateError(err error) error {
if me, ok := err.(minio.ErrorResponse); ok {
if me.StatusCode == 200 {
return nil
}
if me.StatusCode == 404 {
return storage.ErrBlockNotFound
}
}
return err
}
func (s *s3Storage) PutBlock(ctx context.Context, b string, data []byte) error {
throttled, err := s.uploadThrottler.AddReader(ioutil.NopCloser(bytes.NewReader(data)))
if err != nil {
return err
}
progressCallback := storage.ProgressCallback(ctx)
if progressCallback != nil {
progressCallback(b, 0, int64(len(data)))
defer progressCallback(b, int64(len(data)), int64(len(data)))
}
n, err := s.cli.PutObject(s.BucketName, s.getObjectNameString(b), throttled, -1, minio.PutObjectOptions{
ContentType: "application/x-kopia",
Progress: newProgressReader(progressCallback, b, int64(len(data))),
})
if err == io.EOF && n == 0 {
// special case empty stream
_, err = s.cli.PutObject(s.BucketName, s.getObjectNameString(b), bytes.NewBuffer(nil), 0, minio.PutObjectOptions{
ContentType: "application/x-kopia",
})
}
return translateError(err)
}
func (s *s3Storage) DeleteBlock(ctx context.Context, b string) error {
attempt := func() (interface{}, error) {
return nil, s.cli.RemoveObject(s.BucketName, s.getObjectNameString(b))
}
_, err := exponentialBackoff(fmt.Sprintf("DeleteBlock(%q)", b), attempt)
return translateError(err)
}
func (s *s3Storage) getObjectNameString(b string) string {
return s.Prefix + b
}
func (s *s3Storage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
oi := s.cli.ListObjects(s.BucketName, s.Prefix+prefix, false, ctx.Done())
for o := range oi {
if err := o.Err; err != nil {
return err
}
bm := storage.BlockMetadata{
BlockID: o.Key[len(s.Prefix):],
Length: o.Size,
Timestamp: o.LastModified,
}
if err := callback(bm); err != nil {
return err
}
}
return nil
}
func (s *s3Storage) ConnectionInfo() storage.ConnectionInfo {
return storage.ConnectionInfo{
Type: s3storageType,
Config: &s.Options,
}
}
func (s *s3Storage) Close(ctx context.Context) error {
return nil
}
func (s *s3Storage) String() string {
return fmt.Sprintf("s3://%v/%v", s.BucketName, s.Prefix)
}
type progressReader struct {
cb storage.ProgressFunc
blockID string
completed int64
totalLength int64
lastReported int64
}
func (r *progressReader) Read(b []byte) (int, error) {
r.completed += int64(len(b))
if r.completed >= r.lastReported+1000000 && r.completed < r.totalLength {
r.cb(r.blockID, r.completed, r.totalLength)
r.lastReported = r.completed
}
return len(b), nil
}
func newProgressReader(cb storage.ProgressFunc, blockID string, totalLength int64) io.Reader {
if cb == nil {
return nil
}
return &progressReader{cb: cb, blockID: blockID, totalLength: totalLength}
}
func toBandwidth(bytesPerSecond int) iothrottler.Bandwidth {
if bytesPerSecond <= 0 {
return iothrottler.Unlimited
}
return iothrottler.Bandwidth(bytesPerSecond) * iothrottler.BytesPerSecond
}
// New creates new S3-backed storage with specified options:
//
// - the 'BucketName' field is required and all other parameters are optional.
func New(ctx context.Context, opt *Options) (storage.Storage, error) {
if opt.BucketName == "" {
return nil, errors.New("bucket name must be specified")
}
cli, err := minio.New(opt.Endpoint, opt.AccessKeyID, opt.SecretAccessKey, !opt.DoNotUseTLS)
if err != nil {
return nil, fmt.Errorf("unable to create client: %v", err)
}
downloadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxDownloadSpeedBytesPerSecond))
uploadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxUploadSpeedBytesPerSecond))
return &s3Storage{
Options: *opt,
ctx: ctx,
cli: cli,
downloadThrottler: downloadThrottler,
uploadThrottler: uploadThrottler,
}, nil
}
func init() {
storage.AddSupportedStorage(
s3storageType,
func() interface{} {
return &Options{}
},
func(ctx context.Context, o interface{}) (storage.Storage, error) {
return New(ctx, o.(*Options))
})
}
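
A minimal connection sketch reusing the public play.minio.io demo endpoint and credentials from the test that follows; the bucket name and prefix are placeholders and the bucket is assumed to already exist:

package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage/s3"
)

func main() {
	ctx := context.Background()

	st, err := s3.New(ctx, &s3.Options{
		Endpoint:        "play.minio.io:9000",
		AccessKeyID:     "Q3AM3UQ867SPQQA43P2F",
		SecretAccessKey: "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG",
		BucketName:      "my-kopia-bucket",
		Prefix:          "demo-",

		// Optional client-side throttling (bytes per second).
		MaxUploadSpeedBytesPerSecond: 1000000,
	})
	if err != nil {
		log.Fatalf("unable to connect to S3: %v", err)
	}
	defer st.Close(ctx) //nolint:errcheck

	if err := st.PutBlock(ctx, "example-block", []byte("hello")); err != nil {
		log.Fatalf("put failed: %v", err)
	}
}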


@@ -0,0 +1,116 @@
package s3
import (
"context"
"crypto/rand"
"crypto/sha1"
"fmt"
"log"
"net"
"os"
"testing"
"time"
"github.com/kopia/repo/internal/storagetesting"
"github.com/kopia/repo/storage"
"github.com/minio/minio-go"
)
// https://github.com/minio/minio-go
const (
endpoint = "play.minio.io:9000"
accessKeyID = "Q3AM3UQ867SPQQA43P2F"
secretAccessKey = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
useSSL = true
// the test takes a few seconds, delete stuff older than 1h to avoid accumulating cruft
cleanupAge = 1 * time.Hour
)
var bucketName = getBucketName()
func getBucketName() string {
hn, err := os.Hostname()
if err != nil {
return "kopia-test-1"
}
h := sha1.New()
fmt.Fprintf(h, "%v", hn)
return fmt.Sprintf("kopia-test-%x", h.Sum(nil)[0:8])
}
func endpointReachable() bool {
conn, err := net.DialTimeout("tcp4", endpoint, 5*time.Second)
if err == nil {
conn.Close()
return true
}
return false
}
func TestS3Storage(t *testing.T) {
if !endpointReachable() {
t.Skip("endpoint not reachable")
}
ctx := context.Background()
// recreate the per-host bucket, which sometimes gets cleaned up by play.minio.io
createBucket(t)
cleanupOldData(ctx, t)
data := make([]byte, 8)
rand.Read(data) //nolint:errcheck
st, err := New(context.Background(), &Options{
AccessKeyID: accessKeyID,
SecretAccessKey: secretAccessKey,
Endpoint: endpoint,
BucketName: bucketName,
Prefix: fmt.Sprintf("test-%v-%x-", time.Now().Unix(), data),
})
if err != nil {
t.Fatalf("err: %v", err)
}
storagetesting.VerifyStorage(ctx, t, st)
storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st)
if err := st.Close(ctx); err != nil {
t.Fatalf("err: %v", err)
}
}
func createBucket(t *testing.T) {
minioClient, err := minio.New(endpoint, accessKeyID, secretAccessKey, useSSL)
if err != nil {
t.Fatalf("can't initialize minio client: %v", err)
}
// ignore error
_ = minioClient.MakeBucket(bucketName, "us-east-1")
}
func cleanupOldData(ctx context.Context, t *testing.T) {
// cleanup old data from the bucket
st, err := New(context.Background(), &Options{
AccessKeyID: accessKeyID,
SecretAccessKey: secretAccessKey,
Endpoint: endpoint,
BucketName: bucketName,
})
if err != nil {
t.Fatalf("err: %v", err)
}
_ = st.ListBlocks(ctx, "", func(it storage.BlockMetadata) error {
age := time.Since(it.Timestamp)
if age > cleanupAge {
if err := st.DeleteBlock(ctx, it.BlockID); err != nil {
t.Errorf("warning: unable to delete %q: %v", it.BlockID, err)
}
} else {
log.Printf("keeping %v", it.BlockID)
}
return nil
})
}

108
storage/storage.go Normal file

@@ -0,0 +1,108 @@
package storage
import (
"context"
"errors"
"fmt"
"time"
)
// CancelFunc requests cancellation of a storage operation.
type CancelFunc func()
// Storage encapsulates API for connecting to blob storage.
//
// The underlying storage system must provide:
//
// * high durability, availability and bit-rot protection
// * read-after-write - block written using PutBlock() must be immediately readable using GetBlock() and ListBlocks()
// * atomicity - it mustn't be possible to observe partial results of PutBlock() via either GetBlock() or ListBlocks()
// * timestamps that don't go back in time (small clock skew up to minutes is allowed)
// * reasonably low latency for retrievals
//
// The required semantics are provided by existing commercial cloud storage products (Google Cloud, AWS, Azure).
type Storage interface {
// PutBlock uploads a block with the given ID and contents to storage, replacing any existing
// block with the same ID.
PutBlock(ctx context.Context, id string, data []byte) error
// DeleteBlock removes the block from storage. Future GetBlock() operations will fail with ErrBlockNotFound.
DeleteBlock(ctx context.Context, id string) error
// GetBlock returns full or partial contents of a block with given ID.
// If length>0, the the function retrieves a range of bytes [offset,offset+length)
// If length<0, the entire block must be fetched.
GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error)
// ListBlocks invokes the provided callback for each block whose ID starts with the given prefix.
// Iteration continues until all matching blocks have been listed or until the callback returns a non-nil error.
ListBlocks(ctx context.Context, prefix string, cb func(bm BlockMetadata) error) error
// ConnectionInfo returns JSON-serializable data structure containing information required to
// connect to storage.
ConnectionInfo() ConnectionInfo
// Close releases all resources associated with storage.
Close(ctx context.Context) error
}
// BlockMetadata represents metadata about a single block in a storage.
type BlockMetadata struct {
BlockID string
Length int64
Timestamp time.Time
}
// ErrBlockNotFound is returned when a block cannot be found in storage.
var ErrBlockNotFound = errors.New("block not found")
// ListAllBlocks returns BlockMetadata for all blocks in a given storage that have the provided name prefix.
func ListAllBlocks(ctx context.Context, st Storage, prefix string) ([]BlockMetadata, error) {
var result []BlockMetadata
err := st.ListBlocks(ctx, prefix, func(bm BlockMetadata) error {
result = append(result, bm)
return nil
})
return result, err
}
// ListAllBlocksConsistent lists all blocks with given name prefix in the provided storage until the results are
// consistent. The results are consistent if the list result fetched twice is identical. This guarantees that while
// the first scan was in progress, no new block was added or removed.
// maxAttempts specifies maximum number of list attempts (must be >= 2)
func ListAllBlocksConsistent(ctx context.Context, st Storage, prefix string, maxAttempts int) ([]BlockMetadata, error) {
var previous []BlockMetadata
for i := 0; i < maxAttempts; i++ {
result, err := ListAllBlocks(ctx, st, prefix)
if err != nil {
return nil, err
}
if i > 0 && sameBlocks(result, previous) {
return result, nil
}
previous = result
}
return nil, fmt.Errorf("unable to achieve consistent snapshot despite %v attempts", maxAttempts)
}
// sameBlocks returns true if b1 & b2 contain the same blocks (ignoring order).
func sameBlocks(b1, b2 []BlockMetadata) bool {
if len(b1) != len(b2) {
return false
}
m := map[string]BlockMetadata{}
for _, b := range b1 {
m[b.BlockID] = b
}
for _, b := range b2 {
if m[b.BlockID] != b {
return false
}
}
return true
}
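
A short usage sketch exercising the contract described above against a filesystem backend with a placeholder path: write a few blocks, read one back, then take a consistent listing:

package main

import (
	"bytes"
	"context"
	"log"
	"os"

	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ctx := context.Background()

	if err := os.MkdirAll("/tmp/kopia-example-storage", 0700); err != nil {
		log.Fatalf("mkdir: %v", err)
	}
	st, err := filesystem.New(ctx, &filesystem.Options{Path: "/tmp/kopia-example-storage"})
	if err != nil {
		log.Fatalf("unable to open storage: %v", err)
	}
	defer st.Close(ctx) //nolint:errcheck

	// Read-after-write: blocks written with PutBlock are immediately visible.
	for _, id := range []string{"foo1", "foo2", "bar1"} {
		if err := st.PutBlock(ctx, id, []byte{1, 2, 3}); err != nil {
			log.Fatalf("put %v: %v", id, err)
		}
	}
	data, err := st.GetBlock(ctx, "foo1", 0, -1) // length<0 fetches the entire block
	if err != nil || !bytes.Equal(data, []byte{1, 2, 3}) {
		log.Fatalf("unexpected read result: %v %v", data, err)
	}

	// List blocks with the "foo" prefix until two consecutive scans agree (at most 3 attempts).
	blocks, err := storage.ListAllBlocksConsistent(ctx, st, "foo", 3)
	if err != nil {
		log.Fatalf("list: %v", err)
	}
	for _, bm := range blocks {
		log.Printf("%v: %v bytes, modified %v", bm.BlockID, bm.Length, bm.Timestamp)
	}
}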

57
storage/storage_test.go Normal file

@@ -0,0 +1,57 @@
package storage_test
import (
"context"
"testing"
"time"
"github.com/kopia/repo/internal/storagetesting"
"github.com/kopia/repo/storage"
)
func TestListAllBlocksConsistent(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
st := storagetesting.NewMapStorage(data, nil, time.Now)
st.PutBlock(ctx, "foo1", []byte{1, 2, 3}) //nolint:errcheck
st.PutBlock(ctx, "foo2", []byte{1, 2, 3}) //nolint:errcheck
st.PutBlock(ctx, "foo3", []byte{1, 2, 3}) //nolint:errcheck
// set up faulty storage that will add a block while a scan is in progress.
f := &storagetesting.FaultyStorage{
Base: st,
Faults: map[string][]*storagetesting.Fault{
"ListBlocksItem": {
{ErrCallback: func() error {
st.PutBlock(ctx, "foo0", []byte{1, 2, 3}) //nolint:errcheck
return nil
}},
},
},
}
r, err := storage.ListAllBlocksConsistent(ctx, f, "foo", 3)
if err != nil {
t.Fatalf("error: %v", err)
}
// make sure we get the list with 4 items, not 3.
if got, want := len(r), 4; got != want {
t.Errorf("unexpected list result count: %v, want %v", got, want)
}
}
func TestListAllBlocksConsistentEmpty(t *testing.T) {
ctx := context.Background()
data := map[string][]byte{}
st := storagetesting.NewMapStorage(data, nil, time.Now)
r, err := storage.ListAllBlocksConsistent(ctx, st, "foo", 3)
if err != nil {
t.Fatalf("error: %v", err)
}
if got, want := len(r), 0; got != want {
t.Errorf("unexpected list result count: %v, want %v", got, want)
}
}


@@ -0,0 +1,17 @@
package webdav
// Options defines options for WebDAV-backed storage.
type Options struct {
URL string `json:"url"`
DirectoryShards []int `json:"dirShards"`
Username string `json:"username,omitempty"`
Password string `json:"password,omitempty" kopia:"sensitive"`
}
func (fso *Options) shards() []int {
if fso.DirectoryShards == nil {
return fsDefaultShards
}
return fso.DirectoryShards
}


@@ -0,0 +1,210 @@
// Package webdav implements WebDAV-based Storage.
package webdav
import (
"context"
"errors"
"fmt"
"math/rand"
"os"
"path/filepath"
"sort"
"strings"
"github.com/kopia/repo/storage"
"github.com/studio-b12/gowebdav"
)
const (
davStorageType = "webdav"
fsStorageChunkSuffix = ".f"
)
var (
fsDefaultShards = []int{3, 3}
)
// davStorage implements blob.Storage on top of a remote WebDAV server.
// It is very similar to File storage, except that it uses HTTP URLs instead of local files.
// Storage formats are compatible (both use sharded directory structure), so a repository
// may be accessed using WebDAV or File interchangeably.
type davStorage struct {
Options
cli *gowebdav.Client
}
func (d *davStorage) GetBlock(ctx context.Context, blockID string, offset, length int64) ([]byte, error) {
_, path := d.getDirPathAndFilePath(blockID)
data, err := d.cli.Read(path)
if err != nil {
return nil, d.translateError(err)
}
if length < 0 {
return data, nil
}
if int(offset) > len(data) || offset < 0 {
return nil, errors.New("invalid offset")
}
data = data[offset:]
if int(length) > len(data) {
return nil, errors.New("invalid length")
}
return data[0:length], nil
}
func (d *davStorage) translateError(err error) error {
switch err := err.(type) {
case *os.PathError:
switch err.Err.Error() {
case "404":
return storage.ErrBlockNotFound
}
return err
default:
return err
}
}
func getBlockIDFromFileName(name string) (string, bool) {
if strings.HasSuffix(name, fsStorageChunkSuffix) {
return name[0 : len(name)-len(fsStorageChunkSuffix)], true
}
return "", false
}
func makeFileName(blockID string) string {
return blockID + fsStorageChunkSuffix
}
func (d *davStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
var walkDir func(string, string) error
walkDir = func(path string, currentPrefix string) error {
entries, err := d.cli.ReadDir(gowebdav.FixSlash(path))
if err != nil {
return fmt.Errorf("read dir error on %v: %v", path, err)
}
sort.Slice(entries, func(i, j int) bool {
return entries[i].Name() < entries[j].Name()
})
for _, e := range entries {
if e.IsDir() {
newPrefix := currentPrefix + e.Name()
var match bool
if len(prefix) > len(newPrefix) {
// looking for 'abcd', got 'ab' so far, worth trying
match = strings.HasPrefix(prefix, newPrefix)
} else {
match = strings.HasPrefix(newPrefix, prefix)
}
if match {
if err := walkDir(path+"/"+e.Name(), currentPrefix+e.Name()); err != nil {
return err
}
}
} else if fullID, ok := getBlockIDFromFileName(currentPrefix + e.Name()); ok {
if strings.HasPrefix(fullID, prefix) {
if err := callback(storage.BlockMetadata{
BlockID: fullID,
Length: e.Size(),
Timestamp: e.ModTime(),
}); err != nil {
return err
}
}
}
}
return nil
}
return walkDir("", "")
}
func (d *davStorage) PutBlock(ctx context.Context, blockID string, data []byte) error {
dirPath, filePath := d.getDirPathAndFilePath(blockID)
tmpPath := fmt.Sprintf("%v-%v", filePath, rand.Int63())
if err := d.translateError(d.cli.Write(tmpPath, data, 0600)); err != nil {
if err != storage.ErrBlockNotFound {
return err
}
d.cli.MkdirAll(dirPath, 0700) //nolint:errcheck
if err = d.translateError(d.cli.Write(tmpPath, data, 0600)); err != nil {
return err
}
}
return d.translateError(d.cli.Rename(tmpPath, filePath, true))
}
func (d *davStorage) DeleteBlock(ctx context.Context, blockID string) error {
_, filePath := d.getDirPathAndFilePath(blockID)
return d.translateError(d.cli.Remove(filePath))
}
func (d *davStorage) getShardDirectory(blockID string) (string, string) {
shardPath := "/"
if len(blockID) < 20 {
return shardPath, blockID
}
for _, size := range d.shards() {
shardPath = filepath.Join(shardPath, blockID[0:size])
blockID = blockID[size:]
}
return shardPath, blockID
}
func (d *davStorage) getDirPathAndFilePath(blockID string) (string, string) {
shardPath, blockID := d.getShardDirectory(blockID)
result := filepath.Join(shardPath, makeFileName(blockID))
return shardPath, result
}
func (d *davStorage) ConnectionInfo() storage.ConnectionInfo {
return storage.ConnectionInfo{
Type: davStorageType,
Config: &d.Options,
}
}
func (d *davStorage) Close(ctx context.Context) error {
return nil
}
// New creates new WebDAV-backed storage in a specified URL.
func New(ctx context.Context, opts *Options) (storage.Storage, error) {
r := &davStorage{
Options: *opts,
cli: gowebdav.NewClient(opts.URL, opts.Username, opts.Password),
}
for _, s := range r.shards() {
if s == 0 {
return nil, fmt.Errorf("invalid shard spec: %v", opts.DirectoryShards)
}
}
r.Options.URL = strings.TrimSuffix(r.Options.URL, "/")
return r, nil
}
func init() {
storage.AddSupportedStorage(
davStorageType,
func() interface{} { return &Options{} },
func(ctx context.Context, o interface{}) (storage.Storage, error) {
return New(ctx, o.(*Options))
})
}
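
A minimal connection sketch; the URL and credentials are placeholders, and DirectoryShards is left nil so the default {3, 3} sharding applies:

package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage/webdav"
)

func main() {
	ctx := context.Background()

	st, err := webdav.New(ctx, &webdav.Options{
		URL:      "https://dav.example.com/kopia",
		Username: "user",
		Password: "secret",
	})
	if err != nil {
		log.Fatalf("unable to connect to WebDAV server: %v", err)
	}
	defer st.Close(ctx) //nolint:errcheck

	if err := st.PutBlock(ctx, "example-block", []byte("hello")); err != nil {
		log.Fatalf("put failed: %v", err)
	}
}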


@@ -0,0 +1,65 @@
package webdav
import (
"context"
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
"os"
"testing"
"golang.org/x/net/webdav"
"github.com/kopia/repo/internal/storagetesting"
)
func TestWebDAVStorage(t *testing.T) {
tmpDir, _ := ioutil.TempDir("", "webdav")
defer os.RemoveAll(tmpDir)
t.Logf("tmpDir: %v", tmpDir)
mux := http.NewServeMux()
mux.Handle("/", &webdav.Handler{
FileSystem: webdav.Dir(tmpDir),
LockSystem: webdav.NewMemLS(),
})
server := httptest.NewServer(mux)
defer server.Close()
ctx := context.Background()
// Test various shard configurations.
for _, shardSpec := range [][]int{
{1},
{3, 3},
{2},
{1, 1},
{1, 2},
{2, 2, 2},
} {
t.Run(fmt.Sprintf("shards-%v", shardSpec), func(t *testing.T) {
if err := os.RemoveAll(tmpDir); err != nil {
t.Errorf("can't remove all: %q", tmpDir)
}
os.MkdirAll(tmpDir, 0700) //nolint:errcheck
r, err := New(context.Background(), &Options{
URL: server.URL,
DirectoryShards: shardSpec,
})
if r == nil || err != nil {
t.Errorf("unexpected result: %v %v", r, err)
}
storagetesting.VerifyStorage(ctx, t, r)
storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r)
if err := r.Close(ctx); err != nil {
t.Fatalf("err: %v", err)
}
})
}
}


Binary file not shown.


@@ -0,0 +1,3 @@
package repositorystress
// dummy package


@@ -0,0 +1,319 @@
package repositorystress_test
import (
"context"
"fmt"
"io/ioutil"
"log"
"math/rand"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"testing"
"time"
"github.com/kopia/repo"
"github.com/kopia/repo/block"
"github.com/kopia/repo/storage"
"github.com/kopia/repo/storage/filesystem"
)
const masterPassword = "foo-bar-baz-1234"
var (
knownBlocks []string
knownBlocksMutex sync.Mutex
)
func TestStressRepository(t *testing.T) {
if testing.Short() {
t.Skip("skipping stress test during short tests")
}
ctx := block.UsingListCache(context.Background(), false)
tmpPath, err := ioutil.TempDir("", "kopia")
if err != nil {
t.Fatalf("unable to create temp directory")
}
defer func() {
if !t.Failed() {
os.RemoveAll(tmpPath)
}
}()
t.Logf("path: %v", tmpPath)
storagePath := filepath.Join(tmpPath, "storage")
configFile1 := filepath.Join(tmpPath, "kopia1.config")
configFile2 := filepath.Join(tmpPath, "kopia2.config")
assertNoError(t, os.MkdirAll(storagePath, 0700))
st, err := filesystem.New(ctx, &filesystem.Options{
Path: storagePath,
})
if err != nil {
t.Fatalf("unable to initialize storage: %v", err)
}
// create repository
if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{}, masterPassword); err != nil {
t.Fatalf("unable to initialize repository: %v", err)
}
// set up two parallel kopia connections, each with its own config file and cache.
if err := repo.Connect(ctx, configFile1, st, masterPassword, repo.ConnectOptions{
CachingOptions: block.CachingOptions{
CacheDirectory: filepath.Join(tmpPath, "cache1"),
MaxCacheSizeBytes: 2000000000,
},
}); err != nil {
t.Fatalf("unable to connect 1: %v", err)
}
if err := repo.Connect(ctx, configFile2, st, masterPassword, repo.ConnectOptions{
CachingOptions: block.CachingOptions{
CacheDirectory: filepath.Join(tmpPath, "cache2"),
MaxCacheSizeBytes: 2000000000,
},
}); err != nil {
t.Fatalf("unable to connect 2: %v", err)
}
cancel := make(chan struct{})
var wg sync.WaitGroup
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
wg.Add(1)
go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
time.Sleep(5 * time.Second)
close(cancel)
wg.Wait()
}
func longLivedRepositoryTest(ctx context.Context, t *testing.T, cancel chan struct{}, configFile string, wg *sync.WaitGroup) {
defer wg.Done()
rep, err := repo.Open(ctx, configFile, masterPassword, &repo.Options{})
if err != nil {
t.Errorf("error opening repository: %v", err)
return
}
defer rep.Close(ctx)
var wg2 sync.WaitGroup
for i := 0; i < 4; i++ {
wg2.Add(1)
go func() {
defer wg2.Done()
repositoryTest(ctx, t, cancel, rep)
}()
}
wg2.Wait()
}
func repositoryTest(ctx context.Context, t *testing.T, cancel chan struct{}, rep *repo.Repository) {
// reopen := func(t *testing.T, r *repo.Repository) error {
// if err := rep.Close(ctx); err != nil {
// return fmt.Errorf("error closing: %v", err)
// }
// t0 := time.Now()
// rep, err = repo.Open(ctx, configFile, &repo.Options{})
// log.Printf("reopened in %v", time.Since(t0))
// return err
// }
workTypes := []*struct {
name string
fun func(ctx context.Context, t *testing.T, r *repo.Repository) error
weight int
hitCount int
}{
//{"reopen", reopen, 1, 0},
{"writeRandomBlock", writeRandomBlock, 100, 0},
{"writeRandomManifest", writeRandomManifest, 100, 0},
{"readKnownBlock", readKnownBlock, 500, 0},
{"listBlocks", listBlocks, 50, 0},
{"listAndReadAllBlocks", listAndReadAllBlocks, 5, 0},
{"readRandomManifest", readRandomManifest, 50, 0},
{"compact", compact, 1, 0},
{"refresh", refresh, 3, 0},
{"flush", flush, 1, 0},
}
var totalWeight int
for _, w := range workTypes {
totalWeight += w.weight
}
iter := 0
for {
select {
case <-cancel:
rep.Close(ctx)
return
default:
}
if iter%1000 == 0 {
var bits []string
for _, w := range workTypes {
bits = append(bits, fmt.Sprintf("%v:%v", w.name, w.hitCount))
}
log.Printf("#%v %v %v goroutines", iter, strings.Join(bits, " "), runtime.NumGoroutine())
}
iter++
roulette := rand.Intn(totalWeight)
for _, w := range workTypes {
if roulette < w.weight {
w.hitCount++
//log.Printf("running %v", w.name)
if err := w.fun(ctx, t, rep); err != nil {
t.Errorf("error running %v: %v", w.name, err)
return
}
break
}
roulette -= w.weight
}
}
}
func writeRandomBlock(ctx context.Context, t *testing.T, r *repo.Repository) error {
data := make([]byte, 1000)
rand.Read(data)
blockID, err := r.Blocks.WriteBlock(ctx, data, "")
if err == nil {
knownBlocksMutex.Lock()
if len(knownBlocks) >= 1000 {
n := rand.Intn(len(knownBlocks))
knownBlocks[n] = blockID
} else {
knownBlocks = append(knownBlocks, blockID)
}
knownBlocksMutex.Unlock()
}
return err
}
func readKnownBlock(ctx context.Context, t *testing.T, r *repo.Repository) error {
knownBlocksMutex.Lock()
if len(knownBlocks) == 0 {
knownBlocksMutex.Unlock()
return nil
}
blockID := knownBlocks[rand.Intn(len(knownBlocks))]
knownBlocksMutex.Unlock()
_, err := r.Blocks.GetBlock(ctx, blockID)
if err == nil || err == storage.ErrBlockNotFound {
return nil
}
return err
}
func listBlocks(ctx context.Context, t *testing.T, r *repo.Repository) error {
_, err := r.Blocks.ListBlocks("")
return err
}
func listAndReadAllBlocks(ctx context.Context, t *testing.T, r *repo.Repository) error {
blocks, err := r.Blocks.ListBlocks("")
if err != nil {
return err
}
for _, bi := range blocks {
_, err := r.Blocks.GetBlock(ctx, bi)
if err != nil {
if err == storage.ErrBlockNotFound && strings.HasPrefix(bi, "m") {
// this is ok, sometimes manifest manager will perform compaction and 'm' blocks will be marked as deleted
continue
}
return fmt.Errorf("error reading block %v: %v", bi, err)
}
}
return nil
}
func compact(ctx context.Context, t *testing.T, r *repo.Repository) error {
return r.Blocks.CompactIndexes(ctx, block.CompactOptions{
MinSmallBlocks: 1,
MaxSmallBlocks: 1,
})
}
func flush(ctx context.Context, t *testing.T, r *repo.Repository) error {
return r.Flush(ctx)
}
func refresh(ctx context.Context, t *testing.T, r *repo.Repository) error {
return r.Refresh(ctx)
}
func readRandomManifest(ctx context.Context, t *testing.T, r *repo.Repository) error {
manifests, err := r.Manifests.Find(ctx, nil)
if err != nil {
return err
}
if len(manifests) == 0 {
return nil
}
n := rand.Intn(len(manifests))
_, err = r.Manifests.GetRaw(ctx, manifests[n].ID)
return err
}
func writeRandomManifest(ctx context.Context, t *testing.T, r *repo.Repository) error {
key1 := fmt.Sprintf("key-%v", rand.Intn(10))
key2 := fmt.Sprintf("key-%v", rand.Intn(10))
val1 := fmt.Sprintf("val1-%v", rand.Intn(10))
val2 := fmt.Sprintf("val2-%v", rand.Intn(10))
content1 := fmt.Sprintf("content-%v", rand.Intn(10))
content2 := fmt.Sprintf("content-%v", rand.Intn(10))
content1val := fmt.Sprintf("val1-%v", rand.Intn(10))
content2val := fmt.Sprintf("val2-%v", rand.Intn(10))
_, err := r.Manifests.Put(ctx, map[string]string{
"type": key1,
key1: val1,
key2: val2,
}, map[string]string{
content1: content1val,
content2: content2val,
})
return err
}
func assertNoError(t *testing.T, err error) {
t.Helper()
if err != nil {
t.Errorf("err: %v", err)
}
}
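
Distilled from the setup in the stress test above, a minimal end-to-end sketch (initialize, connect, open, write, flush, read back); the temporary directory, config path, and password are placeholders, and caching options are omitted for brevity:

package main

import (
	"bytes"
	"context"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"

	"github.com/kopia/repo"
	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ctx := context.Background()

	tmp, err := ioutil.TempDir("", "kopia-example")
	if err != nil {
		log.Fatalf("temp dir: %v", err)
	}
	storagePath := filepath.Join(tmp, "storage")
	configFile := filepath.Join(tmp, "kopia.config")
	const password = "example-password"

	if err := os.MkdirAll(storagePath, 0700); err != nil {
		log.Fatalf("mkdir: %v", err)
	}
	st, err := filesystem.New(ctx, &filesystem.Options{Path: storagePath})
	if err != nil {
		log.Fatalf("storage: %v", err)
	}

	// Create the repository structures in storage, then connect and open.
	// ConnectOptions can also carry block.CachingOptions as in the test above.
	if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{}, password); err != nil {
		log.Fatalf("initialize: %v", err)
	}
	if err := repo.Connect(ctx, configFile, st, password, repo.ConnectOptions{}); err != nil {
		log.Fatalf("connect: %v", err)
	}
	rep, err := repo.Open(ctx, configFile, password, &repo.Options{})
	if err != nil {
		log.Fatalf("open: %v", err)
	}
	defer rep.Close(ctx) //nolint:errcheck

	// Write a block, flush indexes, and read it back.
	blockID, err := rep.Blocks.WriteBlock(ctx, []byte("hello"), "")
	if err != nil {
		log.Fatalf("write: %v", err)
	}
	if err := rep.Flush(ctx); err != nil {
		log.Fatalf("flush: %v", err)
	}
	data, err := rep.Blocks.GetBlock(ctx, blockID)
	if err != nil || !bytes.Equal(data, []byte("hello")) {
		log.Fatalf("unexpected read result: %v %v", data, err)
	}
}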


@@ -0,0 +1,3 @@
package stress
// dummy package


@@ -0,0 +1,132 @@
package stress_test
import (
"context"
"fmt"
"math/rand"
"os"
"reflect"
"testing"
"time"
"github.com/kopia/repo/block"
"github.com/kopia/repo/internal/storagetesting"
"github.com/kopia/repo/storage"
)
const goroutineCount = 16
func TestStressBlockManager(t *testing.T) {
if testing.Short() {
t.Skip("skipping stress test during short tests")
}
data := map[string][]byte{}
keyTimes := map[string]time.Time{}
memst := storagetesting.NewMapStorage(data, keyTimes, time.Now)
var duration = 3 * time.Second
if os.Getenv("KOPIA_LONG_STRESS_TEST") != "" {
duration = 3 * time.Minute
}
stressTestWithStorage(t, memst, duration)
}
func stressTestWithStorage(t *testing.T, st storage.Storage, duration time.Duration) {
ctx := context.Background()
openMgr := func() (*block.Manager, error) {
return block.NewManager(ctx, st, block.FormattingOptions{
Version: 1,
Hash: "HMAC-SHA256-128",
Encryption: "AES-256-CTR",
MaxPackSize: 20000000,
MasterKey: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
}, block.CachingOptions{}, nil)
}
seed0 := time.Now().Nanosecond()
t.Logf("running with seed %v", seed0)
deadline := time.Now().Add(duration)
t.Run("workers", func(t *testing.T) {
for i := 0; i < goroutineCount; i++ {
i := i
t.Run(fmt.Sprintf("worker-%v", i), func(t *testing.T) {
t.Parallel()
stressWorker(ctx, t, deadline, i, openMgr, int64(seed0+i))
})
}
})
}
func stressWorker(ctx context.Context, t *testing.T, deadline time.Time, workerID int, openMgr func() (*block.Manager, error), seed int64) {
src := rand.NewSource(seed)
rand := rand.New(src)
bm, err := openMgr()
if err != nil {
t.Fatalf("error opening manager: %v", err)
}
type writtenBlock struct {
contentID string
data []byte
}
var workerBlocks []writtenBlock
for time.Now().Before(deadline) {
l := rand.Intn(30000)
data := make([]byte, l)
if _, err := rand.Read(data); err != nil {
t.Errorf("err: %v", err)
return
}
dataCopy := append([]byte{}, data...)
contentID, err := bm.WriteBlock(ctx, data, "")
if err != nil {
t.Errorf("err: %v", err)
return
}
switch rand.Intn(20) {
case 0:
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
return
}
case 1:
if err := bm.Flush(ctx); err != nil {
t.Errorf("flush error: %v", err)
return
}
bm, err = openMgr()
if err != nil {
t.Errorf("error opening: %v", err)
return
}
}
//log.Printf("wrote %v", contentID)
workerBlocks = append(workerBlocks, writtenBlock{contentID, dataCopy})
if len(workerBlocks) > 5 {
pos := rand.Intn(len(workerBlocks))
previous := workerBlocks[pos]
//log.Printf("reading %v", previous.contentID)
d2, err := bm.GetBlock(ctx, previous.contentID)
if err != nil {
t.Errorf("error verifying block %q: %v", previous.contentID, err)
return
}
if !reflect.DeepEqual(previous.data, d2) {
t.Errorf("invalid previous data for %q %x %x", previous.contentID, d2, previous.data)
return
}
workerBlocks = append(workerBlocks[0:pos], workerBlocks[pos+1:]...)
}
}
}
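
A standalone sketch of the block manager used above, with the same formatting options; the master key here is a throwaway all-zero value and the storage path is temporary, both for illustration only:

package main

import (
	"bytes"
	"context"
	"io/ioutil"
	"log"

	"github.com/kopia/repo/block"
	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ctx := context.Background()

	dir, err := ioutil.TempDir("", "kopia-blocks")
	if err != nil {
		log.Fatalf("temp dir: %v", err)
	}
	st, err := filesystem.New(ctx, &filesystem.Options{Path: dir})
	if err != nil {
		log.Fatalf("storage: %v", err)
	}

	// Same formatting options as the stress test above; the trailing nil
	// mirrors the call in that test.
	bm, err := block.NewManager(ctx, st, block.FormattingOptions{
		Version:     1,
		Hash:        "HMAC-SHA256-128",
		Encryption:  "AES-256-CTR",
		MaxPackSize: 20000000,
		MasterKey:   make([]byte, 32), // throwaway all-zero key for illustration
	}, block.CachingOptions{}, nil)
	if err != nil {
		log.Fatalf("block manager: %v", err)
	}

	contentID, err := bm.WriteBlock(ctx, []byte("hello"), "")
	if err != nil {
		log.Fatalf("write: %v", err)
	}
	if err := bm.Flush(ctx); err != nil {
		log.Fatalf("flush: %v", err)
	}
	data, err := bm.GetBlock(ctx, contentID)
	if err != nil || !bytes.Equal(data, []byte("hello")) {
		log.Fatalf("unexpected read result: %v %v", data, err)
	}
}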

35
upgrade.go Normal file

@@ -0,0 +1,35 @@
package repo
import (
"context"
"fmt"
"github.com/pkg/errors"
)
// Upgrade upgrades repository data structures to the latest version.
func (r *Repository) Upgrade(ctx context.Context) error {
f := r.formatBlock
log.Debug("decrypting format...")
repoConfig, err := f.decryptFormatBytes(r.masterKey)
if err != nil {
return errors.Wrap(err, "unable to decrypt repository config")
}
var migrated bool
// TODO(jkowalski): add migration code here
if !migrated {
log.Infof("nothing to do")
return nil
}
log.Debug("encrypting format...")
if err := encryptFormatBytes(f, repoConfig, r.masterKey, f.UniqueID); err != nil {
return fmt.Errorf("unable to encrypt format bytes")
}
log.Infof("writing updated format block...")
return writeFormatBlock(ctx, r.Storage, f)
}