mirror of https://github.com/kopia/kopia.git (synced 2026-01-26 15:28:06 -05:00)
Merge github.com:kopia/repo into import-repo
220
block/block_cache.go
Normal file
@@ -0,0 +1,220 @@
package block

import (
	"container/heap"
	"context"
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/filesystem"
)

const (
	defaultSweepFrequency = 1 * time.Minute
	defaultTouchThreshold = 10 * time.Minute
)

type blockCache struct {
	st             storage.Storage
	cacheStorage   storage.Storage
	maxSizeBytes   int64
	hmacSecret     []byte
	sweepFrequency time.Duration
	touchThreshold time.Duration

	mu                 sync.Mutex
	lastTotalSizeBytes int64

	closed chan struct{}
}

type blockToucher interface {
	TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error
}

func adjustCacheKey(cacheKey string) string {
	// block IDs with odd length have a single-byte prefix.
	// move the prefix to the end of cache key to make sure the top level shard is spread 256 ways.
	if len(cacheKey)%2 == 1 {
		return cacheKey[1:] + cacheKey[0:1]
	}

	return cacheKey
}
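For illustration, the rotation behaves as follows (hypothetical key values); after it, the leading characters that select the top-level shard directory come from the hash body rather than from the small fixed set of prefix characters:

	adjustCacheKey("kdeadbeef") // "deadbeefk" - odd length, prefix "k" rotated to the end
	adjustCacheKey("deadbeef")  // "deadbeef"  - even length, returned unchanged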
func (c *blockCache) getContentBlock(ctx context.Context, cacheKey string, physicalBlockID string, offset, length int64) ([]byte, error) {
	cacheKey = adjustCacheKey(cacheKey)

	useCache := shouldUseBlockCache(ctx) && c.cacheStorage != nil
	if useCache {
		if b := c.readAndVerifyCacheBlock(ctx, cacheKey); b != nil {
			return b, nil
		}
	}

	b, err := c.st.GetBlock(ctx, physicalBlockID, offset, length)
	if err == storage.ErrBlockNotFound {
		// not found in underlying storage
		return nil, err
	}

	if err == nil && useCache {
		if puterr := c.cacheStorage.PutBlock(ctx, cacheKey, appendHMAC(b, c.hmacSecret)); puterr != nil {
			log.Warningf("unable to write cache item %v: %v", cacheKey, puterr)
		}
	}

	return b, err
}
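getContentBlock authenticates cache entries using appendHMAC and verifyAndStripHMAC, which live elsewhere in the repository and are not part of this diff. A minimal sketch of the pair, assuming the tag is an HMAC-SHA256 suffix (the suffix layout and the SHA-256 choice are assumptions here; imports of crypto/hmac, crypto/sha256 and fmt are implied):

	// Hypothetical sketch - not the actual helpers from this repository.
	func appendHMAC(data, secret []byte) []byte {
		h := hmac.New(sha256.New, secret)
		h.Write(data) //nolint:errcheck
		// return data with its 32-byte authentication tag appended
		return h.Sum(append([]byte(nil), data...))
	}

	func verifyAndStripHMAC(b, secret []byte) ([]byte, error) {
		if len(b) < sha256.Size {
			return nil, fmt.Errorf("malformed data - too short")
		}
		data, tag := b[:len(b)-sha256.Size], b[len(b)-sha256.Size:]
		h := hmac.New(sha256.New, secret)
		h.Write(data) //nolint:errcheck
		if !hmac.Equal(tag, h.Sum(nil)) {
			return nil, fmt.Errorf("malformed data - invalid HMAC")
		}
		return data, nil
	}

Whatever the exact implementation, the behavior visible in this file is that a corrupted cache entry fails verification and getContentBlock silently falls through to the underlying storage (the DataCorruption test below exercises exactly that).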
func (c *blockCache) readAndVerifyCacheBlock(ctx context.Context, cacheKey string) []byte {
	b, err := c.cacheStorage.GetBlock(ctx, cacheKey, 0, -1)
	if err == nil {
		b, err = verifyAndStripHMAC(b, c.hmacSecret)
		if err == nil {
			if t, ok := c.cacheStorage.(blockToucher); ok {
				t.TouchBlock(ctx, cacheKey, c.touchThreshold) //nolint:errcheck
			}

			// retrieved from cache and HMAC valid
			return b
		}

		// ignore malformed blocks
		log.Warningf("malformed block %v: %v", cacheKey, err)
		return nil
	}

	if err != storage.ErrBlockNotFound {
		log.Warningf("unable to read cache %v: %v", cacheKey, err)
	}
	return nil
}

func (c *blockCache) close() {
	close(c.closed)
}

func (c *blockCache) sweepDirectoryPeriodically(ctx context.Context) {
	for {
		select {
		case <-c.closed:
			return

		case <-time.After(c.sweepFrequency):
			err := c.sweepDirectory(ctx)
			if err != nil {
				log.Warningf("blockCache sweep failed: %v", err)
			}
		}
	}
}

// A blockMetadataHeap implements heap.Interface and holds storage.BlockMetadata.
type blockMetadataHeap []storage.BlockMetadata

func (h blockMetadataHeap) Len() int { return len(h) }

func (h blockMetadataHeap) Less(i, j int) bool {
	return h[i].Timestamp.Before(h[j].Timestamp)
}

func (h blockMetadataHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

func (h *blockMetadataHeap) Push(x interface{}) {
	*h = append(*h, x.(storage.BlockMetadata))
}

func (h *blockMetadataHeap) Pop() interface{} {
	old := *h
	n := len(old)
	item := old[n-1]
	*h = old[0 : n-1]
	return item
}
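Because Less orders entries by Timestamp, heap.Pop always yields the oldest entry pushed so far, which is what lets sweepDirectory below evict oldest-first once the cache exceeds its size budget. A tiny illustration with hypothetical metadata values:

	t0 := time.Now()
	var h blockMetadataHeap
	heap.Push(&h, storage.BlockMetadata{BlockID: "newer", Timestamp: t0.Add(time.Minute)})
	heap.Push(&h, storage.BlockMetadata{BlockID: "older", Timestamp: t0})
	oldest := heap.Pop(&h).(storage.BlockMetadata) // oldest.BlockID == "older"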
func (c *blockCache) sweepDirectory(ctx context.Context) (err error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.cacheStorage == nil {
		return nil
	}

	t0 := time.Now()

	var h blockMetadataHeap
	var totalRetainedSize int64

	err = c.cacheStorage.ListBlocks(ctx, "", func(it storage.BlockMetadata) error {
		heap.Push(&h, it)
		totalRetainedSize += it.Length

		if totalRetainedSize > c.maxSizeBytes {
			oldest := heap.Pop(&h).(storage.BlockMetadata)
			if delerr := c.cacheStorage.DeleteBlock(ctx, oldest.BlockID); delerr != nil {
				log.Warningf("unable to remove %v: %v", oldest.BlockID, delerr)
			} else {
				totalRetainedSize -= oldest.Length
			}
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("error listing cache: %v", err)
	}

	log.Debugf("finished sweeping directory in %v and retained %v/%v bytes (%v %%)", time.Since(t0), totalRetainedSize, c.maxSizeBytes, 100*totalRetainedSize/c.maxSizeBytes)
	c.lastTotalSizeBytes = totalRetainedSize
	return nil
}

func newBlockCache(ctx context.Context, st storage.Storage, caching CachingOptions) (*blockCache, error) {
	var cacheStorage storage.Storage
	var err error

	if caching.MaxCacheSizeBytes > 0 && caching.CacheDirectory != "" {
		blockCacheDir := filepath.Join(caching.CacheDirectory, "blocks")

		if _, err = os.Stat(blockCacheDir); os.IsNotExist(err) {
			if err = os.MkdirAll(blockCacheDir, 0700); err != nil {
				return nil, err
			}
		}

		cacheStorage, err = filesystem.New(context.Background(), &filesystem.Options{
			Path:            blockCacheDir,
			DirectoryShards: []int{2},
		})
		if err != nil {
			return nil, err
		}
	}

	return newBlockCacheWithCacheStorage(ctx, st, cacheStorage, caching, defaultTouchThreshold, defaultSweepFrequency)
}

func newBlockCacheWithCacheStorage(ctx context.Context, st, cacheStorage storage.Storage, caching CachingOptions, touchThreshold time.Duration, sweepFrequency time.Duration) (*blockCache, error) {
	c := &blockCache{
		st:             st,
		cacheStorage:   cacheStorage,
		maxSizeBytes:   caching.MaxCacheSizeBytes,
		hmacSecret:     append([]byte(nil), caching.HMACSecret...),
		closed:         make(chan struct{}),
		touchThreshold: touchThreshold,
		sweepFrequency: sweepFrequency,
	}

	if err := c.sweepDirectory(ctx); err != nil {
		return nil, err
	}
	go c.sweepDirectoryPeriodically(ctx)

	return c, nil
}
298
block/block_cache_test.go
Normal file
@@ -0,0 +1,298 @@
package block

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"reflect"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/kopia/repo/internal/storagetesting"
	"github.com/kopia/repo/storage"
)

func newUnderlyingStorageForBlockCacheTesting(t *testing.T) storage.Storage {
	ctx := context.Background()
	data := map[string][]byte{}
	st := storagetesting.NewMapStorage(data, nil, nil)
	assertNoError(t, st.PutBlock(ctx, "block-1", []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}))
	assertNoError(t, st.PutBlock(ctx, "block-4k", bytes.Repeat([]byte{1, 2, 3, 4}, 1000))) // 4000 bytes
	return st
}

func TestCacheExpiration(t *testing.T) {
	cacheData := map[string][]byte{}
	cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)

	underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)

	cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, cacheStorage, CachingOptions{
		MaxCacheSizeBytes: 10000,
	}, 0, 500*time.Millisecond)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer cache.close()

	ctx := context.Background()
	_, err = cache.getContentBlock(ctx, "00000a", "block-4k", 0, -1) // 4k
	assertNoError(t, err)
	_, err = cache.getContentBlock(ctx, "00000b", "block-4k", 0, -1) // 4k
	assertNoError(t, err)
	_, err = cache.getContentBlock(ctx, "00000c", "block-4k", 0, -1) // 4k
	assertNoError(t, err)
	_, err = cache.getContentBlock(ctx, "00000d", "block-4k", 0, -1) // 4k
	assertNoError(t, err)

	// wait for a sweep
	time.Sleep(2 * time.Second)

	// 00000a and 00000b will be removed from the cache because they are the oldest.
	// to verify, let's remove block-4k from the underlying storage and make sure we can still read
	// 00000c and 00000d from the cache but not 00000a nor 00000b
	assertNoError(t, underlyingStorage.DeleteBlock(ctx, "block-4k"))

	cases := []struct {
		block         string
		expectedError error
	}{
		{"00000a", storage.ErrBlockNotFound},
		{"00000b", storage.ErrBlockNotFound},
		{"00000c", nil},
		{"00000d", nil},
	}

	for _, tc := range cases {
		_, got := cache.getContentBlock(ctx, tc.block, "block-4k", 0, -1)
		if want := tc.expectedError; got != want {
			t.Errorf("unexpected error when getting block %v: %v wanted %v", tc.block, got, want)
		} else {
			t.Logf("got correct error %v when reading block %v", tc.expectedError, tc.block)
		}
	}
}

func TestDiskBlockCache(t *testing.T) {
	ctx := context.Background()

	tmpDir, err := ioutil.TempDir("", "kopia")
	if err != nil {
		t.Fatalf("error getting temp dir: %v", err)
	}
	defer os.RemoveAll(tmpDir)

	cache, err := newBlockCache(ctx, newUnderlyingStorageForBlockCacheTesting(t), CachingOptions{
		MaxCacheSizeBytes: 10000,
		CacheDirectory:    tmpDir,
	})

	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer cache.close()
	verifyBlockCache(t, cache)
}

func verifyBlockCache(t *testing.T, cache *blockCache) {
	ctx := context.Background()

	t.Run("GetContentBlock", func(t *testing.T) {
		cases := []struct {
			cacheKey        string
			physicalBlockID string
			offset          int64
			length          int64

			expected []byte
			err      error
		}{
			{"xf0f0f1", "block-1", 1, 5, []byte{2, 3, 4, 5, 6}, nil},
			{"xf0f0f2", "block-1", 0, -1, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil},
			{"xf0f0f1", "block-1", 1, 5, []byte{2, 3, 4, 5, 6}, nil},
			{"xf0f0f2", "block-1", 0, -1, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil},
			{"xf0f0f3", "no-such-block", 0, -1, nil, storage.ErrBlockNotFound},
			{"xf0f0f4", "no-such-block", 10, 5, nil, storage.ErrBlockNotFound},
			{"f0f0f5", "block-1", 7, 3, []byte{8, 9, 10}, nil},
			{"xf0f0f6", "block-1", 11, 10, nil, fmt.Errorf("invalid offset")},
			{"xf0f0f6", "block-1", -1, 5, nil, fmt.Errorf("invalid offset")},
		}

		for _, tc := range cases {
			v, err := cache.getContentBlock(ctx, tc.cacheKey, tc.physicalBlockID, tc.offset, tc.length)
			if !reflect.DeepEqual(err, tc.err) {
				t.Errorf("unexpected error for %v: %+v, wanted %+v", tc.cacheKey, err, tc.err)
			}
			if !reflect.DeepEqual(v, tc.expected) {
				t.Errorf("unexpected data for %v: %x, wanted %x", tc.cacheKey, v, tc.expected)
			}
		}

		verifyStorageBlockList(t, cache.cacheStorage, "f0f0f1x", "f0f0f2x", "f0f0f5")
	})

	t.Run("DataCorruption", func(t *testing.T) {
		cacheKey := "f0f0f1x"
		d, err := cache.cacheStorage.GetBlock(ctx, cacheKey, 0, -1)
		if err != nil {
			t.Fatalf("unable to retrieve data from cache: %v", err)
		}

		// corrupt the data and write back
		d[0] ^= 1

		if err := cache.cacheStorage.PutBlock(ctx, cacheKey, d); err != nil {
			t.Fatalf("unable to write corrupted block: %v", err)
		}

		v, err := cache.getContentBlock(ctx, "xf0f0f1", "block-1", 1, 5)
		if err != nil {
			t.Fatalf("error in getContentBlock: %v", err)
		}
		if got, want := v, []byte{2, 3, 4, 5, 6}; !reflect.DeepEqual(got, want) {
			t.Errorf("invalid result when reading corrupted data: %v, wanted %v", got, want)
		}
	})
}

func TestCacheFailureToOpen(t *testing.T) {
	someError := errors.New("some error")

	cacheData := map[string][]byte{}
	cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
	underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
	faultyCache := &storagetesting.FaultyStorage{
		Base: cacheStorage,
		Faults: map[string][]*storagetesting.Fault{
			"ListBlocks": {
				{Err: someError},
			},
		},
	}

	// Will fail because of the ListBlocks failure.
	_, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
		MaxCacheSizeBytes: 10000,
	}, 0, 5*time.Hour)
	if err == nil || !strings.Contains(err.Error(), someError.Error()) {
		t.Errorf("invalid error %v, wanted: %v", err, someError)
	}

	// ListBlocks fails only once, next time it succeeds.
	cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
		MaxCacheSizeBytes: 10000,
	}, 0, 100*time.Millisecond)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	defer cache.close()
}

func TestCacheFailureToWrite(t *testing.T) {
	someError := errors.New("some error")

	cacheData := map[string][]byte{}
	cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
	underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
	faultyCache := &storagetesting.FaultyStorage{
		Base: cacheStorage,
	}

	cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
		MaxCacheSizeBytes: 10000,
	}, 0, 5*time.Hour)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	defer cache.close()

	ctx := context.Background()
	faultyCache.Faults = map[string][]*storagetesting.Fault{
		"PutBlock": {
			{Err: someError},
		},
	}

	v, err := cache.getContentBlock(ctx, "aa", "block-1", 0, 3)
	if err != nil {
		t.Errorf("write failure wasn't ignored: %v", err)
	}

	if got, want := v, []byte{1, 2, 3}; !reflect.DeepEqual(got, want) {
		t.Errorf("unexpected value retrieved from cache: %v, want: %v", got, want)
	}

	all, err := storage.ListAllBlocks(ctx, cacheStorage, "")
	if err != nil {
		t.Errorf("error listing cache: %v", err)
	}
	if len(all) != 0 {
		t.Errorf("invalid test - cache was written")
	}
}

func TestCacheFailureToRead(t *testing.T) {
	someError := errors.New("some error")

	cacheData := map[string][]byte{}
	cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
	underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
	faultyCache := &storagetesting.FaultyStorage{
		Base: cacheStorage,
	}

	cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
		MaxCacheSizeBytes: 10000,
	}, 0, 5*time.Hour)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	defer cache.close()

	ctx := context.Background()
	faultyCache.Faults = map[string][]*storagetesting.Fault{
		"GetBlock": {
			{Err: someError, Repeat: 100},
		},
	}

	for i := 0; i < 2; i++ {
		v, err := cache.getContentBlock(ctx, "aa", "block-1", 0, 3)
		if err != nil {
			t.Errorf("read failure wasn't ignored: %v", err)
		}

		if got, want := v, []byte{1, 2, 3}; !reflect.DeepEqual(got, want) {
			t.Errorf("unexpected value retrieved from cache: %v, want: %v", got, want)
		}
	}
}

func verifyStorageBlockList(t *testing.T, st storage.Storage, expectedBlocks ...string) {
	t.Helper()
	var foundBlocks []string
	assertNoError(t, st.ListBlocks(context.Background(), "", func(bm storage.BlockMetadata) error {
		foundBlocks = append(foundBlocks, bm.BlockID)
		return nil
	}))

	sort.Strings(foundBlocks)
	if !reflect.DeepEqual(foundBlocks, expectedBlocks) {
		t.Errorf("unexpected block list: %v, wanted %v", foundBlocks, expectedBlocks)
	}
}

func assertNoError(t *testing.T, err error) {
	t.Helper()
	if err != nil {
		t.Errorf("err: %v", err)
	}
}
217
block/block_formatter.go
Normal file
@@ -0,0 +1,217 @@
package block

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/hmac" //nolint:gas
	"crypto/sha256"
	"fmt"
	"hash"
	"sort"

	"golang.org/x/crypto/blake2b"
	"golang.org/x/crypto/blake2s"
	"golang.org/x/crypto/salsa20"
	"golang.org/x/crypto/sha3"
)

// HashFunc computes the hash of a block of data using a cryptographic hash function, possibly with HMAC and/or truncation.
type HashFunc func(data []byte) []byte

// HashFuncFactory returns a hash function for given formatting options.
type HashFuncFactory func(o FormattingOptions) (HashFunc, error)

// Encryptor performs encryption and decryption of blocks of data.
type Encryptor interface {
	// Encrypt returns encrypted bytes corresponding to the given plaintext. Must not clobber the input slice.
	Encrypt(plainText []byte, blockID []byte) ([]byte, error)

	// Decrypt returns unencrypted bytes corresponding to the given ciphertext. Must not clobber the input slice.
	Decrypt(cipherText []byte, blockID []byte) ([]byte, error)
}

// EncryptorFactory creates a new Encryptor for given FormattingOptions.
type EncryptorFactory func(o FormattingOptions) (Encryptor, error)

var hashFunctions = map[string]HashFuncFactory{}
var encryptors = map[string]EncryptorFactory{}

// nullEncryptor implements non-encrypted format.
type nullEncryptor struct{}

func (fi nullEncryptor) Encrypt(plainText []byte, blockID []byte) ([]byte, error) {
	return cloneBytes(plainText), nil
}

func (fi nullEncryptor) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) {
	return cloneBytes(cipherText), nil
}
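Both methods delegate to cloneBytes, which is not included in this diff. Given the "must not clobber the input slice" contract on Encryptor, it is presumably just a defensive copy, along these lines (assumed sketch):

	func cloneBytes(b []byte) []byte {
		// copy into a fresh backing array so the caller's slice is never aliased
		return append([]byte(nil), b...)
	}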
// ctrEncryptor implements encrypted format which uses CTR mode of a block cipher with nonce==IV.
type ctrEncryptor struct {
	createCipher func() (cipher.Block, error)
}

func (fi ctrEncryptor) Encrypt(plainText []byte, blockID []byte) ([]byte, error) {
	return symmetricEncrypt(fi.createCipher, blockID, plainText)
}

func (fi ctrEncryptor) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) {
	return symmetricEncrypt(fi.createCipher, blockID, cipherText)
}

func symmetricEncrypt(createCipher func() (cipher.Block, error), iv []byte, b []byte) ([]byte, error) {
	blockCipher, err := createCipher()
	if err != nil {
		return nil, err
	}

	ctr := cipher.NewCTR(blockCipher, iv[0:blockCipher.BlockSize()])
	result := make([]byte, len(b))
	ctr.XORKeyStream(result, b)
	return result, nil
}

type salsaEncryptor struct {
	nonceSize int
	key       *[32]byte
}

func (s salsaEncryptor) Decrypt(input []byte, blockID []byte) ([]byte, error) {
	return s.encryptDecrypt(input, blockID)
}

func (s salsaEncryptor) Encrypt(input []byte, blockID []byte) ([]byte, error) {
	return s.encryptDecrypt(input, blockID)
}

func (s salsaEncryptor) encryptDecrypt(input []byte, blockID []byte) ([]byte, error) {
	if len(blockID) < s.nonceSize {
		return nil, fmt.Errorf("hash too short, expected >=%v bytes, got %v", s.nonceSize, len(blockID))
	}
	result := make([]byte, len(input))
	nonce := blockID[0:s.nonceSize]
	salsa20.XORKeyStream(result, input, nonce, s.key)
	return result, nil
}

// truncatedHMACHashFuncFactory returns a HashFuncFactory that computes HMAC(hash, secret) of a given block of bytes
// and truncates the result to the given size.
func truncatedHMACHashFuncFactory(hf func() hash.Hash, truncate int) HashFuncFactory {
	return func(o FormattingOptions) (HashFunc, error) {
		return func(b []byte) []byte {
			h := hmac.New(hf, o.HMACSecret)
			h.Write(b) // nolint:errcheck
			return h.Sum(nil)[0:truncate]
		}, nil
	}
}

// truncatedKeyedHashFuncFactory returns a HashFuncFactory that computes a keyed hash of a given block of bytes
// and truncates the result to the given size.
func truncatedKeyedHashFuncFactory(hf func(key []byte) (hash.Hash, error), truncate int) HashFuncFactory {
	return func(o FormattingOptions) (HashFunc, error) {
		if _, err := hf(o.HMACSecret); err != nil {
			return nil, err
		}

		return func(b []byte) []byte {
			h, _ := hf(o.HMACSecret)
			h.Write(b) // nolint:errcheck
			return h.Sum(nil)[0:truncate]
		}, nil
	}
}

// newCTREncryptorFactory returns a new EncryptorFactory that uses CTR mode with a symmetric block cipher (such as AES) and a given key size.
func newCTREncryptorFactory(keySize int, createCipherWithKey func(key []byte) (cipher.Block, error)) EncryptorFactory {
	return func(o FormattingOptions) (Encryptor, error) {
		key, err := adjustKey(o.MasterKey, keySize)
		if err != nil {
			return nil, fmt.Errorf("unable to get encryption key: %v", err)
		}

		return ctrEncryptor{
			createCipher: func() (cipher.Block, error) {
				return createCipherWithKey(key)
			},
		}, nil
	}
}

// RegisterHash registers a hash function with a given name.
func RegisterHash(name string, newHashFunc HashFuncFactory) {
	hashFunctions[name] = newHashFunc
}

// SupportedHashAlgorithms returns the names of all registered hash algorithms, sorted alphabetically.
func SupportedHashAlgorithms() []string {
	var result []string
	for k := range hashFunctions {
		result = append(result, k)
	}
	sort.Strings(result)
	return result
}

// SupportedEncryptionAlgorithms returns the names of all registered encryption algorithms, sorted alphabetically.
func SupportedEncryptionAlgorithms() []string {
	var result []string
	for k := range encryptors {
		result = append(result, k)
	}
	sort.Strings(result)
	return result
}

// RegisterEncryption registers a new encryption algorithm.
func RegisterEncryption(name string, newEncryptor EncryptorFactory) {
	encryptors[name] = newEncryptor
}

// DefaultHash is the name of the default hash algorithm.
const DefaultHash = "BLAKE2B-256-128"

// DefaultEncryption is the name of the default encryption algorithm.
const DefaultEncryption = "SALSA20"

func init() {
	RegisterHash("HMAC-SHA256", truncatedHMACHashFuncFactory(sha256.New, 32))
	RegisterHash("HMAC-SHA256-128", truncatedHMACHashFuncFactory(sha256.New, 16))
	RegisterHash("HMAC-SHA224", truncatedHMACHashFuncFactory(sha256.New224, 28))
	RegisterHash("HMAC-SHA3-224", truncatedHMACHashFuncFactory(sha3.New224, 28))
	RegisterHash("HMAC-SHA3-256", truncatedHMACHashFuncFactory(sha3.New256, 32))

	RegisterHash("BLAKE2S-128", truncatedKeyedHashFuncFactory(blake2s.New128, 16))
	RegisterHash("BLAKE2S-256", truncatedKeyedHashFuncFactory(blake2s.New256, 32))
	RegisterHash("BLAKE2B-256-128", truncatedKeyedHashFuncFactory(blake2b.New256, 16))
	RegisterHash("BLAKE2B-256", truncatedKeyedHashFuncFactory(blake2b.New256, 32))

	RegisterEncryption("NONE", func(f FormattingOptions) (Encryptor, error) {
		return nullEncryptor{}, nil
	})
	RegisterEncryption("AES-128-CTR", newCTREncryptorFactory(16, aes.NewCipher))
	RegisterEncryption("AES-192-CTR", newCTREncryptorFactory(24, aes.NewCipher))
	RegisterEncryption("AES-256-CTR", newCTREncryptorFactory(32, aes.NewCipher))
	RegisterEncryption("SALSA20", func(f FormattingOptions) (Encryptor, error) {
		var k [32]byte
		copy(k[:], f.MasterKey[0:32])
		return salsaEncryptor{8, &k}, nil
	})
	RegisterEncryption("XSALSA20", func(f FormattingOptions) (Encryptor, error) {
		var k [32]byte
		copy(k[:], f.MasterKey[0:32])
		return salsaEncryptor{24, &k}, nil
	})
}

func adjustKey(masterKey []byte, desiredKeySize int) ([]byte, error) {
	if len(masterKey) == desiredKeySize {
		return masterKey, nil
	}

	if desiredKeySize < len(masterKey) {
		return masterKey[0:desiredKeySize], nil
	}

	return nil, fmt.Errorf("required key too long %v, but only have %v", desiredKeySize, len(masterKey))
}
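These registries are consumed by CreateHashAndEncryptor, which the tests in the next file call but which is not included in this diff. A plausible minimal sketch: look up both factories by name, then perform a trial encryption so that incompatible hash/encryption pairs fail at construction time (the tests below expect the "invalid encryptor: hash too short ..." wording that such a trial would produce; everything in this sketch is an assumption):

	// Hypothetical sketch of how the registries tie together.
	func CreateHashAndEncryptor(f FormattingOptions) (HashFunc, Encryptor, error) {
		hf, ok := hashFunctions[f.Hash]
		if !ok {
			return nil, nil, fmt.Errorf("unknown hash algorithm: %v", f.Hash)
		}
		h, err := hf(f)
		if err != nil {
			return nil, nil, fmt.Errorf("unable to create hash: %v", err)
		}

		ef, ok := encryptors[f.Encryption]
		if !ok {
			return nil, nil, fmt.Errorf("unknown encryption algorithm: %v", f.Encryption)
		}
		e, err := ef(f)
		if err != nil {
			return nil, nil, fmt.Errorf("unable to create encryptor: %v", err)
		}

		// trial encryption: a hash output too short to serve as a nonce fails here
		if _, err := e.Encrypt(nil, h(nil)); err != nil {
			return nil, nil, fmt.Errorf("invalid encryptor: %v", err)
		}

		return h, e, nil
	}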
62
block/block_formatter_test.go
Normal file
@@ -0,0 +1,62 @@
package block

import (
	"bytes"
	"crypto/sha1"
	"math/rand"
	"testing"
)

// combinations of hash and encryption that are not compatible.
var incompatibleAlgorithms = map[string]string{
	"BLAKE2B-256-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16",
	"BLAKE2S-128/XSALSA20":     "invalid encryptor: hash too short, expected >=24 bytes, got 16",
	"HMAC-RIPEMD-160/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 20",
	"HMAC-SHA256-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16",
}

func TestFormatters(t *testing.T) {
	secret := []byte("secret")

	data := make([]byte, 100)
	rand.Read(data)
	h0 := sha1.Sum(data)

	for _, hashAlgo := range SupportedHashAlgorithms() {
		for _, encryptionAlgo := range SupportedEncryptionAlgorithms() {
			h, e, err := CreateHashAndEncryptor(FormattingOptions{
				HMACSecret: secret,
				MasterKey:  make([]byte, 32),
				Hash:       hashAlgo,
				Encryption: encryptionAlgo,
			})

			if err != nil {
				key := hashAlgo + "/" + encryptionAlgo
				errmsg := incompatibleAlgorithms[key]
				if err.Error() == errmsg {
					continue
				}
				t.Errorf("Algorithm %v not marked as incompatible and failed with %v", key, err)
				continue
			}

			blockID := h(data)
			cipherText, err := e.Encrypt(data, blockID)
			if err != nil || cipherText == nil {
				t.Errorf("invalid response from Encrypt: %v %v", cipherText, err)
			}

			plainText, err := e.Decrypt(cipherText, blockID)
			if err != nil || plainText == nil {
				t.Errorf("invalid response from Decrypt: %v %v", plainText, err)
			}

			h1 := sha1.Sum(plainText)

			if !bytes.Equal(h0[:], h1[:]) {
				t.Errorf("Encrypt()/Decrypt() does not round-trip: %x %x", h0, h1)
			}
		}
	}
}
11
block/block_formatting_options.go
Normal file
@@ -0,0 +1,11 @@
package block

// FormattingOptions describes the rules for formatting blocks in the repository.
type FormattingOptions struct {
	Version     int    `json:"version,omitempty"`     // version number, must be 1
	Hash        string `json:"hash,omitempty"`        // identifier of the hash algorithm used
	Encryption  string `json:"encryption,omitempty"`  // identifier of the encryption algorithm used
	HMACSecret  []byte `json:"secret,omitempty"`      // HMAC secret used to generate encryption keys
	MasterKey   []byte `json:"masterKey,omitempty"`   // master encryption key (SIV-mode encryption only)
	MaxPackSize int    `json:"maxPackSize,omitempty"` // maximum size of a pack object
}
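With these JSON tags, the []byte fields serialize as base64 in the usual encoding/json manner. An illustrative round-trip (values are examples only; the masterKey output is abbreviated):

	opts := FormattingOptions{
		Version:     1,
		Hash:        DefaultHash,       // "BLAKE2B-256-128"
		Encryption:  DefaultEncryption, // "SALSA20"
		HMACSecret:  []byte{1, 2, 3},
		MasterKey:   make([]byte, 32),
		MaxPackSize: 20 << 20,
	}
	b, _ := json.Marshal(opts)
	// {"version":1,"hash":"BLAKE2B-256-128","encryption":"SALSA20","secret":"AQID","masterKey":"AAA...","maxPackSize":20971520}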
226
block/block_index_recovery.go
Normal file
@@ -0,0 +1,226 @@
package block

import (
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"hash/crc32"
	"reflect"
)

// RecoverIndexFromPackFile attempts to recover index block entries from a given pack file.
// Pack file length may be provided (if known) to reduce the number of bytes that are read from the storage.
func (bm *Manager) RecoverIndexFromPackFile(ctx context.Context, packFile string, packFileLength int64, commit bool) ([]Info, error) {
	localIndexBytes, err := bm.readPackFileLocalIndex(ctx, packFile, packFileLength)
	if err != nil {
		return nil, err
	}

	ndx, err := openPackIndex(bytes.NewReader(localIndexBytes))
	if err != nil {
		return nil, fmt.Errorf("unable to open index in file %v", packFile)
	}
	defer ndx.Close() //nolint:errcheck

	var recovered []Info

	err = ndx.Iterate("", func(i Info) error {
		recovered = append(recovered, i)
		if commit {
			bm.packIndexBuilder.Add(i)
		}
		return nil
	})

	return recovered, err
}

type packBlockPostamble struct {
	localIndexIV     []byte
	localIndexOffset uint32
	localIndexLength uint32
}

func (p *packBlockPostamble) toBytes() ([]byte, error) {
	// 4 varints + IV + 4 bytes of checksum + 1 byte of postamble length
	n := 0
	buf := make([]byte, 4*binary.MaxVarintLen64+len(p.localIndexIV)+4+1)

	n += binary.PutUvarint(buf[n:], uint64(1))                   // version flag
	n += binary.PutUvarint(buf[n:], uint64(len(p.localIndexIV))) // length of local index IV
	copy(buf[n:], p.localIndexIV)
	n += len(p.localIndexIV)
	n += binary.PutUvarint(buf[n:], uint64(p.localIndexOffset))
	n += binary.PutUvarint(buf[n:], uint64(p.localIndexLength))

	checksum := crc32.ChecksumIEEE(buf[0:n])
	binary.BigEndian.PutUint32(buf[n:], checksum)
	n += 4
	if n > 255 {
		return nil, fmt.Errorf("postamble too long: %v", n)
	}

	buf[n] = byte(n)
	return buf[0 : n+1], nil
}
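To make the layout concrete, here is the encoding toBytes produces for a hypothetical postamble with a 16-byte IV, offset 4096, and length 300 (all integers are unsigned varints, so small values take one byte and values up to 16383 take two):

	01              version flag = 1
	10              IV length = 16
	<16 IV bytes>   localIndexIV
	80 20           localIndexOffset = 4096
	ac 02           localIndexLength = 300
	<4 bytes>       big-endian CRC32-IEEE of the 22 bytes above
	1a              postamble length = 26 (count of all preceding bytes)

findPostamble below reads that trailing length byte first, slices out the candidate postamble, and only then validates the checksum.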
// findPostamble detects if a given block of bytes contains a possibly valid postamble, and returns it if so.
// NOTE: even if this function returns a postamble, it should not be trusted to be correct, since it's not
// cryptographically signed. This is to facilitate data recovery.
func findPostamble(b []byte) *packBlockPostamble {
	if len(b) == 0 {
		// no postamble
		return nil
	}

	// length of postamble is the last byte
	postambleLength := int(b[len(b)-1])
	if postambleLength < 5 {
		// too short, must be at least 5 bytes (checksum + own length)
		return nil
	}
	postambleStart := len(b) - 1 - postambleLength
	postambleEnd := len(b) - 1
	if postambleStart < 0 {
		// invalid last byte
		return nil
	}

	postambleBytes := b[postambleStart:postambleEnd]
	payload, checksumBytes := postambleBytes[0:len(postambleBytes)-4], postambleBytes[len(postambleBytes)-4:]
	checksum := binary.BigEndian.Uint32(checksumBytes)
	validChecksum := crc32.ChecksumIEEE(payload)

	if checksum != validChecksum {
		// invalid checksum, not a valid postamble
		return nil
	}

	return decodePostamble(payload)
}

func decodePostamble(payload []byte) *packBlockPostamble {
	flags, n := binary.Uvarint(payload)
	if n <= 0 {
		// invalid flags
		return nil
	}
	if flags != 1 {
		// unsupported flag
		return nil
	}
	payload = payload[n:]

	ivLength, n := binary.Uvarint(payload)
	if n <= 0 {
		// invalid IV length
		return nil
	}
	payload = payload[n:]
	if ivLength > uint64(len(payload)) {
		// invalid IV length
		return nil
	}

	iv := payload[0:ivLength]
	payload = payload[ivLength:]

	off, n := binary.Uvarint(payload)
	if n <= 0 {
		// invalid offset
		return nil
	}
	payload = payload[n:]

	length, n := binary.Uvarint(payload)
	if n <= 0 {
		// invalid length
		return nil
	}

	return &packBlockPostamble{
		localIndexIV:     iv,
		localIndexLength: uint32(length),
		localIndexOffset: uint32(off),
	}
}

func (bm *Manager) buildLocalIndex(pending packIndexBuilder) ([]byte, error) {
	var buf bytes.Buffer
	if err := pending.Build(&buf); err != nil {
		return nil, fmt.Errorf("unable to build local index: %v", err)
	}

	return buf.Bytes(), nil
}

// appendPackFileIndexRecoveryData appends data designed to help with recovery of the pack index in case it gets damaged or lost.
func (bm *Manager) appendPackFileIndexRecoveryData(blockData []byte, pending packIndexBuilder) ([]byte, error) {
	// build, encrypt and append local index
	localIndexOffset := len(blockData)
	localIndex, err := bm.buildLocalIndex(pending)
	if err != nil {
		return nil, err
	}

	localIndexIV := bm.hashData(localIndex)
	encryptedLocalIndex, err := bm.encryptor.Encrypt(localIndex, localIndexIV)
	if err != nil {
		return nil, err
	}

	postamble := packBlockPostamble{
		localIndexIV:     localIndexIV,
		localIndexOffset: uint32(localIndexOffset),
		localIndexLength: uint32(len(localIndex)),
	}

	blockData = append(blockData, encryptedLocalIndex...)
	postambleBytes, err := postamble.toBytes()
	if err != nil {
		return nil, err
	}

	blockData = append(blockData, postambleBytes...)

	pa2 := findPostamble(blockData)
	if pa2 == nil {
		log.Fatalf("invalid postamble written that could not be immediately decoded - this is a bug")
	}

	if !reflect.DeepEqual(postamble, *pa2) {
		log.Fatalf("postamble did not round-trip: %v %v", postamble, *pa2)
	}

	return blockData, nil
}

func (bm *Manager) readPackFileLocalIndex(ctx context.Context, packFile string, packFileLength int64) ([]byte, error) {
	payload, err := bm.st.GetBlock(ctx, packFile, 0, -1)
	if err != nil {
		return nil, err
	}

	postamble := findPostamble(payload)
	if postamble == nil {
		return nil, fmt.Errorf("unable to find valid postamble in file %v", packFile)
	}

	if uint64(postamble.localIndexOffset+postamble.localIndexLength) > uint64(len(payload)) {
		// invalid offset/length
		return nil, fmt.Errorf("unable to find valid local index in file %v", packFile)
	}

	encryptedLocalIndexBytes := payload[postamble.localIndexOffset : postamble.localIndexOffset+postamble.localIndexLength]
	if encryptedLocalIndexBytes == nil {
		return nil, fmt.Errorf("unable to find valid local index in file %v", packFile)
	}

	localIndexBytes, err := bm.decryptAndVerify(encryptedLocalIndexBytes, postamble.localIndexIV)
	if err != nil {
		return nil, fmt.Errorf("unable to decrypt local index: %v", err)
	}

	return localIndexBytes, nil
}
90
block/block_index_recovery_test.go
Normal file
@@ -0,0 +1,90 @@
package block

import (
	"context"
	"testing"
	"time"

	"github.com/kopia/repo/storage"
)

func TestBlockIndexRecovery(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)
	block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
	block2 := writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100))
	block3 := writeBlockAndVerify(ctx, t, bm, seededRandomData(12, 100))

	if err := bm.Flush(ctx); err != nil {
		t.Errorf("flush error: %v", err)
	}

	// delete all index blocks
	assertNoError(t, bm.st.ListBlocks(ctx, newIndexBlockPrefix, func(bi storage.BlockMetadata) error {
		log.Debugf("deleting %v", bi.BlockID)
		return bm.st.DeleteBlock(ctx, bi.BlockID)
	}))

	// now with index blocks gone, all blocks appear to not be found
	bm = newTestBlockManager(data, keyTime, nil)
	verifyBlockNotFound(ctx, t, bm, block1)
	verifyBlockNotFound(ctx, t, bm, block2)
	verifyBlockNotFound(ctx, t, bm, block3)

	totalRecovered := 0

	// pass 1 - just list the blocks to recover, but don't commit
	err := bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error {
		infos, err := bm.RecoverIndexFromPackFile(ctx, bi.BlockID, bi.Length, false)
		if err != nil {
			return err
		}
		totalRecovered += len(infos)
		log.Debugf("recovered %v blocks", len(infos))
		return nil
	})
	if err != nil {
		t.Errorf("error recovering: %v", err)
	}

	if got, want := totalRecovered, 3; got != want {
		t.Errorf("invalid # of blocks recovered: %v, want %v", got, want)
	}

	// blocks are still not found
	verifyBlockNotFound(ctx, t, bm, block1)
	verifyBlockNotFound(ctx, t, bm, block2)
	verifyBlockNotFound(ctx, t, bm, block3)

	// pass 2 - now pass commit=true to add recovered blocks to the index
	totalRecovered = 0

	err = bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error {
		infos, err := bm.RecoverIndexFromPackFile(ctx, bi.BlockID, bi.Length, true)
		if err != nil {
			return err
		}
		totalRecovered += len(infos)
		log.Debugf("recovered %v blocks", len(infos))
		return nil
	})
	if err != nil {
		t.Errorf("error recovering: %v", err)
	}

	if got, want := totalRecovered, 3; got != want {
		t.Errorf("invalid # of blocks recovered: %v, want %v", got, want)
	}

	verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
	verifyBlock(ctx, t, bm, block2, seededRandomData(11, 100))
	verifyBlock(ctx, t, bm, block3, seededRandomData(12, 100))
	if err := bm.Flush(ctx); err != nil {
		t.Errorf("flush error: %v", err)
	}
	verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
	verifyBlock(ctx, t, bm, block2, seededRandomData(11, 100))
	verifyBlock(ctx, t, bm, block3, seededRandomData(12, 100))
}
1039
block/block_manager.go
Normal file
File diff suppressed because it is too large
148
block/block_manager_compaction.go
Normal file
@@ -0,0 +1,148 @@
package block

import (
	"bytes"
	"context"
	"fmt"
	"time"

	"github.com/pkg/errors"
)

var autoCompactionOptions = CompactOptions{
	MinSmallBlocks: 4 * parallelFetches,
	MaxSmallBlocks: 64,
}

// CompactOptions provides options for compaction.
type CompactOptions struct {
	MinSmallBlocks       int
	MaxSmallBlocks       int
	AllBlocks            bool
	SkipDeletedOlderThan time.Duration
}

// CompactIndexes performs compaction of index blocks, ensuring that the number of small blocks is between MinSmallBlocks and MaxSmallBlocks.
func (bm *Manager) CompactIndexes(ctx context.Context, opt CompactOptions) error {
	log.Debugf("CompactIndexes(%+v)", opt)
	if opt.MaxSmallBlocks < opt.MinSmallBlocks {
		return fmt.Errorf("invalid block counts")
	}

	indexBlocks, _, err := bm.loadPackIndexesUnlocked(ctx)
	if err != nil {
		return errors.Wrap(err, "error loading indexes")
	}

	blocksToCompact := bm.getBlocksToCompact(indexBlocks, opt)

	if err := bm.compactAndDeleteIndexBlocks(ctx, blocksToCompact, opt); err != nil {
		log.Warningf("error performing quick compaction: %v", err)
	}

	return nil
}

func (bm *Manager) getBlocksToCompact(indexBlocks []IndexInfo, opt CompactOptions) []IndexInfo {
	var nonCompactedBlocks []IndexInfo
	var totalSizeNonCompactedBlocks int64

	var verySmallBlocks []IndexInfo
	var totalSizeVerySmallBlocks int64

	var mediumSizedBlocks []IndexInfo
	var totalSizeMediumSizedBlocks int64

	for _, b := range indexBlocks {
		if b.Length > int64(bm.maxPackSize) && !opt.AllBlocks {
			continue
		}

		nonCompactedBlocks = append(nonCompactedBlocks, b)
		if b.Length < int64(bm.maxPackSize/20) {
			verySmallBlocks = append(verySmallBlocks, b)
			totalSizeVerySmallBlocks += b.Length
		} else {
			mediumSizedBlocks = append(mediumSizedBlocks, b)
			totalSizeMediumSizedBlocks += b.Length
		}
		totalSizeNonCompactedBlocks += b.Length
	}

	if len(nonCompactedBlocks) < opt.MinSmallBlocks {
		// current count is below the allowed minimum - nothing to do
		formatLog.Debugf("no small blocks to compact")
		return nil
	}

	if len(verySmallBlocks) > len(nonCompactedBlocks)/2 && len(mediumSizedBlocks)+1 < opt.MinSmallBlocks {
		formatLog.Debugf("compacting %v very small blocks", len(verySmallBlocks))
		return verySmallBlocks
	}

	formatLog.Debugf("compacting all %v non-compacted blocks", len(nonCompactedBlocks))
	return nonCompactedBlocks
}

func (bm *Manager) compactAndDeleteIndexBlocks(ctx context.Context, indexBlocks []IndexInfo, opt CompactOptions) error {
	if len(indexBlocks) <= 1 {
		return nil
	}
	formatLog.Debugf("compacting %v blocks", len(indexBlocks))
	t0 := time.Now()

	bld := make(packIndexBuilder)
	for _, indexBlock := range indexBlocks {
		if err := bm.addIndexBlocksToBuilder(ctx, bld, indexBlock, opt); err != nil {
			return err
		}
	}

	var buf bytes.Buffer
	if err := bld.Build(&buf); err != nil {
		return errors.Wrap(err, "unable to build an index")
	}

	compactedIndexBlock, err := bm.writePackIndexesNew(ctx, buf.Bytes())
	if err != nil {
		return errors.Wrap(err, "unable to write compacted indexes")
	}

	formatLog.Debugf("wrote compacted index (%v bytes) in %v", compactedIndexBlock, time.Since(t0))

	for _, indexBlock := range indexBlocks {
		if indexBlock.FileName == compactedIndexBlock {
			continue
		}

		bm.listCache.deleteListCache(ctx)
		if err := bm.st.DeleteBlock(ctx, indexBlock.FileName); err != nil {
			log.Warningf("unable to delete compacted block %q: %v", indexBlock.FileName, err)
		}
	}

	return nil
}

func (bm *Manager) addIndexBlocksToBuilder(ctx context.Context, bld packIndexBuilder, indexBlock IndexInfo, opt CompactOptions) error {
	data, err := bm.getPhysicalBlockInternal(ctx, indexBlock.FileName)
	if err != nil {
		return err
	}

	index, err := openPackIndex(bytes.NewReader(data))
	if err != nil {
		return fmt.Errorf("unable to open index block %q: %v", indexBlock.FileName, err)
	}

	_ = index.Iterate("", func(i Info) error {
		if i.Deleted && opt.SkipDeletedOlderThan > 0 && time.Since(i.Timestamp()) > opt.SkipDeletedOlderThan {
			log.Debugf("skipping block %v deleted at %v", i.BlockID, i.Timestamp())
			return nil
		}
		bld.Add(i)
		return nil
	})

	return nil
}
909
block/block_manager_test.go
Normal file
@@ -0,0 +1,909 @@
package block

import (
	"bytes"
	"context"
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"math/rand"
	"reflect"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/kopia/repo/internal/storagetesting"
	"github.com/kopia/repo/storage"
	logging "github.com/op/go-logging"
)

const (
	maxPackSize = 2000
)

var fakeTime = time.Date(2017, 1, 1, 0, 0, 0, 0, time.UTC)
var hmacSecret = []byte{1, 2, 3}

func init() {
	logging.SetLevel(logging.DEBUG, "")
}

func TestBlockManagerEmptyFlush(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)
	bm.Flush(ctx)
	if got, want := len(data), 0; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
}

func TestBlockZeroBytes1(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)
	blockID := writeBlockAndVerify(ctx, t, bm, []byte{})
	bm.Flush(ctx)
	if got, want := len(data), 2; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
	dumpBlockManagerData(t, data)
	bm = newTestBlockManager(data, keyTime, nil)
	verifyBlock(ctx, t, bm, blockID, []byte{})
}

func TestBlockZeroBytes2(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)
	writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 10))
	writeBlockAndVerify(ctx, t, bm, []byte{})
	bm.Flush(ctx)
	if got, want := len(data), 2; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
		dumpBlockManagerData(t, data)
	}
}

func TestBlockManagerSmallBlockWrites(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)

	for i := 0; i < 100; i++ {
		writeBlockAndVerify(ctx, t, bm, seededRandomData(i, 10))
	}
	if got, want := len(data), 0; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
	bm.Flush(ctx)
	if got, want := len(data), 2; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
}

func TestBlockManagerDedupesPendingBlocks(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)

	for i := 0; i < 100; i++ {
		writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 999))
	}
	if got, want := len(data), 0; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
	bm.Flush(ctx)
	if got, want := len(data), 2; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
}

func TestBlockManagerDedupesPendingAndUncommittedBlocks(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)

	// no writes here, all data fits in a single pack.
	writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 950))
	writeBlockAndVerify(ctx, t, bm, seededRandomData(1, 950))
	writeBlockAndVerify(ctx, t, bm, seededRandomData(2, 10))
	if got, want := len(data), 0; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}

	// no writes here
	writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 950))
	writeBlockAndVerify(ctx, t, bm, seededRandomData(1, 950))
	writeBlockAndVerify(ctx, t, bm, seededRandomData(2, 10))
	if got, want := len(data), 0; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
	bm.Flush(ctx)

	// this flushes the pack block + index block
	if got, want := len(data), 2; got != want {
		dumpBlockManagerData(t, data)
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
}

func TestBlockManagerEmpty(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)

	noSuchBlockID := string(hashValue([]byte("foo")))

	b, err := bm.GetBlock(ctx, noSuchBlockID)
	if err != storage.ErrBlockNotFound {
		t.Errorf("unexpected error when getting non-existent block: %v, %v", b, err)
	}

	bi, err := bm.BlockInfo(ctx, noSuchBlockID)
	if err != storage.ErrBlockNotFound {
		t.Errorf("unexpected error when getting non-existent block info: %v, %v", bi, err)
	}

	if got, want := len(data), 0; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
}

func verifyActiveIndexBlockCount(ctx context.Context, t *testing.T, bm *Manager, expected int) {
	t.Helper()

	blks, err := bm.IndexBlocks(ctx)
	if err != nil {
		t.Errorf("error listing active index blocks: %v", err)
		return
	}

	if got, want := len(blks), expected; got != want {
		t.Errorf("unexpected number of active index blocks %v, expected %v (%v)", got, want, blks)
	}
}

func TestBlockManagerInternalFlush(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)

	for i := 0; i < 100; i++ {
		b := make([]byte, 25)
		rand.Read(b)
		writeBlockAndVerify(ctx, t, bm, b)
	}

	// 1 data block written, but no index yet.
	if got, want := len(data), 1; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}

	// do it again - should be 2 blocks + 1000 bytes pending.
	for i := 0; i < 100; i++ {
		b := make([]byte, 25)
		rand.Read(b)
		writeBlockAndVerify(ctx, t, bm, b)
	}

	// 2 data blocks written, but no index yet.
	if got, want := len(data), 2; got != want {
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}

	bm.Flush(ctx)

	// third block gets written, followed by index.
	if got, want := len(data), 4; got != want {
		dumpBlockManagerData(t, data)
		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
	}
}

func TestBlockManagerWriteMultiple(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	timeFunc := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
	bm := newTestBlockManager(data, keyTime, timeFunc)

	var blockIDs []string

	for i := 0; i < 5000; i++ {
		//t.Logf("i=%v", i)
		b := seededRandomData(i, i%113)
		blkID, err := bm.WriteBlock(ctx, b, "")
		if err != nil {
			t.Errorf("err: %v", err)
		}

		blockIDs = append(blockIDs, blkID)

		if i%17 == 0 {
			//t.Logf("flushing %v", i)
			if err := bm.Flush(ctx); err != nil {
				t.Fatalf("error flushing: %v", err)
			}
			//dumpBlockManagerData(t, data)
		}

		if i%41 == 0 {
			//t.Logf("opening new manager: %v", i)
			if err := bm.Flush(ctx); err != nil {
				t.Fatalf("error flushing: %v", err)
			}
			//t.Logf("data block count: %v", len(data))
			//dumpBlockManagerData(t, data)
			bm = newTestBlockManager(data, keyTime, timeFunc)
		}

		pos := rand.Intn(len(blockIDs))
		if _, err := bm.GetBlock(ctx, blockIDs[pos]); err != nil {
			dumpBlockManagerData(t, data)
			t.Fatalf("can't read block %q: %v", blockIDs[pos], err)
			continue
		}
	}
}

// This is a regression test for a bug where we would corrupt data when encryption
// was done in place and clobbered pending data in memory.
func TestBlockManagerFailedToWritePack(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	st := storagetesting.NewMapStorage(data, keyTime, nil)
	faulty := &storagetesting.FaultyStorage{
		Base: st,
	}
	st = faulty

	bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{
		Version:     1,
		Hash:        "HMAC-SHA256-128",
		Encryption:  "AES-256-CTR",
		MaxPackSize: maxPackSize,
		HMACSecret:  []byte("foo"),
		MasterKey:   []byte("0123456789abcdef0123456789abcdef"),
	}, CachingOptions{}, fakeTimeNowFrozen(fakeTime), nil)
	if err != nil {
		t.Fatalf("can't create bm: %v", err)
	}
	logging.SetLevel(logging.DEBUG, "faulty-storage")

	faulty.Faults = map[string][]*storagetesting.Fault{
		"PutBlock": {
			{Err: errors.New("booboo")},
		},
	}

	b1, err := bm.WriteBlock(ctx, seededRandomData(1, 10), "")
	if err != nil {
		t.Fatalf("can't create block: %v", err)
	}

	if err := bm.Flush(ctx); err != nil {
		t.Logf("expected flush error: %v", err)
	}

	verifyBlock(ctx, t, bm, b1, seededRandomData(1, 10))
}

func TestBlockManagerConcurrency(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)
	preexistingBlock := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
	bm.Flush(ctx)

	dumpBlockManagerData(t, data)
	bm1 := newTestBlockManager(data, keyTime, nil)
	bm2 := newTestBlockManager(data, keyTime, nil)
	bm3 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(1), 1*time.Second))

	// all bm* can see the pre-existing block
	verifyBlock(ctx, t, bm1, preexistingBlock, seededRandomData(10, 100))
	verifyBlock(ctx, t, bm2, preexistingBlock, seededRandomData(10, 100))
	verifyBlock(ctx, t, bm3, preexistingBlock, seededRandomData(10, 100))

	// write the same block in all managers.
	sharedBlock := writeBlockAndVerify(ctx, t, bm1, seededRandomData(20, 100))
	writeBlockAndVerify(ctx, t, bm2, seededRandomData(20, 100))
	writeBlockAndVerify(ctx, t, bm3, seededRandomData(20, 100))

	// write a unique block per manager.
	bm1block := writeBlockAndVerify(ctx, t, bm1, seededRandomData(31, 100))
	bm2block := writeBlockAndVerify(ctx, t, bm2, seededRandomData(32, 100))
	bm3block := writeBlockAndVerify(ctx, t, bm3, seededRandomData(33, 100))

	// make sure they can't see each other's unflushed blocks.
	verifyBlockNotFound(ctx, t, bm1, bm2block)
	verifyBlockNotFound(ctx, t, bm1, bm3block)
	verifyBlockNotFound(ctx, t, bm2, bm1block)
	verifyBlockNotFound(ctx, t, bm2, bm3block)
	verifyBlockNotFound(ctx, t, bm3, bm1block)
	verifyBlockNotFound(ctx, t, bm3, bm2block)

	// now flush all writers, they still can't see each others' data.
	bm1.Flush(ctx)
	bm2.Flush(ctx)
	bm3.Flush(ctx)
	verifyBlockNotFound(ctx, t, bm1, bm2block)
	verifyBlockNotFound(ctx, t, bm1, bm3block)
	verifyBlockNotFound(ctx, t, bm2, bm1block)
	verifyBlockNotFound(ctx, t, bm2, bm3block)
	verifyBlockNotFound(ctx, t, bm3, bm1block)
	verifyBlockNotFound(ctx, t, bm3, bm2block)

	// a new block manager at this point can see all data.
	bm4 := newTestBlockManager(data, keyTime, nil)
	verifyBlock(ctx, t, bm4, preexistingBlock, seededRandomData(10, 100))
	verifyBlock(ctx, t, bm4, sharedBlock, seededRandomData(20, 100))
	verifyBlock(ctx, t, bm4, bm1block, seededRandomData(31, 100))
	verifyBlock(ctx, t, bm4, bm2block, seededRandomData(32, 100))
	verifyBlock(ctx, t, bm4, bm3block, seededRandomData(33, 100))

	if got, want := getIndexCount(data), 4; got != want {
		t.Errorf("unexpected index count before compaction: %v, wanted %v", got, want)
	}

	if err := bm4.CompactIndexes(ctx, CompactOptions{
		MinSmallBlocks: 1,
		MaxSmallBlocks: 1,
	}); err != nil {
		t.Errorf("compaction error: %v", err)
	}
	if got, want := getIndexCount(data), 1; got != want {
		t.Errorf("unexpected index count after compaction: %v, wanted %v", got, want)
	}

	// a new block manager at this point can see all data.
	bm5 := newTestBlockManager(data, keyTime, nil)
	verifyBlock(ctx, t, bm5, preexistingBlock, seededRandomData(10, 100))
	verifyBlock(ctx, t, bm5, sharedBlock, seededRandomData(20, 100))
	verifyBlock(ctx, t, bm5, bm1block, seededRandomData(31, 100))
	verifyBlock(ctx, t, bm5, bm2block, seededRandomData(32, 100))
	verifyBlock(ctx, t, bm5, bm3block, seededRandomData(33, 100))
	if err := bm5.CompactIndexes(ctx, CompactOptions{
		MinSmallBlocks: 1,
		MaxSmallBlocks: 1,
	}); err != nil {
		t.Errorf("compaction error: %v", err)
	}
}

func TestDeleteBlock(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	keyTime := map[string]time.Time{}
	bm := newTestBlockManager(data, keyTime, nil)
	block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
	bm.Flush(ctx)
	block2 := writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100))
	if err := bm.DeleteBlock(block1); err != nil {
		t.Errorf("unable to delete block: %v", block1)
	}
	if err := bm.DeleteBlock(block2); err != nil {
		t.Errorf("unable to delete block: %v", block2)
	}
	verifyBlockNotFound(ctx, t, bm, block1)
	verifyBlockNotFound(ctx, t, bm, block2)
	bm.Flush(ctx)
	log.Debugf("-----------")
	bm = newTestBlockManager(data, keyTime, nil)
	//dumpBlockManagerData(t, data)
	verifyBlockNotFound(ctx, t, bm, block1)
	verifyBlockNotFound(ctx, t, bm, block2)
}

func TestRewriteNonDeleted(t *testing.T) {
	const stepBehaviors = 3

	// perform a sequence WriteBlock() <action1> RewriteBlock() <action2> GetBlock()
|
||||
// where actionX can be (0=flush and reopen, 1=flush, 2=nothing)
|
||||
for action1 := 0; action1 < stepBehaviors; action1++ {
|
||||
for action2 := 0; action2 < stepBehaviors; action2++ {
|
||||
t.Run(fmt.Sprintf("case-%v-%v", action1, action2), func(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
fakeNow := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
|
||||
bm := newTestBlockManager(data, keyTime, fakeNow)
|
||||
|
||||
applyStep := func(action int) {
|
||||
switch action {
|
||||
case 0:
|
||||
t.Logf("flushing and reopening")
|
||||
bm.Flush(ctx)
|
||||
bm = newTestBlockManager(data, keyTime, fakeNow)
|
||||
case 1:
|
||||
t.Logf("flushing")
|
||||
bm.Flush(ctx)
|
||||
case 2:
|
||||
t.Logf("doing nothing")
|
||||
}
|
||||
}
|
||||
|
||||
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
|
||||
applyStep(action1)
|
||||
assertNoError(t, bm.RewriteBlock(ctx, block1))
|
||||
applyStep(action2)
|
||||
verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
|
||||
dumpBlockManagerData(t, data)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDisableFlush(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
bm := newTestBlockManager(data, keyTime, nil)
|
||||
bm.DisableIndexFlush()
|
||||
bm.DisableIndexFlush()
|
||||
for i := 0; i < 500; i++ {
|
||||
writeBlockAndVerify(ctx, t, bm, seededRandomData(i, 100))
|
||||
}
|
||||
bm.Flush(ctx) // flush will not have effect
|
||||
bm.EnableIndexFlush()
|
||||
bm.Flush(ctx) // flush will not have effect
|
||||
bm.EnableIndexFlush()
|
||||
|
||||
verifyActiveIndexBlockCount(ctx, t, bm, 0)
|
||||
bm.EnableIndexFlush()
|
||||
verifyActiveIndexBlockCount(ctx, t, bm, 0)
|
||||
bm.Flush(ctx) // flush will happen now
|
||||
verifyActiveIndexBlockCount(ctx, t, bm, 1)
|
||||
}
|
||||
|
||||
func TestRewriteDeleted(t *testing.T) {
|
||||
const stepBehaviors = 3
|
||||
|
||||
// perform a sequence WriteBlock() <action1> Delete() <action2> RewriteBlock() <action3> GetBlock()
|
||||
// where actionX can be (0=flush and reopen, 1=flush, 2=nothing)
|
||||
for action1 := 0; action1 < stepBehaviors; action1++ {
|
||||
for action2 := 0; action2 < stepBehaviors; action2++ {
|
||||
for action3 := 0; action3 < stepBehaviors; action3++ {
|
||||
t.Run(fmt.Sprintf("case-%v-%v-%v", action1, action2, action3), func(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
fakeNow := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
|
||||
bm := newTestBlockManager(data, keyTime, fakeNow)
|
||||
|
||||
applyStep := func(action int) {
|
||||
switch action {
|
||||
case 0:
|
||||
t.Logf("flushing and reopening")
|
||||
bm.Flush(ctx)
|
||||
bm = newTestBlockManager(data, keyTime, fakeNow)
|
||||
case 1:
|
||||
t.Logf("flushing")
|
||||
bm.Flush(ctx)
|
||||
case 2:
|
||||
t.Logf("doing nothing")
|
||||
}
|
||||
}
|
||||
|
||||
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
|
||||
applyStep(action1)
|
||||
assertNoError(t, bm.DeleteBlock(block1))
|
||||
applyStep(action2)
|
||||
if got, want := bm.RewriteBlock(ctx, block1), storage.ErrBlockNotFound; got != want && got != nil {
|
||||
t.Errorf("unexpected error %v, wanted %v", got, want)
|
||||
}
|
||||
applyStep(action3)
|
||||
verifyBlockNotFound(ctx, t, bm, block1)
|
||||
dumpBlockManagerData(t, data)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteAndRecreate(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
// simulate race between delete/recreate and delete
|
||||
// delete happens at t0+10, recreate at t0+20 and second delete time is parameterized.
|
||||
// depending on it, the second delete results will be visible.
|
||||
cases := []struct {
|
||||
desc string
|
||||
deletionTime time.Time
|
||||
isVisible bool
|
||||
}{
|
||||
{"deleted before delete and-recreate", fakeTime.Add(5 * time.Second), true},
|
||||
//{"deleted after delete and recreate", fakeTime.Add(25 * time.Second), false},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
// write a block
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
|
||||
block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
|
||||
bm.Flush(ctx)
|
||||
|
||||
// delete but at given timestamp but don't commit yet.
|
||||
bm0 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(tc.deletionTime, 1*time.Second))
|
||||
assertNoError(t, bm0.DeleteBlock(block1))
|
||||
|
||||
// delete it at t0+10
|
||||
bm1 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(10*time.Second), 1*time.Second))
|
||||
verifyBlock(ctx, t, bm1, block1, seededRandomData(10, 100))
|
||||
assertNoError(t, bm1.DeleteBlock(block1))
|
||||
bm1.Flush(ctx)
|
||||
|
||||
// recreate at t0+20
|
||||
bm2 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(20*time.Second), 1*time.Second))
|
||||
block2 := writeBlockAndVerify(ctx, t, bm2, seededRandomData(10, 100))
|
||||
bm2.Flush(ctx)
|
||||
|
||||
// commit deletion from bm0 (t0+5)
|
||||
bm0.Flush(ctx)
|
||||
|
||||
//dumpBlockManagerData(t, data)
|
||||
|
||||
if block1 != block2 {
|
||||
t.Errorf("got invalid block %v, expected %v", block2, block1)
|
||||
}
|
||||
|
||||
bm3 := newTestBlockManager(data, keyTime, nil)
|
||||
dumpBlockManagerData(t, data)
|
||||
if tc.isVisible {
|
||||
verifyBlock(ctx, t, bm3, block1, seededRandomData(10, 100))
|
||||
} else {
|
||||
verifyBlockNotFound(ctx, t, bm3, block1)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindUnreferencedStorageFiles(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
bm := newTestBlockManager(data, keyTime, nil)
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
|
||||
blockID := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
|
||||
if err := bm.Flush(ctx); err != nil {
|
||||
t.Errorf("flush error: %v", err)
|
||||
}
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
|
||||
if err := bm.DeleteBlock(blockID); err != nil {
|
||||
t.Errorf("error deleting block: %v", blockID)
|
||||
}
|
||||
if err := bm.Flush(ctx); err != nil {
|
||||
t.Errorf("flush error: %v", err)
|
||||
}
|
||||
|
||||
// block still present in first pack
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
|
||||
|
||||
assertNoError(t, bm.RewriteBlock(ctx, blockID))
|
||||
if err := bm.Flush(ctx); err != nil {
|
||||
t.Errorf("flush error: %v", err)
|
||||
}
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 1)
|
||||
assertNoError(t, bm.RewriteBlock(ctx, blockID))
|
||||
if err := bm.Flush(ctx); err != nil {
|
||||
t.Errorf("flush error: %v", err)
|
||||
}
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 2)
|
||||
}
|
||||
|
||||
func TestFindUnreferencedStorageFiles2(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
bm := newTestBlockManager(data, keyTime, nil)
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
|
||||
blockID := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
|
||||
writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100))
|
||||
dumpBlocks(t, bm, "after writing")
|
||||
if err := bm.Flush(ctx); err != nil {
|
||||
t.Errorf("flush error: %v", err)
|
||||
}
|
||||
dumpBlocks(t, bm, "after flush")
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
|
||||
if err := bm.DeleteBlock(blockID); err != nil {
|
||||
t.Errorf("error deleting block: %v", blockID)
|
||||
}
|
||||
dumpBlocks(t, bm, "after delete")
|
||||
if err := bm.Flush(ctx); err != nil {
|
||||
t.Errorf("flush error: %v", err)
|
||||
}
|
||||
dumpBlocks(t, bm, "after flush")
|
||||
// block present in first pack, original pack is still referenced
|
||||
verifyUnreferencedStorageFilesCount(ctx, t, bm, 0)
|
||||
}
|
||||
|
||||
func dumpBlocks(t *testing.T, bm *Manager, caption string) {
|
||||
t.Helper()
|
||||
infos, err := bm.ListBlockInfos("", true)
|
||||
if err != nil {
|
||||
t.Errorf("error listing blocks: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("**** dumping %v blocks %v", len(infos), caption)
|
||||
for i, bi := range infos {
|
||||
log.Debugf(" bi[%v]=%#v", i, bi)
|
||||
}
|
||||
log.Infof("finished dumping %v blocks", len(infos))
|
||||
}
|
||||
|
||||
func verifyUnreferencedStorageFilesCount(ctx context.Context, t *testing.T, bm *Manager, want int) {
|
||||
t.Helper()
|
||||
unref, err := bm.FindUnreferencedStorageFiles(ctx)
|
||||
if err != nil {
|
||||
t.Errorf("error in FindUnreferencedStorageFiles: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("got %v expecting %v", unref, want)
|
||||
if got := len(unref); got != want {
|
||||
t.Errorf("invalid number of unreferenced blocks: %v, wanted %v", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlockWriteAliasing(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
|
||||
|
||||
blockData := []byte{100, 0, 0}
|
||||
id1 := writeBlockAndVerify(ctx, t, bm, blockData)
|
||||
blockData[0] = 101
|
||||
id2 := writeBlockAndVerify(ctx, t, bm, blockData)
|
||||
bm.Flush(ctx)
|
||||
blockData[0] = 102
|
||||
id3 := writeBlockAndVerify(ctx, t, bm, blockData)
|
||||
blockData[0] = 103
|
||||
id4 := writeBlockAndVerify(ctx, t, bm, blockData)
|
||||
verifyBlock(ctx, t, bm, id1, []byte{100, 0, 0})
|
||||
verifyBlock(ctx, t, bm, id2, []byte{101, 0, 0})
|
||||
verifyBlock(ctx, t, bm, id3, []byte{102, 0, 0})
|
||||
verifyBlock(ctx, t, bm, id4, []byte{103, 0, 0})
|
||||
}
|
||||
|
||||
func TestBlockReadAliasing(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
|
||||
|
||||
blockData := []byte{100, 0, 0}
|
||||
id1 := writeBlockAndVerify(ctx, t, bm, blockData)
|
||||
blockData2, err := bm.GetBlock(ctx, id1)
|
||||
if err != nil {
|
||||
t.Fatalf("can't get block data: %v", err)
|
||||
}
|
||||
|
||||
blockData2[0]++
|
||||
verifyBlock(ctx, t, bm, id1, blockData)
|
||||
bm.Flush(ctx)
|
||||
verifyBlock(ctx, t, bm, id1, blockData)
|
||||
}
|
||||
|
||||
func TestVersionCompatibility(t *testing.T) {
|
||||
for writeVer := minSupportedReadVersion; writeVer <= currentWriteVersion; writeVer++ {
|
||||
t.Run(fmt.Sprintf("version-%v", writeVer), func(t *testing.T) {
|
||||
verifyVersionCompat(t, writeVer)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func verifyVersionCompat(t *testing.T, writeVersion int) {
|
||||
ctx := context.Background()
|
||||
|
||||
// create block manager that writes 'writeVersion' and reads all versions >= minSupportedReadVersion
|
||||
data := map[string][]byte{}
|
||||
keyTime := map[string]time.Time{}
|
||||
mgr := newTestBlockManager(data, keyTime, nil)
|
||||
mgr.writeFormatVersion = int32(writeVersion)
|
||||
|
||||
dataSet := map[string][]byte{}
|
||||
|
||||
for i := 0; i < 3000000; i = (i + 1) * 2 {
|
||||
data := make([]byte, i)
|
||||
rand.Read(data)
|
||||
|
||||
cid, err := mgr.WriteBlock(ctx, data, "")
|
||||
if err != nil {
|
||||
t.Fatalf("unable to write %v bytes: %v", len(data), err)
|
||||
}
|
||||
dataSet[cid] = data
|
||||
}
|
||||
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
|
||||
|
||||
// delete random 3 items (map iteration order is random)
|
||||
cnt := 0
|
||||
for blockID := range dataSet {
|
||||
t.Logf("deleting %v", blockID)
|
||||
assertNoError(t, mgr.DeleteBlock(blockID))
|
||||
delete(dataSet, blockID)
|
||||
cnt++
|
||||
if cnt >= 3 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := mgr.Flush(ctx); err != nil {
|
||||
t.Fatalf("failed to flush: %v", err)
|
||||
}
|
||||
|
||||
// create new manager that reads and writes using new version.
|
||||
mgr = newTestBlockManager(data, keyTime, nil)
|
||||
|
||||
// make sure we can read everything
|
||||
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
|
||||
|
||||
if err := mgr.CompactIndexes(ctx, CompactOptions{
|
||||
MinSmallBlocks: 1,
|
||||
MaxSmallBlocks: 1,
|
||||
}); err != nil {
|
||||
t.Fatalf("unable to compact indexes: %v", err)
|
||||
}
|
||||
if err := mgr.Flush(ctx); err != nil {
|
||||
t.Fatalf("failed to flush: %v", err)
|
||||
}
|
||||
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
|
||||
|
||||
// now open one more manager
|
||||
mgr = newTestBlockManager(data, keyTime, nil)
|
||||
verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
|
||||
}
|
||||
|
||||
func verifyBlockManagerDataSet(ctx context.Context, t *testing.T, mgr *Manager, dataSet map[string][]byte) {
|
||||
for blockID, originalPayload := range dataSet {
|
||||
v, err := mgr.GetBlock(ctx, blockID)
|
||||
if err != nil {
|
||||
t.Errorf("unable to read block %q: %v", blockID, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(v, originalPayload) {
|
||||
t.Errorf("payload for %q does not match original: %v", v, originalPayload)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newTestBlockManager(data map[string][]byte, keyTime map[string]time.Time, timeFunc func() time.Time) *Manager {
|
||||
//st = logging.NewWrapper(st)
|
||||
if timeFunc == nil {
|
||||
timeFunc = fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
|
||||
}
|
||||
st := storagetesting.NewMapStorage(data, keyTime, timeFunc)
|
||||
bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{
|
||||
Hash: "HMAC-SHA256",
|
||||
Encryption: "NONE",
|
||||
HMACSecret: hmacSecret,
|
||||
MaxPackSize: maxPackSize,
|
||||
}, CachingOptions{}, timeFunc, nil)
|
||||
if err != nil {
|
||||
panic("can't create block manager: " + err.Error())
|
||||
}
|
||||
bm.checkInvariantsOnUnlock = true
|
||||
return bm
|
||||
}
|
||||
|
||||
func getIndexCount(d map[string][]byte) int {
|
||||
var cnt int
|
||||
|
||||
for k := range d {
|
||||
if strings.HasPrefix(k, newIndexBlockPrefix) {
|
||||
cnt++
|
||||
}
|
||||
}
|
||||
|
||||
return cnt
|
||||
}
|
||||
|
||||
func fakeTimeNowFrozen(t time.Time) func() time.Time {
|
||||
return fakeTimeNowWithAutoAdvance(t, 0)
|
||||
}
|
||||
|
||||
func fakeTimeNowWithAutoAdvance(t time.Time, dt time.Duration) func() time.Time {
|
||||
var mu sync.Mutex
|
||||
return func() time.Time {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
ret := t
|
||||
t = t.Add(dt)
|
||||
return ret
|
||||
}
|
||||
}

func verifyBlockNotFound(ctx context.Context, t *testing.T, bm *Manager, blockID string) {
	t.Helper()

	b, err := bm.GetBlock(ctx, blockID)
	if err != storage.ErrBlockNotFound {
		t.Errorf("unexpected response from GetBlock(%q), got %v,%v, expected %v", blockID, b, err, storage.ErrBlockNotFound)
	}
}

func verifyBlock(ctx context.Context, t *testing.T, bm *Manager, blockID string, b []byte) {
	t.Helper()

	b2, err := bm.GetBlock(ctx, blockID)
	if err != nil {
		t.Errorf("unable to read block %q: %v", blockID, err)
		return
	}

	if got, want := b2, b; !reflect.DeepEqual(got, want) {
		t.Errorf("block %q data mismatch: got %x (nil:%v), wanted %x (nil:%v)", blockID, got, got == nil, want, want == nil)
	}

	bi, err := bm.BlockInfo(ctx, blockID)
	if err != nil {
		t.Errorf("error getting block info %q: %v", blockID, err)
	}

	if got, want := bi.Length, uint32(len(b)); got != want {
		t.Errorf("invalid block size for %q: %v, wanted %v", blockID, got, want)
	}
}

func writeBlockAndVerify(ctx context.Context, t *testing.T, bm *Manager, b []byte) string {
	t.Helper()

	blockID, err := bm.WriteBlock(ctx, b, "")
	if err != nil {
		t.Errorf("err: %v", err)
	}

	if got, want := blockID, string(hashValue(b)); got != want {
		t.Errorf("invalid block ID for %x, got %v, want %v", b, got, want)
	}

	verifyBlock(ctx, t, bm, blockID, b)

	return blockID
}

func seededRandomData(seed int, length int) []byte {
	b := make([]byte, length)
	rnd := rand.New(rand.NewSource(int64(seed)))
	rnd.Read(b)
	return b
}

func hashValue(b []byte) string {
	h := hmac.New(sha256.New, hmacSecret)
	h.Write(b) //nolint:errcheck
	return hex.EncodeToString(h.Sum(nil))
}

func dumpBlockManagerData(t *testing.T, data map[string][]byte) {
	t.Helper()
	for k, v := range data {
		if k[0] == 'n' {
			ndx, err := openPackIndex(bytes.NewReader(v))
			if err == nil {
				t.Logf("index %v (%v bytes)", k, len(v))
				assertNoError(t, ndx.Iterate("", func(i Info) error {
					t.Logf(" %+v\n", i)
					return nil
				}))
			}
		} else {
			t.Logf("data %v (%v bytes)\n", k, len(v))
		}
	}
}
147
block/builder.go
Normal file
@@ -0,0 +1,147 @@
package block

import (
	"bufio"
	"encoding/binary"
	"fmt"
	"io"
	"sort"
)

// packIndexBuilder prepares a pack block index and writes it out.
type packIndexBuilder map[string]*Info

// Add adds a new entry to the builder, or replaces an existing one if the new timestamp is greater or equal.
func (b packIndexBuilder) Add(i Info) {
	old, ok := b[i.BlockID]
	if !ok || i.TimestampSeconds >= old.TimestampSeconds {
		b[i.BlockID] = &i
	}
}

func (b packIndexBuilder) sortedBlocks() []*Info {
	var allBlocks []*Info

	for _, v := range b {
		allBlocks = append(allBlocks, v)
	}

	sort.Slice(allBlocks, func(i, j int) bool {
		return allBlocks[i].BlockID < allBlocks[j].BlockID
	})

	return allBlocks
}

type indexLayout struct {
	packFileOffsets map[string]uint32
	entryCount      int
	keyLength       int
	entryLength     int
	extraDataOffset uint32
}

// Build writes the pack index to the provided output.
func (b packIndexBuilder) Build(output io.Writer) error {
	allBlocks := b.sortedBlocks()
	layout := &indexLayout{
		packFileOffsets: map[string]uint32{},
		keyLength:       -1,
		entryLength:     20,
		entryCount:      len(allBlocks),
	}

	w := bufio.NewWriter(output)

	// prepare extra data to be appended at the end of an index.
	extraData := prepareExtraData(allBlocks, layout)

	// write header
	header := make([]byte, 8)
	header[0] = 1 // version
	header[1] = byte(layout.keyLength)
	binary.BigEndian.PutUint16(header[2:4], uint16(layout.entryLength))
	binary.BigEndian.PutUint32(header[4:8], uint32(layout.entryCount))
	if _, err := w.Write(header); err != nil {
		return fmt.Errorf("unable to write header: %v", err)
	}

	// write all sorted blocks.
	entry := make([]byte, layout.entryLength)
	for _, it := range allBlocks {
		if err := writeEntry(w, it, layout, entry); err != nil {
			return fmt.Errorf("unable to write entry: %v", err)
		}
	}

	if _, err := w.Write(extraData); err != nil {
		return fmt.Errorf("error writing extra data: %v", err)
	}

	return w.Flush()
}
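
// Illustrative usage sketch (an addition, not part of the original change): build
// a small index in memory and reopen it for reading; indexWithItems in
// merged_test.go follows the same pattern.
//
//	b := make(packIndexBuilder)
//	b.Add(Info{BlockID: "aabbcc", TimestampSeconds: 1, PackFile: "p0", Length: 10})
//	var buf bytes.Buffer
//	if err := b.Build(&buf); err != nil {
//		// handle error
//	}
//	ndx, err := openPackIndex(bytes.NewReader(buf.Bytes()))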

func prepareExtraData(allBlocks []*Info, layout *indexLayout) []byte {
	var extraData []byte

	for i, it := range allBlocks {
		if i == 0 {
			layout.keyLength = len(contentIDToBytes(it.BlockID))
		}
		if it.PackFile != "" {
			if _, ok := layout.packFileOffsets[it.PackFile]; !ok {
				layout.packFileOffsets[it.PackFile] = uint32(len(extraData))
				extraData = append(extraData, []byte(it.PackFile)...)
			}
		}
		if len(it.Payload) > 0 {
			panic("storing payloads in indexes is not supported")
		}
	}
	layout.extraDataOffset = uint32(8 + layout.entryCount*(layout.keyLength+layout.entryLength))
	return extraData
}

func writeEntry(w io.Writer, it *Info, layout *indexLayout, entry []byte) error {
	k := contentIDToBytes(it.BlockID)
	if len(k) != layout.keyLength {
		return fmt.Errorf("inconsistent key length: %v vs %v", len(k), layout.keyLength)
	}

	if err := formatEntry(entry, it, layout); err != nil {
		return fmt.Errorf("unable to format entry: %v", err)
	}

	if _, err := w.Write(k); err != nil {
		return fmt.Errorf("error writing entry key: %v", err)
	}
	if _, err := w.Write(entry); err != nil {
		return fmt.Errorf("error writing entry: %v", err)
	}

	return nil
}

func formatEntry(entry []byte, it *Info, layout *indexLayout) error {
	entryTimestampAndFlags := entry[0:8]
	entryPackFileOffset := entry[8:12]
	entryPackedOffset := entry[12:16]
	entryPackedLength := entry[16:20]
	timestampAndFlags := uint64(it.TimestampSeconds) << 16

	if len(it.PackFile) == 0 {
		return fmt.Errorf("empty pack block ID for %v", it.BlockID)
	}

	binary.BigEndian.PutUint32(entryPackFileOffset, layout.extraDataOffset+layout.packFileOffsets[it.PackFile])
	if it.Deleted {
		binary.BigEndian.PutUint32(entryPackedOffset, it.PackOffset|0x80000000)
	} else {
		binary.BigEndian.PutUint32(entryPackedOffset, it.PackOffset)
	}
	binary.BigEndian.PutUint32(entryPackedLength, it.Length)
	timestampAndFlags |= uint64(it.FormatVersion) << 8
	timestampAndFlags |= uint64(len(it.PackFile))
	binary.BigEndian.PutUint64(entryTimestampAndFlags, timestampAndFlags)
	return nil
}
33
block/cache_hmac.go
Normal file
@@ -0,0 +1,33 @@
package block

import (
	"crypto/hmac"
	"crypto/sha256"
	"errors"
)

func appendHMAC(data []byte, secret []byte) []byte {
	h := hmac.New(sha256.New, secret)
	h.Write(data) // nolint:errcheck
	return h.Sum(data)
}

func verifyAndStripHMAC(b []byte, secret []byte) ([]byte, error) {
	if len(b) < sha256.Size {
		return nil, errors.New("invalid data - too short")
	}

	p := len(b) - sha256.Size
	data := b[0:p]
	signature := b[p:]

	h := hmac.New(sha256.New, secret)
	h.Write(data) // nolint:errcheck
	validSignature := h.Sum(nil)
	if len(signature) != len(validSignature) {
		return nil, errors.New("invalid signature length")
	}
	if hmac.Equal(validSignature, signature) {
		return data, nil
	}

	return nil, errors.New("invalid data - corrupted")
}
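
// Illustrative sketch (an addition, not part of the original change): appendHMAC
// and verifyAndStripHMAC round-trip, and flipping any byte of the signed blob
// makes verification fail.
//
//	signed := appendHMAC([]byte("payload"), secret) // payload || HMAC-SHA256(payload)
//	data, err := verifyAndStripHMAC(signed, secret) // data == []byte("payload"), err == nil
//	signed[0] ^= 1
//	_, err = verifyAndStripHMAC(signed, secret) // err: "invalid data - corrupted"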
10
block/caching_options.go
Normal file
@@ -0,0 +1,10 @@
package block

// CachingOptions specifies configuration of local cache.
type CachingOptions struct {
	CacheDirectory          string `json:"cacheDirectory,omitempty"`
	MaxCacheSizeBytes       int64  `json:"maxCacheSize,omitempty"`
	MaxListCacheDurationSec int    `json:"maxListCacheDuration,omitempty"`
	IgnoreListCache         bool   `json:"-"`
	HMACSecret              []byte `json:"-"`
}
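
// Illustrative sketch (an addition, not part of the original change): given the
// JSON tags above, a populated CachingOptions serializes roughly as
//
//	{"cacheDirectory":"/tmp/kopia-cache","maxCacheSize":10000000,"maxListCacheDuration":600}
//
// while IgnoreListCache and HMACSecret are tagged "-" and are never persisted.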
138
block/committed_block_index.go
Normal file
@@ -0,0 +1,138 @@
package block

import (
	"fmt"
	"path/filepath"
	"sync"

	"github.com/kopia/repo/storage"
)

type committedBlockIndex struct {
	cache committedBlockIndexCache

	mu     sync.Mutex
	inUse  map[string]packIndex
	merged mergedIndex
}

type committedBlockIndexCache interface {
	hasIndexBlockID(indexBlockID string) (bool, error)
	addBlockToCache(indexBlockID string, data []byte) error
	openIndex(indexBlockID string) (packIndex, error)
	expireUnused(used []string) error
}

func (b *committedBlockIndex) getBlock(blockID string) (Info, error) {
	b.mu.Lock()
	defer b.mu.Unlock()

	info, err := b.merged.GetInfo(blockID)
	if info != nil {
		return *info, nil
	}
	if err == nil {
		return Info{}, storage.ErrBlockNotFound
	}
	return Info{}, err
}

func (b *committedBlockIndex) addBlock(indexBlockID string, data []byte, use bool) error {
	if err := b.cache.addBlockToCache(indexBlockID, data); err != nil {
		return err
	}

	if !use {
		return nil
	}

	b.mu.Lock()
	defer b.mu.Unlock()

	if b.inUse[indexBlockID] != nil {
		return nil
	}

	ndx, err := b.cache.openIndex(indexBlockID)
	if err != nil {
		return fmt.Errorf("unable to open pack index %q: %v", indexBlockID, err)
	}
	b.inUse[indexBlockID] = ndx
	b.merged = append(b.merged, ndx)
	return nil
}

func (b *committedBlockIndex) listBlocks(prefix string, cb func(i Info) error) error {
	b.mu.Lock()
	m := append(mergedIndex(nil), b.merged...)
	b.mu.Unlock()

	return m.Iterate(prefix, cb)
}

func (b *committedBlockIndex) packFilesChanged(packFiles []string) bool {
	if len(packFiles) != len(b.inUse) {
		return true
	}

	for _, packFile := range packFiles {
		if b.inUse[packFile] == nil {
			return true
		}
	}

	return false
}

func (b *committedBlockIndex) use(packFiles []string) (bool, error) {
	b.mu.Lock()
	defer b.mu.Unlock()

	if !b.packFilesChanged(packFiles) {
		return false, nil
	}
	log.Debugf("set of index files has changed (had %v, now %v)", len(b.inUse), len(packFiles))

	var newMerged mergedIndex
	newInUse := map[string]packIndex{}
	defer func() {
		newMerged.Close() //nolint:errcheck
	}()

	for _, e := range packFiles {
		ndx, err := b.cache.openIndex(e)
		if err != nil {
			return false, fmt.Errorf("unable to open pack index %q: %v", e, err)
		}

		newMerged = append(newMerged, ndx)
		newInUse[e] = ndx
	}
	b.merged = newMerged
	b.inUse = newInUse

	if err := b.cache.expireUnused(packFiles); err != nil {
		log.Warningf("unable to expire unused block index files: %v", err)
	}
	// prevent the deferred cleanup from closing the indexes that are now in use.
	newMerged = nil

	return true, nil
}

func newCommittedBlockIndex(caching CachingOptions) (*committedBlockIndex, error) {
	var cache committedBlockIndexCache

	if caching.CacheDirectory != "" {
		dirname := filepath.Join(caching.CacheDirectory, "indexes")
		cache = &diskCommittedBlockIndexCache{dirname}
	} else {
		cache = &memoryCommittedBlockIndexCache{
			blocks: map[string]packIndex{},
		}
	}

	return &committedBlockIndex{
		cache: cache,
		inUse: map[string]packIndex{},
	}, nil
}
134
block/committed_block_index_disk_cache.go
Normal file
@@ -0,0 +1,134 @@
package block

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"time"

	"golang.org/x/exp/mmap"
)

const (
	simpleIndexSuffix                    = ".sndx"
	unusedCommittedBlockIndexCleanupTime = 1 * time.Hour // delete unused committed index blocks after 1 hour
)

type diskCommittedBlockIndexCache struct {
	dirname string
}

func (c *diskCommittedBlockIndexCache) indexBlockPath(indexBlockID string) string {
	return filepath.Join(c.dirname, indexBlockID+simpleIndexSuffix)
}

func (c *diskCommittedBlockIndexCache) openIndex(indexBlockID string) (packIndex, error) {
	fullpath := c.indexBlockPath(indexBlockID)

	f, err := mmap.Open(fullpath)
	if err != nil {
		return nil, err
	}

	return openPackIndex(f)
}

func (c *diskCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) {
	_, err := os.Stat(c.indexBlockPath(indexBlockID))
	if err == nil {
		return true, nil
	}
	if os.IsNotExist(err) {
		return false, nil
	}

	return false, err
}

func (c *diskCommittedBlockIndexCache) addBlockToCache(indexBlockID string, data []byte) error {
	exists, err := c.hasIndexBlockID(indexBlockID)
	if err != nil {
		return err
	}

	if exists {
		return nil
	}

	tmpFile, err := writeTempFileAtomic(c.dirname, data)
	if err != nil {
		return err
	}

	// rename() is atomic, so one process will succeed, but the other will fail
	if err := os.Rename(tmpFile, c.indexBlockPath(indexBlockID)); err != nil {
		// verify that the block exists
		exists, err := c.hasIndexBlockID(indexBlockID)
		if err != nil {
			return err
		}
		if !exists {
			return fmt.Errorf("unsuccessful index write of block %q", indexBlockID)
		}
	}

	return nil
}

func writeTempFileAtomic(dirname string, data []byte) (string, error) {
	// write to a temp file to avoid race where two processes are writing at the same time.
	tf, err := ioutil.TempFile(dirname, "tmp")
	if err != nil {
		if os.IsNotExist(err) {
			os.MkdirAll(dirname, 0700) //nolint:errcheck
			tf, err = ioutil.TempFile(dirname, "tmp")
		}
	}
	if err != nil {
		return "", fmt.Errorf("can't create tmp file: %v", err)
	}

	if _, err := tf.Write(data); err != nil {
		return "", fmt.Errorf("can't write to temp file: %v", err)
	}
	if err := tf.Close(); err != nil {
		return "", fmt.Errorf("can't close tmp file: %v", err)
	}

	return tf.Name(), nil
}

func (c *diskCommittedBlockIndexCache) expireUnused(used []string) error {
	entries, err := ioutil.ReadDir(c.dirname)
	if err != nil {
		return fmt.Errorf("can't list cache: %v", err)
	}

	remaining := map[string]os.FileInfo{}

	for _, ent := range entries {
		if strings.HasSuffix(ent.Name(), simpleIndexSuffix) {
			n := strings.TrimSuffix(ent.Name(), simpleIndexSuffix)
			remaining[n] = ent
		}
	}

	for _, u := range used {
		delete(remaining, u)
	}

	for _, rem := range remaining {
		if time.Since(rem.ModTime()) > unusedCommittedBlockIndexCleanupTime {
			log.Debugf("removing unused %v %v", rem.Name(), rem.ModTime())
			if err := os.Remove(filepath.Join(c.dirname, rem.Name())); err != nil {
				log.Warningf("unable to remove unused index file: %v", err)
			}
		} else {
			log.Debugf("keeping unused %v because it's too new %v", rem.Name(), rem.ModTime())
		}
	}

	return nil
}
48
block/committed_block_index_mem_cache.go
Normal file
@@ -0,0 +1,48 @@
package block

import (
	"bytes"
	"fmt"
	"sync"
)

type memoryCommittedBlockIndexCache struct {
	mu     sync.Mutex
	blocks map[string]packIndex
}

func (m *memoryCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	return m.blocks[indexBlockID] != nil, nil
}

func (m *memoryCommittedBlockIndexCache) addBlockToCache(indexBlockID string, data []byte) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	ndx, err := openPackIndex(bytes.NewReader(data))
	if err != nil {
		return err
	}

	m.blocks[indexBlockID] = ndx
	return nil
}

func (m *memoryCommittedBlockIndexCache) openIndex(indexBlockID string) (packIndex, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	v := m.blocks[indexBlockID]
	if v == nil {
		return nil, fmt.Errorf("block not found in cache: %v", indexBlockID)
	}

	return v, nil
}

func (m *memoryCommittedBlockIndexCache) expireUnused(used []string) error {
	return nil
}
38
block/content_id_to_bytes.go
Normal file
@@ -0,0 +1,38 @@
package block

import (
	"encoding/hex"
)

func bytesToContentID(b []byte) string {
	if len(b) == 0 {
		return ""
	}
	if b[0] == 0xff {
		return string(b[1:])
	}
	prefix := ""
	if b[0] != 0 {
		prefix = string(b[0:1])
	}

	return prefix + hex.EncodeToString(b[1:])
}

func contentIDToBytes(c string) []byte {
	var prefix []byte
	var skip int
	if len(c)%2 == 1 {
		prefix = []byte(c[0:1])
		skip = 1
	} else {
		prefix = []byte{0}
	}

	b, err := hex.DecodeString(c[skip:])
	if err != nil {
		return append([]byte{0xff}, []byte(c)...)
	}

	return append(prefix, b...)
}
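
// Illustrative worked examples (an addition, not part of the original change):
//
//	contentIDToBytes("aabbcc")  -> []byte{0x00, 0xaa, 0xbb, 0xcc} // even length: zero prefix + raw hex bytes
//	contentIDToBytes("xaabbcc") -> []byte{'x', 0xaa, 0xbb, 0xcc}  // odd length: first byte is the one-char prefix
//	contentIDToBytes("a1x")     -> []byte{0xff, 'a', '1', 'x'}    // not valid hex: 0xff marker + raw string
//
// bytesToContentID reverses each of these, as exercised by TestRoundTrip in
// packindex_internal_test.go.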
34
block/context.go
Normal file
@@ -0,0 +1,34 @@
package block

import "context"

type contextKey string

var useBlockCacheContextKey contextKey = "use-block-cache"
var useListCacheContextKey contextKey = "use-list-cache"

// UsingBlockCache returns a derived context that controls whether the block manager uses the block cache.
func UsingBlockCache(ctx context.Context, enabled bool) context.Context {
	return context.WithValue(ctx, useBlockCacheContextKey, enabled)
}

// UsingListCache returns a derived context that controls whether the block manager uses the list cache.
func UsingListCache(ctx context.Context, enabled bool) context.Context {
	return context.WithValue(ctx, useListCacheContextKey, enabled)
}

func shouldUseBlockCache(ctx context.Context) bool {
	if enabled, ok := ctx.Value(useBlockCacheContextKey).(bool); ok {
		return enabled
	}

	return true
}

func shouldUseListCache(ctx context.Context) bool {
	if enabled, ok := ctx.Value(useListCacheContextKey).(bool); ok {
		return enabled
	}

	return true
}
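
// Illustrative usage sketch (an addition, not part of the original change):
// callers can opt out of the block cache for a single operation by deriving a
// context; without an explicit value both caches default to enabled.
//
//	ctx = UsingBlockCache(ctx, false)
//	b, err := bm.GetBlock(ctx, blockID) // served from underlying storage, bypassing the cache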
74
block/format.go
Normal file
@@ -0,0 +1,74 @@
package block

import (
	"encoding/binary"
	"fmt"
)

// Format describes the format of a single pack index. The struct itself is never
// used at runtime; it exists purely to document the layout.
// The struct is byte-aligned.
type Format struct {
	Version    byte   // format version number must be 0x01
	KeySize    byte   // size of each key in bytes
	EntrySize  uint16 // size of each entry in bytes, big-endian
	EntryCount uint32 // number of sorted (key,value) entries that follow

	Entries []struct {
		Key   []byte // key bytes (KeySize)
		Entry entry
	}

	ExtraData []byte // extra data
}

type entry struct {
	// big endian:
	// 48 most significant bits - 48-bit timestamp in seconds since 1970/01/01 UTC
	// 8 bits - format version (currently == 1)
	// 8 least significant bits - length of pack block ID
	timestampAndFlags uint64
	packFileOffset    uint32 // 4 bytes, big endian, offset within index file where pack block ID begins
	packedOffset      uint32 // 4 bytes, big endian, offset within pack file where the contents begin
	packedLength      uint32 // 4 bytes, big endian, content length
}

func (e *entry) parse(b []byte) error {
	if len(b) < 20 {
		return fmt.Errorf("invalid entry length: %v", len(b))
	}

	e.timestampAndFlags = binary.BigEndian.Uint64(b[0:8])
	e.packFileOffset = binary.BigEndian.Uint32(b[8:12])
	e.packedOffset = binary.BigEndian.Uint32(b[12:16])
	e.packedLength = binary.BigEndian.Uint32(b[16:20])
	return nil
}

func (e *entry) IsDeleted() bool {
	return e.packedOffset&0x80000000 != 0
}

func (e *entry) TimestampSeconds() int64 {
	return int64(e.timestampAndFlags >> 16)
}

func (e *entry) PackedFormatVersion() byte {
	return byte(e.timestampAndFlags >> 8)
}

func (e *entry) PackFileLength() byte {
	return byte(e.timestampAndFlags)
}

func (e *entry) PackFileOffset() uint32 {
	return e.packFileOffset
}

func (e *entry) PackedOffset() uint32 {
	return e.packedOffset & 0x7fffffff
}

func (e *entry) PackedLength() uint32 {
	return e.packedLength
}
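
// Illustrative worked example (an addition, not part of the original change):
// for a block written at timestamp 1000000000 seconds with format version 1,
// stored in a pack file whose name is 41 bytes long:
//
//	timestampAndFlags = 1000000000<<16 | 1<<8 | 41 // == 0x3b9aca000129
//
// so TimestampSeconds() == 1000000000, PackedFormatVersion() == 1 and
// PackFileLength() == 41.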
198
block/index.go
Normal file
@@ -0,0 +1,198 @@
package block

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"sort"
	"strings"

	"github.com/pkg/errors"
)

// packIndex is a read-only index of packed blocks.
type packIndex interface {
	io.Closer

	GetInfo(blockID string) (*Info, error)
	Iterate(prefix string, cb func(Info) error) error
}

type index struct {
	hdr      headerInfo
	readerAt io.ReaderAt
}

type headerInfo struct {
	keySize    int
	valueSize  int
	entryCount int
}

func readHeader(readerAt io.ReaderAt) (headerInfo, error) {
	var header [8]byte

	if n, err := readerAt.ReadAt(header[:], 0); err != nil || n != 8 {
		return headerInfo{}, errors.Wrap(err, "invalid header")
	}

	if header[0] != 1 {
		return headerInfo{}, fmt.Errorf("invalid header format: %v", header[0])
	}

	hi := headerInfo{
		keySize:    int(header[1]),
		valueSize:  int(binary.BigEndian.Uint16(header[2:4])),
		entryCount: int(binary.BigEndian.Uint32(header[4:8])),
	}

	if hi.keySize <= 1 || hi.valueSize < 0 || hi.entryCount < 0 {
		return headerInfo{}, fmt.Errorf("invalid header")
	}

	return hi, nil
}

// Iterate invokes the provided callback function for all blocks in the index, sorted alphabetically.
// The iteration ends when the callback returns an error, which is propagated to the caller, or when
// all blocks have been visited.
func (b *index) Iterate(prefix string, cb func(Info) error) error {
	startPos, err := b.findEntryPosition(prefix)
	if err != nil {
		return errors.Wrap(err, "could not find starting position")
	}
	stride := b.hdr.keySize + b.hdr.valueSize
	entry := make([]byte, stride)
	for i := startPos; i < b.hdr.entryCount; i++ {
		n, err := b.readerAt.ReadAt(entry, int64(8+stride*i))
		if err != nil || n != len(entry) {
			return errors.Wrap(err, "unable to read from index")
		}

		key := entry[0:b.hdr.keySize]
		value := entry[b.hdr.keySize:]

		i, err := b.entryToInfo(bytesToContentID(key), value)
		if err != nil {
			return errors.Wrap(err, "invalid index data")
		}
		if !strings.HasPrefix(i.BlockID, prefix) {
			break
		}
		if err := cb(i); err != nil {
			return err
		}
	}
	return nil
}

func (b *index) findEntryPosition(blockID string) (int, error) {
	stride := b.hdr.keySize + b.hdr.valueSize
	entryBuf := make([]byte, stride)
	var readErr error
	pos := sort.Search(b.hdr.entryCount, func(p int) bool {
		if readErr != nil {
			return false
		}
		_, err := b.readerAt.ReadAt(entryBuf, int64(8+stride*p))
		if err != nil {
			readErr = err
			return false
		}

		return bytesToContentID(entryBuf[0:b.hdr.keySize]) >= blockID
	})

	return pos, readErr
}

func (b *index) findEntry(blockID string) ([]byte, error) {
	key := contentIDToBytes(blockID)
	if len(key) != b.hdr.keySize {
		return nil, fmt.Errorf("invalid block ID: %q", blockID)
	}
	stride := b.hdr.keySize + b.hdr.valueSize

	position, err := b.findEntryPosition(blockID)
	if err != nil {
		return nil, err
	}
	if position >= b.hdr.entryCount {
		return nil, nil
	}

	entryBuf := make([]byte, stride)
	if _, err := b.readerAt.ReadAt(entryBuf, int64(8+stride*position)); err != nil {
		return nil, err
	}

	if bytes.Equal(entryBuf[0:len(key)], key) {
		return entryBuf[len(key):], nil
	}

	return nil, nil
}

// GetInfo returns information about a given block. If a block is not found, nil is returned.
func (b *index) GetInfo(blockID string) (*Info, error) {
	e, err := b.findEntry(blockID)
	if err != nil {
		return nil, err
	}

	if e == nil {
		return nil, nil
	}

	i, err := b.entryToInfo(blockID, e)
	if err != nil {
		return nil, err
	}
	return &i, err
}

func (b *index) entryToInfo(blockID string, entryData []byte) (Info, error) {
	if len(entryData) < 20 {
		return Info{}, fmt.Errorf("invalid entry length: %v", len(entryData))
	}

	var e entry
	if err := e.parse(entryData); err != nil {
		return Info{}, err
	}

	packFile := make([]byte, e.PackFileLength())
	n, err := b.readerAt.ReadAt(packFile, int64(e.PackFileOffset()))
	if err != nil || n != int(e.PackFileLength()) {
		return Info{}, errors.Wrap(err, "can't read pack block ID")
	}

	return Info{
		BlockID:          blockID,
		Deleted:          e.IsDeleted(),
		TimestampSeconds: e.TimestampSeconds(),
		FormatVersion:    e.PackedFormatVersion(),
		PackOffset:       e.PackedOffset(),
		Length:           e.PackedLength(),
		PackFile:         string(packFile),
	}, nil
}

// Close closes the index and the underlying reader.
func (b *index) Close() error {
	if closer, ok := b.readerAt.(io.Closer); ok {
		return closer.Close()
	}

	return nil
}

// openPackIndex reads an Index from a given reader. The caller must call Close() when the index is no longer used.
func openPackIndex(readerAt io.ReaderAt) (packIndex, error) {
	h, err := readHeader(readerAt)
	if err != nil {
		return nil, errors.Wrap(err, "invalid header")
	}
	return &index{hdr: h, readerAt: readerAt}, nil
}
22
block/info.go
Normal file
@@ -0,0 +1,22 @@
package block

import (
	"time"
)

// Info holds information about a single block managed by Manager.
type Info struct {
	BlockID          string `json:"blockID"`
	Length           uint32 `json:"length"`
	TimestampSeconds int64  `json:"time"`
	PackFile         string `json:"packFile,omitempty"`
	PackOffset       uint32 `json:"packOffset,omitempty"`
	Deleted          bool   `json:"deleted"`
	Payload          []byte `json:"payload"` // set for payloads stored inline
	FormatVersion    byte   `json:"formatVersion"`
}

// Timestamp returns the time when a block was created or deleted.
func (i Info) Timestamp() time.Time {
	return time.Unix(i.TimestampSeconds, 0)
}
123
block/list_cache.go
Normal file
@@ -0,0 +1,123 @@
package block

import (
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"time"

	"github.com/kopia/repo/storage"
)

type listCache struct {
	st                storage.Storage
	cacheFile         string
	listCacheDuration time.Duration
	hmacSecret        []byte
}

func (c *listCache) listIndexBlocks(ctx context.Context) ([]IndexInfo, error) {
	if c.cacheFile != "" {
		ci, err := c.readBlocksFromCache(ctx)
		if err == nil {
			expirationTime := ci.Timestamp.Add(c.listCacheDuration)
			if time.Now().Before(expirationTime) {
				log.Debugf("retrieved list of index blocks from cache")
				return ci.Blocks, nil
			}
		} else if err != storage.ErrBlockNotFound {
			log.Warningf("unable to open cache file: %v", err)
		}
	}

	blocks, err := listIndexBlocksFromStorage(ctx, c.st)
	if err == nil {
		c.saveListToCache(ctx, &cachedList{
			Blocks:    blocks,
			Timestamp: time.Now(),
		})
	}
	log.Debugf("found %v index blocks from source", len(blocks))

	return blocks, err
}

func (c *listCache) saveListToCache(ctx context.Context, ci *cachedList) {
	if c.cacheFile == "" {
		return
	}
	log.Debugf("saving index blocks to cache: %v", len(ci.Blocks))
	if data, err := json.Marshal(ci); err == nil {
		mySuffix := fmt.Sprintf(".tmp-%v-%v", os.Getpid(), time.Now().UnixNano())
		if err := ioutil.WriteFile(c.cacheFile+mySuffix, appendHMAC(data, c.hmacSecret), 0600); err != nil {
			log.Warningf("unable to write list cache: %v", err)
		}
		os.Rename(c.cacheFile+mySuffix, c.cacheFile) //nolint:errcheck
		os.Remove(c.cacheFile + mySuffix)            //nolint:errcheck
	}
}

func (c *listCache) deleteListCache(ctx context.Context) {
	if c.cacheFile != "" {
		os.Remove(c.cacheFile) //nolint:errcheck
	}
}

func (c *listCache) readBlocksFromCache(ctx context.Context) (*cachedList, error) {
	if !shouldUseListCache(ctx) {
		return nil, storage.ErrBlockNotFound
	}

	ci := &cachedList{}

	data, err := ioutil.ReadFile(c.cacheFile)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, storage.ErrBlockNotFound
		}

		return nil, err
	}

	data, err = verifyAndStripHMAC(data, c.hmacSecret)
	if err != nil {
		return nil, fmt.Errorf("invalid file %v: %v", c.cacheFile, err)
	}

	if err := json.Unmarshal(data, &ci); err != nil {
		return nil, fmt.Errorf("can't unmarshal cached list results: %v", err)
	}

	return ci, nil
}

func newListCache(ctx context.Context, st storage.Storage, caching CachingOptions) (*listCache, error) {
	var listCacheFile string

	if caching.CacheDirectory != "" {
		listCacheFile = filepath.Join(caching.CacheDirectory, "list")

		if _, err := os.Stat(caching.CacheDirectory); os.IsNotExist(err) {
			if err := os.MkdirAll(caching.CacheDirectory, 0700); err != nil {
				return nil, err
			}
		}
	}

	c := &listCache{
		st:                st,
		cacheFile:         listCacheFile,
		hmacSecret:        caching.HMACSecret,
		listCacheDuration: time.Duration(caching.MaxListCacheDurationSec) * time.Second,
	}

	if caching.IgnoreListCache {
		c.deleteListCache(ctx)
	}

	return c, nil
}
132
block/merged.go
Normal file
@@ -0,0 +1,132 @@
package block

import (
	"container/heap"
	"errors"
)

// mergedIndex is an implementation of packIndex that transparently merges results from underlying indexes.
type mergedIndex []packIndex

// Close closes all underlying indexes.
func (m mergedIndex) Close() error {
	for _, ndx := range m {
		if err := ndx.Close(); err != nil {
			return err
		}
	}

	return nil
}

// GetInfo returns information about a single block. If a block is not found, returns (nil,nil)
func (m mergedIndex) GetInfo(contentID string) (*Info, error) {
	var best *Info
	for _, ndx := range m {
		i, err := ndx.GetInfo(contentID)
		if err != nil {
			return nil, err
		}
		if i != nil {
			if best == nil || i.TimestampSeconds > best.TimestampSeconds || (i.TimestampSeconds == best.TimestampSeconds && !i.Deleted) {
				best = i
			}
		}
	}
	return best, nil
}

type nextInfo struct {
	it Info
	ch <-chan Info
}

type nextInfoHeap []*nextInfo

func (h nextInfoHeap) Len() int { return len(h) }
func (h nextInfoHeap) Less(i, j int) bool {
	if a, b := h[i].it.BlockID, h[j].it.BlockID; a != b {
		return a < b
	}

	if a, b := h[i].it.TimestampSeconds, h[j].it.TimestampSeconds; a != b {
		return a < b
	}

	return !h[i].it.Deleted
}

func (h nextInfoHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *nextInfoHeap) Push(x interface{}) {
	*h = append(*h, x.(*nextInfo))
}
func (h *nextInfoHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[0 : n-1]
	return x
}

func iterateChan(prefix string, ndx packIndex, done chan bool) <-chan Info {
	ch := make(chan Info)
	go func() {
		defer close(ch)

		_ = ndx.Iterate(prefix, func(i Info) error {
			select {
			case <-done:
				return errors.New("end of iteration")
			case ch <- i:
				return nil
			}
		})
	}()
	return ch
}

// Iterate invokes the provided callback for all unique block IDs in the underlying sources until either
// all blocks have been visited or until an error is returned by the callback.
func (m mergedIndex) Iterate(prefix string, cb func(i Info) error) error {
	var minHeap nextInfoHeap
	done := make(chan bool)
	defer close(done)

	for _, ndx := range m {
		ch := iterateChan(prefix, ndx, done)
		it, ok := <-ch
		if ok {
			heap.Push(&minHeap, &nextInfo{it, ch})
		}
	}

	var pendingItem Info

	for len(minHeap) > 0 {
		min := heap.Pop(&minHeap).(*nextInfo)
		if pendingItem.BlockID != min.it.BlockID {
			if pendingItem.BlockID != "" {
				if err := cb(pendingItem); err != nil {
					return err
				}
			}

			pendingItem = min.it
		} else if min.it.TimestampSeconds > pendingItem.TimestampSeconds {
			pendingItem = min.it
		}

		it, ok := <-min.ch
		if ok {
			heap.Push(&minHeap, &nextInfo{it, min.ch})
		}
	}

	if pendingItem.BlockID != "" {
		return cb(pendingItem)
	}

	return nil
}

var _ packIndex = (*mergedIndex)(nil)
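
// Illustrative usage sketch (an addition, not part of the original change):
// combine two open indexes and iterate the merged view; Iterate performs a
// k-way merge of the per-index sorted streams using the min-heap above, so each
// unique block ID is reported exactly once, with the newest entry winning.
//
//	m := mergedIndex{ndx1, ndx2}
//	defer m.Close() //nolint:errcheck
//	_ = m.Iterate("", func(i Info) error {
//		fmt.Println(i.BlockID)
//		return nil
//	})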
93
block/merged_test.go
Normal file
@@ -0,0 +1,93 @@
package block

import (
	"bytes"
	"reflect"
	"testing"

	"github.com/pkg/errors"
)

func TestMerged(t *testing.T) {
	i1, err := indexWithItems(
		Info{BlockID: "aabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 11},
		Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
		Info{BlockID: "z010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
		Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 111},
	)
	if err != nil {
		t.Fatalf("can't create index: %v", err)
	}
	i2, err := indexWithItems(
		Info{BlockID: "aabbcc", TimestampSeconds: 3, PackFile: "yy", PackOffset: 33},
		Info{BlockID: "xaabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
		Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 222, Deleted: true},
	)
	if err != nil {
		t.Fatalf("can't create index: %v", err)
	}
	i3, err := indexWithItems(
		Info{BlockID: "aabbcc", TimestampSeconds: 2, PackFile: "zz", PackOffset: 22},
		Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "zz", PackOffset: 222},
		Info{BlockID: "k010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
		Info{BlockID: "k020304", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111},
	)
	if err != nil {
		t.Fatalf("can't create index: %v", err)
	}

	m := mergedIndex{i1, i2, i3}
	i, err := m.GetInfo("aabbcc")
	if err != nil || i == nil {
		t.Fatalf("unable to get info: %v", err)
	}
	if got, want := i.PackOffset, uint32(33); got != want {
		t.Errorf("invalid pack offset %v, wanted %v", got, want)
	}

	var inOrder []string
	assertNoError(t, m.Iterate("", func(i Info) error {
		inOrder = append(inOrder, i.BlockID)
		if i.BlockID == "de1e1e" {
			if i.Deleted {
				t.Errorf("iteration preferred deleted block over non-deleted")
			}
		}
		return nil
	}))

	if i, err := m.GetInfo("de1e1e"); err != nil {
		t.Errorf("error getting deleted block info: %v", err)
	} else if i.Deleted {
		t.Errorf("GetInfo preferred deleted block over non-deleted")
	}

	expectedInOrder := []string{
		"aabbcc",
		"ddeeff",
		"de1e1e",
		"k010203",
		"k020304",
		"xaabbcc",
		"z010203",
	}
	if !reflect.DeepEqual(inOrder, expectedInOrder) {
		t.Errorf("unexpected items in order: %v, wanted %v", inOrder, expectedInOrder)
	}

	if err := m.Close(); err != nil {
		t.Errorf("unexpected error in Close(): %v", err)
	}
}

func indexWithItems(items ...Info) (packIndex, error) {
	b := make(packIndexBuilder)
	for _, it := range items {
		b.Add(it)
	}
	var buf bytes.Buffer
	if err := b.Build(&buf); err != nil {
		return nil, errors.Wrap(err, "build error")
	}
	return openPackIndex(bytes.NewReader(buf.Bytes()))
}
26
block/packindex_internal_test.go
Normal file
@@ -0,0 +1,26 @@
package block

import "testing"

func TestRoundTrip(t *testing.T) {
    cases := []string{
        "",
        "x",
        "aa",
        "xaa",
        "xaaa",
        "a1x",
    }

    for _, tc := range cases {
        b := contentIDToBytes(tc)
        got := bytesToContentID(b)
        if got != tc {
            t.Errorf("%q did not round trip, got %q, wanted %q", tc, got, tc)
        }
    }

    if got, want := bytesToContentID(nil), ""; got != want {
        t.Errorf("unexpected content id %v, want %v", got, want)
    }
}
235
block/packindex_test.go
Normal file
@@ -0,0 +1,235 @@
package block

import (
    "bytes"
    "crypto/sha1"
    "encoding/hex"
    "fmt"
    "math/rand"
    "reflect"
    "strings"
    "testing"
)

func TestPackIndex(t *testing.T) {
    blockNumber := 0

    deterministicBlockID := func(prefix string, id int) string {
        h := sha1.New()
        fmt.Fprintf(h, "%v%v", prefix, id)
        blockNumber++

        prefix2 := ""
        if id%2 == 0 {
            prefix2 = "x"
        }
        if id%7 == 0 {
            prefix2 = "y"
        }
        if id%5 == 0 {
            prefix2 = "m"
        }
        return string(fmt.Sprintf("%v%x", prefix2, h.Sum(nil)))
    }
    deterministicPackFile := func(id int) string {
        h := sha1.New()
        fmt.Fprintf(h, "%v", id)
        blockNumber++
        return string(fmt.Sprintf("%x", h.Sum(nil)))
    }

    deterministicPackedOffset := func(id int) uint32 {
        s := rand.NewSource(int64(id + 1))
        rnd := rand.New(s)
        return uint32(rnd.Int31())
    }
    deterministicPackedLength := func(id int) uint32 {
        s := rand.NewSource(int64(id + 2))
        rnd := rand.New(s)
        return uint32(rnd.Int31())
    }
    deterministicFormatVersion := func(id int) byte {
        return byte(id % 100)
    }

    randomUnixTime := func() int64 {
        return int64(rand.Int31())
    }

    var infos []Info

    // deleted blocks with all information
    for i := 0; i < 100; i++ {
        infos = append(infos, Info{
            TimestampSeconds: randomUnixTime(),
            Deleted:          true,
            BlockID:          deterministicBlockID("deleted-packed", i),
            PackFile:         deterministicPackFile(i),
            PackOffset:       deterministicPackedOffset(i),
            Length:           deterministicPackedLength(i),
            FormatVersion:    deterministicFormatVersion(i),
        })
    }
    // non-deleted blocks
    for i := 0; i < 100; i++ {
        infos = append(infos, Info{
            TimestampSeconds: randomUnixTime(),
            BlockID:          deterministicBlockID("packed", i),
            PackFile:         deterministicPackFile(i),
            PackOffset:       deterministicPackedOffset(i),
            Length:           deterministicPackedLength(i),
            FormatVersion:    deterministicFormatVersion(i),
        })
    }

    infoMap := map[string]Info{}
    b1 := make(packIndexBuilder)
    b2 := make(packIndexBuilder)
    b3 := make(packIndexBuilder)

    for _, info := range infos {
        infoMap[info.BlockID] = info
        b1.Add(info)
        b2.Add(info)
        b3.Add(info)
    }

    var buf1 bytes.Buffer
    var buf2 bytes.Buffer
    var buf3 bytes.Buffer
    if err := b1.Build(&buf1); err != nil {
        t.Errorf("unable to build: %v", err)
    }
    if err := b1.Build(&buf2); err != nil {
        t.Errorf("unable to build: %v", err)
    }
    if err := b1.Build(&buf3); err != nil {
        t.Errorf("unable to build: %v", err)
    }
    data1 := buf1.Bytes()
    data2 := buf2.Bytes()
    data3 := buf3.Bytes()

    if !reflect.DeepEqual(data1, data2) {
        t.Errorf("builder output not stable: %x vs %x", hex.Dump(data1), hex.Dump(data2))
    }
    if !reflect.DeepEqual(data2, data3) {
        t.Errorf("builder output not stable: %x vs %x", hex.Dump(data2), hex.Dump(data3))
    }

    t.Run("FuzzTest", func(t *testing.T) {
        fuzzTestIndexOpen(t, data1)
    })

    ndx, err := openPackIndex(bytes.NewReader(data1))
    if err != nil {
        t.Fatalf("can't open index: %v", err)
    }
    defer ndx.Close()

    for _, info := range infos {
        info2, err := ndx.GetInfo(info.BlockID)
        if err != nil {
            t.Errorf("unable to find %v", info.BlockID)
            continue
        }
        if !reflect.DeepEqual(info, *info2) {
            t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info)
        }
    }

    cnt := 0
    assertNoError(t, ndx.Iterate("", func(info2 Info) error {
        info := infoMap[info2.BlockID]
        if !reflect.DeepEqual(info, info2) {
            t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info)
        }
        cnt++
        return nil
    }))
    if cnt != len(infoMap) {
        t.Errorf("invalid number of iterations: %v, wanted %v", cnt, len(infoMap))
    }

    prefixes := []string{"a", "b", "f", "0", "3", "aa", "aaa", "aab", "fff", "m", "x", "y", "m0", "ma"}

    for i := 0; i < 100; i++ {
        blockID := deterministicBlockID("no-such-block", i)
        v, err := ndx.GetInfo(blockID)
        if err != nil {
            t.Errorf("unable to get block %v: %v", blockID, err)
        }
        if v != nil {
            t.Errorf("unexpected result when getting block %v: %v", blockID, v)
        }
    }

    for _, prefix := range prefixes {
        cnt2 := 0
        assertNoError(t, ndx.Iterate(string(prefix), func(info2 Info) error {
            cnt2++
            if !strings.HasPrefix(string(info2.BlockID), string(prefix)) {
                t.Errorf("unexpected item %v when iterating prefix %v", info2.BlockID, prefix)
            }
            return nil
        }))
        t.Logf("found %v elements with prefix %q", cnt2, prefix)
    }
}

func fuzzTestIndexOpen(t *testing.T, originalData []byte) {
    // use consistent random
    rnd := rand.New(rand.NewSource(12345))

    fuzzTest(rnd, originalData, 50000, func(d []byte) {
        ndx, err := openPackIndex(bytes.NewReader(d))
        if err != nil {
            return
        }
        defer ndx.Close()
        cnt := 0
        _ = ndx.Iterate("", func(cb Info) error {
            if cnt < 10 {
                _, _ = ndx.GetInfo(cb.BlockID)
            }
            cnt++
            return nil
        })
    })
}

func fuzzTest(rnd *rand.Rand, originalData []byte, rounds int, callback func(d []byte)) {
    for round := 0; round < rounds; round++ {
        data := append([]byte(nil), originalData...)

        // mutate small number of bytes
        bytesToMutate := rnd.Intn(3)
        for i := 0; i < bytesToMutate; i++ {
            pos := rnd.Intn(len(data))
            data[pos] = byte(rnd.Int())
        }

        sectionsToInsert := rnd.Intn(3)
        for i := 0; i < sectionsToInsert; i++ {
            pos := rnd.Intn(len(data))
            insertedLength := rnd.Intn(20)
            insertedData := make([]byte, insertedLength)
            rnd.Read(insertedData)

            data = append(append(append([]byte(nil), data[0:pos]...), insertedData...), data[pos:]...)
        }

        sectionsToDelete := rnd.Intn(3)
        for i := 0; i < sectionsToDelete; i++ {
            pos := rnd.Intn(len(data))
            deletedLength := rnd.Intn(10)
            if pos+deletedLength > len(data) {
                continue
            }

            data = append(append([]byte(nil), data[0:pos]...), data[pos+deletedLength:]...)
        }

        callback(data)
    }
}
25
block/stats.go
Normal file
@@ -0,0 +1,25 @@
package block

// Stats exposes statistics about block operation.
type Stats struct {
    // Keep int64 fields first to ensure they get aligned to at least 64-bit boundaries
    // which is required for atomic access on ARM and x86-32.
    ReadBytes      int64 `json:"readBytes,omitempty"`
    WrittenBytes   int64 `json:"writtenBytes,omitempty"`
    DecryptedBytes int64 `json:"decryptedBytes,omitempty"`
    EncryptedBytes int64 `json:"encryptedBytes,omitempty"`
    HashedBytes    int64 `json:"hashedBytes,omitempty"`

    ReadBlocks    int32 `json:"readBlocks,omitempty"`
    WrittenBlocks int32 `json:"writtenBlocks,omitempty"`
    CheckedBlocks int32 `json:"checkedBlocks,omitempty"`
    HashedBlocks  int32 `json:"hashedBlocks,omitempty"`
    InvalidBlocks int32 `json:"invalidBlocks,omitempty"`
    PresentBlocks int32 `json:"presentBlocks,omitempty"`
    ValidBlocks   int32 `json:"validBlocks,omitempty"`
}

// Reset clears all repository statistics.
func (s *Stats) Reset() {
    *s = Stats{}
}
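
// Usage sketch (illustrative): counters are meant to be updated atomically,
// which is why the int64 fields above must stay 64-bit aligned:
//
//	atomic.AddInt64(&stats.ReadBytes, int64(len(data)))
//	atomic.AddInt32(&stats.ReadBlocks, 1)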
111
connect.go
Normal file
@@ -0,0 +1,111 @@
package repo

import (
    "context"
    "crypto/sha256"
    "encoding/hex"
    "encoding/json"
    "io/ioutil"
    "os"
    "path/filepath"

    "github.com/kopia/repo/block"
    "github.com/kopia/repo/storage"
    "github.com/pkg/errors"
)

// ConnectOptions specifies options when persisting configuration to connect to a repository.
type ConnectOptions struct {
    block.CachingOptions
}

// Connect connects to the repository in the specified storage and persists the configuration and credentials in the file provided.
func Connect(ctx context.Context, configFile string, st storage.Storage, password string, opt ConnectOptions) error {
    formatBytes, err := st.GetBlock(ctx, FormatBlockID, 0, -1)
    if err != nil {
        return errors.Wrap(err, "unable to read format block")
    }

    f, err := parseFormatBlock(formatBytes)
    if err != nil {
        return err
    }

    var lc LocalConfig
    lc.Storage = st.ConnectionInfo()

    if err = setupCaching(configFile, &lc, opt.CachingOptions, f.UniqueID); err != nil {
        return errors.Wrap(err, "unable to set up caching")
    }

    d, err := json.MarshalIndent(&lc, "", " ")
    if err != nil {
        return err
    }

    if err = os.MkdirAll(filepath.Dir(configFile), 0700); err != nil {
        return errors.Wrap(err, "unable to create config directory")
    }

    if err = ioutil.WriteFile(configFile, d, 0600); err != nil {
        return errors.Wrap(err, "unable to write config file")
    }

    // now verify that the repository can be opened with the provided config file.
    r, err := Open(ctx, configFile, password, nil)
    if err != nil {
        return err
    }

    return r.Close(ctx)
}

func setupCaching(configPath string, lc *LocalConfig, opt block.CachingOptions, uniqueID []byte) error {
    if opt.MaxCacheSizeBytes == 0 {
        lc.Caching = block.CachingOptions{}
        return nil
    }

    if opt.CacheDirectory == "" {
        cacheDir, err := os.UserCacheDir()
        if err != nil {
            return errors.Wrap(err, "unable to determine cache directory")
        }

        h := sha256.New()
        h.Write(uniqueID)           //nolint:errcheck
        h.Write([]byte(configPath)) //nolint:errcheck
        lc.Caching.CacheDirectory = filepath.Join(cacheDir, "kopia", hex.EncodeToString(h.Sum(nil))[0:16])
    } else {
        absCacheDir, err := filepath.Abs(opt.CacheDirectory)
        if err != nil {
            return err
        }

        lc.Caching.CacheDirectory = absCacheDir
    }
    lc.Caching.MaxCacheSizeBytes = opt.MaxCacheSizeBytes
    lc.Caching.MaxListCacheDurationSec = opt.MaxListCacheDurationSec

    log.Debugf("Creating cache directory '%v' with max size %v", lc.Caching.CacheDirectory, lc.Caching.MaxCacheSizeBytes)
    if err := os.MkdirAll(lc.Caching.CacheDirectory, 0700); err != nil {
        log.Warningf("unable to create cache directory: %v", err)
    }
    return nil
}

// Disconnect removes the specified configuration file and any local cache directories.
func Disconnect(configFile string) error {
    cfg, err := loadConfigFromFile(configFile)
    if err != nil {
        return err
    }

    if cfg.Caching.CacheDirectory != "" {
        if err = os.RemoveAll(cfg.Caching.CacheDirectory); err != nil {
            log.Warningf("unable to remove cache directory: %v", err)
        }
    }

    return os.Remove(configFile)
}
33
crypto_key_derivation.go
Normal file
@@ -0,0 +1,33 @@
package repo

import (
    "crypto/sha256"
    "fmt"
    "io"

    "golang.org/x/crypto/hkdf"
    "golang.org/x/crypto/scrypt"
)

// defaultKeyDerivationAlgorithm is the key derivation algorithm for new configurations.
const defaultKeyDerivationAlgorithm = "scrypt-65536-8-1"

func (f formatBlock) deriveMasterKeyFromPassword(password string) ([]byte, error) {
    const masterKeySize = 32

    switch f.KeyDerivationAlgorithm {
    case "scrypt-65536-8-1":
        return scrypt.Key([]byte(password), f.UniqueID, 65536, 8, 1, masterKeySize)

    default:
        return nil, fmt.Errorf("unsupported key algorithm: %v", f.KeyDerivationAlgorithm)
    }
}

// deriveKeyFromMasterKey computes a key for a specific purpose and length using HKDF based on the master key.
func deriveKeyFromMasterKey(masterKey, uniqueID, purpose []byte, length int) []byte {
    key := make([]byte, length)
    k := hkdf.New(sha256.New, masterKey, uniqueID, purpose)
    io.ReadFull(k, key) //nolint:errcheck
    return key
}
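
// Derivation sketch (illustrative): purpose-specific keys are stretched from
// the password-derived master key via HKDF, as initCrypto in format_block.go
// does with the "AES" and "CHECKSUM" purposes:
//
//	masterKey, _ := f.deriveMasterKeyFromPassword(password)
//	aesKey := deriveKeyFromMasterKey(masterKey, f.UniqueID, []byte("AES"), 32)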
2
doc.go
Normal file
@@ -0,0 +1,2 @@
// Package repo implements content-addressable Repository on top of BLOB storage.
package repo
40
examples/upload_download/main.go
Normal file
@@ -0,0 +1,40 @@
//+build !test

// Command repository_api demonstrates the use of Kopia's Repository API.
package main

import (
    "context"
    "log"
    "os"

    "github.com/kopia/repo"
)

func main() {
    ctx := context.Background()

    if err := setupRepositoryAndConnect(ctx, masterPassword); err != nil {
        log.Printf("unable to set up repository: %v", err)
        os.Exit(1)
    }

    r, err := repo.Open(ctx, configFile, masterPassword, nil)
    if err != nil {
        log.Printf("unable to open repository: %v", err)
        os.Exit(1)
    }
    defer r.Close(ctx) //nolint:errcheck

    uploadAndDownloadObjects(ctx, r)

    // Now list blocks found in the repository.
    blks, err := r.Blocks.ListBlocks("")
    if err != nil {
        log.Printf("err: %v", err)
    }

    for _, b := range blks {
        log.Printf("found block %v", b)
    }
}
56
examples/upload_download/setup_repository.go
Normal file
@@ -0,0 +1,56 @@
//+build !test

package main

import (
    "context"
    "fmt"
    "os"

    "github.com/kopia/repo"
    "github.com/kopia/repo/block"
    "github.com/kopia/repo/storage/filesystem"
    "github.com/kopia/repo/storage/logging"
)

const (
    masterPassword = "my-password$!@#!@"
    storageDir     = "/tmp/kopia-example/storage"
    configFile     = "/tmp/kopia-example/config"
    cacheDirectory = "/tmp/kopia-example/cache"
)

func setupRepositoryAndConnect(ctx context.Context, password string) error {
    if err := os.MkdirAll(storageDir, 0700); err != nil {
        return fmt.Errorf("unable to create directory: %v", err)
    }
    st, err := filesystem.New(ctx, &filesystem.Options{
        Path: storageDir,
    })
    if err != nil {
        return fmt.Errorf("unable to connect to storage: %v", err)
    }

    // set up logging so we can see what's going on
    st = logging.NewWrapper(st)

    // see if we already have the config file, if not connect.
    if _, err := os.Stat(configFile); os.IsNotExist(err) {
        // initialize repository
        if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{}, password); err != nil {
            return fmt.Errorf("unable to initialize repository: %v", err)
        }

        // now establish connection to repository and create configuration file.
        if err := repo.Connect(ctx, configFile, st, password, repo.ConnectOptions{
            CachingOptions: block.CachingOptions{
                CacheDirectory:    cacheDirectory,
                MaxCacheSizeBytes: 100000000,
            },
        }); err != nil {
            return fmt.Errorf("unable to connect to repository: %v", err)
        }
    }

    return nil
}
67
examples/upload_download/upload_download_objects.go
Normal file
@@ -0,0 +1,67 @@
//+build !test

package main

import (
    "context"
    "crypto/rand"
    "io/ioutil"
    "log"
    "os"

    "github.com/kopia/repo"
    "github.com/kopia/repo/object"
)

func uploadRandomObject(ctx context.Context, r *repo.Repository, length int) (object.ID, error) {
    w := r.Objects.NewWriter(ctx, object.WriterOptions{})
    defer w.Close() //nolint:errcheck

    buf := make([]byte, 256*1024)
    for length > 0 {
        todo := length
        if todo > len(buf) {
            todo = len(buf)
        }
        rand.Read(buf[0:todo]) //nolint:errcheck
        if _, err := w.Write(buf[0:todo]); err != nil {
            return "", err
        }
        length -= todo
    }
    return w.Result()
}

func downloadObject(ctx context.Context, r *repo.Repository, oid object.ID) ([]byte, error) {
    rd, err := r.Objects.Open(ctx, oid)
    if err != nil {
        return nil, err
    }
    defer rd.Close() //nolint:errcheck

    return ioutil.ReadAll(rd)
}

func uploadAndDownloadObjects(ctx context.Context, r *repo.Repository) {
    var oids []object.ID

    for size := 100; size < 100000000; size *= 2 {
        log.Printf("uploading file with %v bytes", size)
        oid, err := uploadRandomObject(ctx, r, size)
        if err != nil {
            log.Printf("unable to upload: %v", err)
            os.Exit(1)
        }
        log.Printf("uploaded %v bytes as %v", size, oid)
        oids = append(oids, oid)
    }

    for _, oid := range oids {
        log.Printf("downloading %q", oid)
        b, err := downloadObject(ctx, r, oid)
        if err != nil {
            log.Printf("unable to read object: %v", err)
        }
        log.Printf("downloaded %v", len(b))
    }
}
263
format_block.go
Normal file
@@ -0,0 +1,263 @@
package repo

import (
    "bytes"
    "context"
    "crypto/aes"
    "crypto/cipher"
    "crypto/hmac"
    "crypto/rand"
    "crypto/sha256"
    "encoding/json"
    "fmt"
    "io"

    "github.com/kopia/repo/storage"
    "github.com/pkg/errors"
)

const defaultFormatEncryption = "AES256_GCM"

const (
    maxChecksummedFormatBytesLength = 65000
    formatBlockChecksumSize         = sha256.Size
)

// formatBlockChecksumSecret is a HMAC secret used for checksumming the format block.
// It's not really a secret, but will provide positive identification of blocks that
// are repository format blocks.
var formatBlockChecksumSecret = []byte("kopia-repository")

// FormatBlockID is the identifier of a storage block that describes repository format.
const FormatBlockID = "kopia.repository"

var (
    purposeAESKey   = []byte("AES")
    purposeAuthData = []byte("CHECKSUM")

    errFormatBlockNotFound = errors.New("format block not found")
)

type formatBlock struct {
    Tool         string `json:"tool"`
    BuildVersion string `json:"buildVersion"`
    BuildInfo    string `json:"buildInfo"`

    UniqueID               []byte `json:"uniqueID"`
    KeyDerivationAlgorithm string `json:"keyAlgo"`

    Version              string                  `json:"version"`
    EncryptionAlgorithm  string                  `json:"encryption"`
    EncryptedFormatBytes []byte                  `json:"encryptedBlockFormat,omitempty"`
    UnencryptedFormat    *repositoryObjectFormat `json:"blockFormat,omitempty"`
}

// encryptedRepositoryConfig contains the configuration of repository that's persisted in encrypted format.
type encryptedRepositoryConfig struct {
    Format repositoryObjectFormat `json:"format"`
}

func parseFormatBlock(b []byte) (*formatBlock, error) {
    f := &formatBlock{}

    if err := json.Unmarshal(b, &f); err != nil {
        return nil, errors.Wrap(err, "invalid format block")
    }

    return f, nil
}

// RecoverFormatBlock attempts to recover format block replica from the specified file.
// The format block can be either a prefix or a suffix of the given file.
// Optionally, the length can be provided (if known) to speed up recovery.
func RecoverFormatBlock(ctx context.Context, st storage.Storage, filename string, optionalLength int64) ([]byte, error) {
    if optionalLength > 0 {
        return recoverFormatBlockWithLength(ctx, st, filename, optionalLength)
    }

    var foundMetadata storage.BlockMetadata

    if err := st.ListBlocks(ctx, filename, func(bm storage.BlockMetadata) error {
        if foundMetadata.BlockID != "" {
            return fmt.Errorf("found multiple blocks with a given prefix: %v", filename)
        }
        foundMetadata = bm
        return nil
    }); err != nil {
        return nil, errors.Wrap(err, "error")
    }

    if foundMetadata.BlockID == "" {
        return nil, storage.ErrBlockNotFound
    }

    return recoverFormatBlockWithLength(ctx, st, foundMetadata.BlockID, foundMetadata.Length)
}

func recoverFormatBlockWithLength(ctx context.Context, st storage.Storage, filename string, length int64) ([]byte, error) {
    chunkLength := int64(65536)
    if chunkLength > length {
        chunkLength = length
    }

    if chunkLength > 4 {
        // try prefix
        prefixChunk, err := st.GetBlock(ctx, filename, 0, chunkLength)
        if err != nil {
            return nil, err
        }
        if l := int(prefixChunk[0]) + int(prefixChunk[1])<<8; l <= maxChecksummedFormatBytesLength && l+2 < len(prefixChunk) {
            if b, ok := verifyFormatBlockChecksum(prefixChunk[2 : 2+l]); ok {
                return b, nil
            }
        }

        // try the suffix
        suffixChunk, err := st.GetBlock(ctx, filename, length-chunkLength, chunkLength)
        if err != nil {
            return nil, err
        }
        if l := int(suffixChunk[len(suffixChunk)-2]) + int(suffixChunk[len(suffixChunk)-1])<<8; l <= maxChecksummedFormatBytesLength && l+2 < len(suffixChunk) {
            if b, ok := verifyFormatBlockChecksum(suffixChunk[len(suffixChunk)-2-l : len(suffixChunk)-2]); ok {
                return b, nil
            }
        }
    }

    return nil, errFormatBlockNotFound
}

func verifyFormatBlockChecksum(b []byte) ([]byte, bool) {
    if len(b) < formatBlockChecksumSize {
        return nil, false
    }

    data, checksum := b[0:len(b)-formatBlockChecksumSize], b[len(b)-formatBlockChecksumSize:]
    h := hmac.New(sha256.New, formatBlockChecksumSecret)
    h.Write(data) //nolint:errcheck
    actualChecksum := h.Sum(nil)
    if !hmac.Equal(actualChecksum, checksum) {
        return nil, false
    }

    return data, true
}

func writeFormatBlock(ctx context.Context, st storage.Storage, f *formatBlock) error {
    var buf bytes.Buffer
    e := json.NewEncoder(&buf)
    e.SetIndent("", "  ")
    if err := e.Encode(f); err != nil {
        return errors.Wrap(err, "unable to marshal format block")
    }

    if err := st.PutBlock(ctx, FormatBlockID, buf.Bytes()); err != nil {
        return errors.Wrap(err, "unable to write format block")
    }

    return nil
}

func (f *formatBlock) decryptFormatBytes(masterKey []byte) (*repositoryObjectFormat, error) {
    switch f.EncryptionAlgorithm {
    case "NONE": // do nothing
        return f.UnencryptedFormat, nil

    case "AES256_GCM":
        aead, authData, err := initCrypto(masterKey, f.UniqueID)
        if err != nil {
            return nil, errors.Wrap(err, "cannot initialize cipher")
        }

        content := append([]byte(nil), f.EncryptedFormatBytes...)
        if len(content) < aead.NonceSize() {
            return nil, fmt.Errorf("invalid encrypted payload, too short")
        }
        nonce := content[0:aead.NonceSize()]
        payload := content[aead.NonceSize():]

        plainText, err := aead.Open(payload[:0], nonce, payload, authData)
        if err != nil {
            return nil, fmt.Errorf("unable to decrypt repository format, invalid credentials?")
        }

        var erc encryptedRepositoryConfig
        if err := json.Unmarshal(plainText, &erc); err != nil {
            return nil, errors.Wrap(err, "invalid repository format")
        }

        return &erc.Format, nil

    default:
        return nil, fmt.Errorf("unknown encryption algorithm: '%v'", f.EncryptionAlgorithm)
    }
}

func initCrypto(masterKey, repositoryID []byte) (cipher.AEAD, []byte, error) {
    aesKey := deriveKeyFromMasterKey(masterKey, repositoryID, purposeAESKey, 32)
    authData := deriveKeyFromMasterKey(masterKey, repositoryID, purposeAuthData, 32)

    blk, err := aes.NewCipher(aesKey)
    if err != nil {
        return nil, nil, errors.Wrap(err, "cannot create cipher")
    }
    aead, err := cipher.NewGCM(blk)
    if err != nil {
        return nil, nil, errors.Wrap(err, "cannot create cipher")
    }

    return aead, authData, nil
}

func encryptFormatBytes(f *formatBlock, format *repositoryObjectFormat, masterKey, repositoryID []byte) error {
    switch f.EncryptionAlgorithm {
    case "NONE":
        f.UnencryptedFormat = format
        return nil

    case "AES256_GCM":
        content, err := json.Marshal(&encryptedRepositoryConfig{Format: *format})
        if err != nil {
            return errors.Wrap(err, "can't marshal format to JSON")
        }
        aead, authData, err := initCrypto(masterKey, repositoryID)
        if err != nil {
            return errors.Wrap(err, "unable to initialize crypto")
        }
        nonceLength := aead.NonceSize()
        noncePlusContentLength := nonceLength + len(content)
        cipherText := make([]byte, noncePlusContentLength+aead.Overhead())

        // Store nonce at the beginning of ciphertext.
        nonce := cipherText[0:nonceLength]
        if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
            return err
        }

        b := aead.Seal(cipherText[nonceLength:nonceLength], nonce, content, authData)
        content = nonce[0 : nonceLength+len(b)]
        f.EncryptedFormatBytes = content
        return nil

    default:
        return fmt.Errorf("unknown encryption algorithm: '%v'", f.EncryptionAlgorithm)
    }
}

func addFormatBlockChecksumAndLength(fb []byte) ([]byte, error) {
    h := hmac.New(sha256.New, formatBlockChecksumSecret)
    h.Write(fb) //nolint:errcheck
    checksummedFormatBytes := h.Sum(fb)

    l := len(checksummedFormatBytes)
    if l > maxChecksummedFormatBytesLength {
        return nil, fmt.Errorf("format block too big: %v", l)
    }

    // return <length><checksummed-bytes><length>
    result := append([]byte(nil), byte(l), byte(l>>8))
    result = append(result, checksummedFormatBytes...)
    result = append(result, byte(l), byte(l>>8))
    return result, nil
}
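
// Framing sketch (derived from the functions above): a recoverable replica is
// laid out as
//
//	<len lo><len hi> <formatBytes || HMAC-SHA256(formatBytes)> <len lo><len hi>
//
// with the 16-bit little-endian length repeated at both ends, so
// recoverFormatBlockWithLength can locate the replica at either the beginning
// or the end of a file.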
79
format_block_test.go
Normal file
@@ -0,0 +1,79 @@
package repo

import (
    "context"
    "crypto/sha256"
    "reflect"
    "testing"

    "github.com/kopia/repo/internal/storagetesting"
    "github.com/kopia/repo/storage"
)

func TestFormatBlockRecovery(t *testing.T) {
    data := map[string][]byte{}
    st := storagetesting.NewMapStorage(data, nil, nil)
    ctx := context.Background()

    someDataBlock := []byte("aadsdasdas")
    checksummed, err := addFormatBlockChecksumAndLength(someDataBlock)
    if err != nil {
        t.Errorf("error appending checksum: %v", err)
    }
    if got, want := len(checksummed), 2+2+sha256.Size+len(someDataBlock); got != want {
        t.Errorf("unexpected checksummed length: %v, want %v", got, want)
    }

    assertNoError(t, st.PutBlock(ctx, "some-block-by-itself", checksummed))
    assertNoError(t, st.PutBlock(ctx, "some-block-suffix", append(append([]byte(nil), 1, 2, 3), checksummed...)))
    assertNoError(t, st.PutBlock(ctx, "some-block-prefix", append(append([]byte(nil), checksummed...), 1, 2, 3)))

    // mess up checksum
    checksummed[len(checksummed)-3] ^= 1
    assertNoError(t, st.PutBlock(ctx, "bad-checksum", checksummed))
    assertNoError(t, st.PutBlock(ctx, "zero-len", []byte{}))
    assertNoError(t, st.PutBlock(ctx, "one-len", []byte{1}))
    assertNoError(t, st.PutBlock(ctx, "two-len", []byte{1, 2}))
    assertNoError(t, st.PutBlock(ctx, "three-len", []byte{1, 2, 3}))
    assertNoError(t, st.PutBlock(ctx, "four-len", []byte{1, 2, 3, 4}))
    assertNoError(t, st.PutBlock(ctx, "five-len", []byte{1, 2, 3, 4, 5}))

    cases := []struct {
        block string
        err   error
    }{
        {"some-block-by-itself", nil},
        {"some-block-suffix", nil},
        {"some-block-prefix", nil},
        {"bad-checksum", errFormatBlockNotFound},
        {"no-such-block", storage.ErrBlockNotFound},
        {"zero-len", errFormatBlockNotFound},
        {"one-len", errFormatBlockNotFound},
        {"two-len", errFormatBlockNotFound},
        {"three-len", errFormatBlockNotFound},
        {"four-len", errFormatBlockNotFound},
        {"five-len", errFormatBlockNotFound},
    }

    for _, tc := range cases {
        t.Run(tc.block, func(t *testing.T) {
            v, err := RecoverFormatBlock(ctx, st, tc.block, -1)
            if tc.err == nil {
                if !reflect.DeepEqual(v, someDataBlock) || err != nil {
                    t.Errorf("unexpected result or error: v=%v err=%v, expected success", v, err)
                }
            } else {
                if v != nil || err != tc.err {
                    t.Errorf("unexpected result or error: v=%v err=%v, expected %v", v, err, tc.err)
                }
            }
        })
    }
}

func assertNoError(t *testing.T, err error) {
    t.Helper()
    if err != nil {
        t.Errorf("err: %v", err)
    }
}
132
initialize.go
Normal file
@@ -0,0 +1,132 @@
package repo

import (
    "context"
    "crypto/rand"
    "fmt"
    "io"

    "github.com/kopia/repo/block"
    "github.com/kopia/repo/object"
    "github.com/kopia/repo/storage"
    "github.com/pkg/errors"
)

// BuildInfo is the build information of Kopia.
var (
    BuildInfo    = "unknown"
    BuildVersion = "v0-unofficial"
)

// NewRepositoryOptions specifies options that apply to newly created repositories.
// All fields are optional; when not provided, reasonable defaults will be used.
type NewRepositoryOptions struct {
    UniqueID     []byte // force the use of particular unique ID
    BlockFormat  block.FormattingOptions
    DisableHMAC  bool
    ObjectFormat object.Format // object format
}

// Initialize creates initial repository data structures in the specified storage with given credentials.
func Initialize(ctx context.Context, st storage.Storage, opt *NewRepositoryOptions, password string) error {
    if opt == nil {
        opt = &NewRepositoryOptions{}
    }

    // get the block - expect ErrBlockNotFound
    _, err := st.GetBlock(ctx, FormatBlockID, 0, -1)
    if err == nil {
        return fmt.Errorf("repository already initialized")
    }
    if err != storage.ErrBlockNotFound {
        return err
    }

    format := formatBlockFromOptions(opt)
    masterKey, err := format.deriveMasterKeyFromPassword(password)
    if err != nil {
        return errors.Wrap(err, "unable to derive master key")
    }

    if err := encryptFormatBytes(format, repositoryObjectFormatFromOptions(opt), masterKey, format.UniqueID); err != nil {
        return errors.Wrap(err, "unable to encrypt format bytes")
    }

    if err := writeFormatBlock(ctx, st, format); err != nil {
        return errors.Wrap(err, "unable to write format block")
    }

    return nil
}

func formatBlockFromOptions(opt *NewRepositoryOptions) *formatBlock {
    f := &formatBlock{
        Tool:                   "https://github.com/kopia/kopia",
        BuildInfo:              BuildInfo,
        KeyDerivationAlgorithm: defaultKeyDerivationAlgorithm,
        UniqueID:               applyDefaultRandomBytes(opt.UniqueID, 32),
        Version:                "1",
        EncryptionAlgorithm:    defaultFormatEncryption,
    }

    if opt.BlockFormat.Encryption == "NONE" {
        f.EncryptionAlgorithm = "NONE"
    }

    return f
}

func repositoryObjectFormatFromOptions(opt *NewRepositoryOptions) *repositoryObjectFormat {
    f := &repositoryObjectFormat{
        FormattingOptions: block.FormattingOptions{
            Version:     1,
            Hash:        applyDefaultString(opt.BlockFormat.Hash, block.DefaultHash),
            Encryption:  applyDefaultString(opt.BlockFormat.Encryption, block.DefaultEncryption),
            HMACSecret:  applyDefaultRandomBytes(opt.BlockFormat.HMACSecret, 32),
            MasterKey:   applyDefaultRandomBytes(opt.BlockFormat.MasterKey, 32),
            MaxPackSize: applyDefaultInt(opt.BlockFormat.MaxPackSize, applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20)), // 20 MiB
        },
        Format: object.Format{
            Splitter:     applyDefaultString(opt.ObjectFormat.Splitter, object.DefaultSplitter),
            MaxBlockSize: applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20), // 20 MiB
            MinBlockSize: applyDefaultInt(opt.ObjectFormat.MinBlockSize, 10<<20), // 10 MiB
            AvgBlockSize: applyDefaultInt(opt.ObjectFormat.AvgBlockSize, 16<<20), // 16 MiB
        },
    }

    if opt.DisableHMAC {
        f.HMACSecret = nil
    }

    return f
}

func randomBytes(n int) []byte {
    b := make([]byte, n)
    io.ReadFull(rand.Reader, b) //nolint:errcheck
    return b
}

func applyDefaultInt(v, def int) int {
    if v == 0 {
        return def
    }

    return v
}

func applyDefaultString(v, def string) string {
    if v == "" {
        return def
    }

    return v
}

func applyDefaultRandomBytes(b []byte, n int) []byte {
    if b == nil {
        return randomBytes(n)
    }

    return b
}
9
internal/repologging/logging.go
Normal file
@@ -0,0 +1,9 @@
// Package repologging provides loggers.
package repologging

import "github.com/op/go-logging"

// Logger returns an instance of a logger used throughout repository codebase.
func Logger(module string) *logging.Logger {
    return logging.MustGetLogger(module)
}
@@ -23,6 +23,7 @@ type Environment struct {

    configDir  string
    storageDir string
    connected  bool
}

// Setup sets up a test environment.
@@ -75,6 +76,8 @@ func (e *Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOption
        t.Fatalf("can't connect: %v", err)
    }

    e.connected = true

    e.Repository, err = repo.Open(ctx, e.configFile(), masterPassword, &repo.Options{})
    if err != nil {
        t.Fatalf("can't open: %v", err)
@@ -88,8 +91,13 @@ func (e *Environment) Close(t *testing.T) {
    if err := e.Repository.Close(context.Background()); err != nil {
        t.Fatalf("unable to close: %v", err)
    }

    if err := os.RemoveAll(e.configDir); err != nil {
    if e.connected {
        if err := repo.Disconnect(e.configFile()); err != nil {
            t.Errorf("error disconnecting: %v", err)
        }
    }
    if err := os.Remove(e.configDir); err != nil {
        // should be empty, assuming Disconnect was successful
        t.Errorf("error removing config directory: %v", err)
    }
    if err := os.RemoveAll(e.storageDir); err != nil {
44
internal/retry/retry.go
Normal file
@@ -0,0 +1,44 @@
// Package retry implements exponential retry policy.
package retry

import (
    "fmt"
    "time"

    "github.com/kopia/repo/internal/repologging"
)

var log = repologging.Logger("repo/retry")

var (
    maxAttempts             = 10
    retryInitialSleepAmount = 1 * time.Second
    retryMaxSleepAmount     = 32 * time.Second
)

// AttemptFunc performs an attempt and returns a value (optional, may be nil) and an error.
type AttemptFunc func() (interface{}, error)

// IsRetriableFunc is a function that determines whether an error is retriable.
type IsRetriableFunc func(err error) bool

// WithExponentialBackoff runs the provided attempt until it succeeds, retrying on all errors that are
// deemed retriable by the provided function. The delay between retries grows exponentially up to
// a certain limit.
func WithExponentialBackoff(desc string, attempt AttemptFunc, isRetriableError IsRetriableFunc) (interface{}, error) {
    sleepAmount := retryInitialSleepAmount
    for i := 0; i < maxAttempts; i++ {
        v, err := attempt()
        if !isRetriableError(err) {
            return v, err
        }
        log.Debugf("got error %v when %v (#%v), sleeping for %v before retrying", err, desc, i, sleepAmount)
        time.Sleep(sleepAmount)
        sleepAmount *= 2
        if sleepAmount > retryMaxSleepAmount {
            sleepAmount = retryMaxSleepAmount
        }
    }

    return nil, fmt.Errorf("unable to complete %v despite %v retries", desc, maxAttempts)
}
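
// Usage sketch (illustrative; st, ctx and id are placeholders, and the
// retriable-error predicate here is deliberately naive):
//
//	v, err := WithExponentialBackoff("get block", func() (interface{}, error) {
//		return st.GetBlock(ctx, id, 0, -1)
//	}, func(err error) bool { return err != nil })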
59
internal/retry/retry_test.go
Normal file
@@ -0,0 +1,59 @@
package retry

import (
    "errors"
    "fmt"
    "reflect"
    "testing"
    "time"
)

var (
    errRetriable = errors.New("retriable")
)

func isRetriable(e error) bool {
    return e == errRetriable
}

func TestRetry(t *testing.T) {
    retryInitialSleepAmount = 10 * time.Millisecond
    retryMaxSleepAmount = 20 * time.Millisecond
    maxAttempts = 3

    cnt := 0

    cases := []struct {
        desc      string
        f         func() (interface{}, error)
        want      interface{}
        wantError error
    }{
        {"success-nil", func() (interface{}, error) { return nil, nil }, nil, nil},
        {"success", func() (interface{}, error) { return 3, nil }, 3, nil},
        {"retriable-succeeds", func() (interface{}, error) {
            cnt++
            if cnt < 2 {
                return nil, errRetriable
            }
            return 4, nil
        }, 4, nil},
        {"retriable-never-succeeds", func() (interface{}, error) { return nil, errRetriable }, nil, fmt.Errorf("unable to complete retriable-never-succeeds despite 3 retries")},
    }

    for _, tc := range cases {
        t.Run(tc.desc, func(t *testing.T) {
            tc := tc
            t.Parallel()

            got, err := WithExponentialBackoff(tc.desc, tc.f, isRetriable)
            if !reflect.DeepEqual(err, tc.wantError) {
                t.Errorf("invalid error %q, wanted %q", err, tc.wantError)
            }

            if got != tc.want {
                t.Errorf("invalid value %v, wanted %v", got, tc.want)
            }
        })
    }
}
110
internal/storagetesting/asserts.go
Normal file
@@ -0,0 +1,110 @@
package storagetesting

import (
    "bytes"
    "context"
    "reflect"
    "sort"
    "testing"

    "github.com/kopia/repo/storage"
)

// AssertGetBlock asserts that the specified storage block has correct content.
func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block string, expected []byte) {
    t.Helper()

    b, err := s.GetBlock(ctx, block, 0, -1)
    if err != nil {
        t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
        return
    }

    if !bytes.Equal(b, expected) {
        t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected)
    }

    half := int64(len(expected) / 2)
    if half == 0 {
        return
    }

    b, err = s.GetBlock(ctx, block, 0, 0)
    if err != nil {
        t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
        return
    }

    if len(b) != 0 {
        t.Errorf("GetBlock(%v) returned non-zero length: %v", block, len(b))
        return
    }

    b, err = s.GetBlock(ctx, block, 0, half)
    if err != nil {
        t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
        return
    }

    if !bytes.Equal(b, expected[0:half]) {
        t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected[0:half])
    }

    b, err = s.GetBlock(ctx, block, half, int64(len(expected))-half)
    if err != nil {
        t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected)
        return
    }

    if !bytes.Equal(b, expected[len(expected)-int(half):]) {
        t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected[len(expected)-int(half):])
    }

    AssertInvalidOffsetLength(ctx, t, s, block, -3, 1)
    AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)), 3)
    AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)-1), 3)
    AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)+1), 3)
}

// AssertInvalidOffsetLength verifies that the given combination of (offset,length) fails on GetBlock().
func AssertInvalidOffsetLength(ctx context.Context, t *testing.T, s storage.Storage, block string, offset, length int64) {
    if _, err := s.GetBlock(ctx, block, offset, length); err == nil {
        t.Errorf("GetBlock(%v,%v,%v) did not return error for invalid offset/length", block, offset, length)
    }
}

// AssertGetBlockNotFound asserts that GetBlock() for the specified storage block returns ErrBlockNotFound.
func AssertGetBlockNotFound(ctx context.Context, t *testing.T, s storage.Storage, block string) {
    t.Helper()

    b, err := s.GetBlock(ctx, block, 0, -1)
    if err != storage.ErrBlockNotFound || b != nil {
        t.Errorf("GetBlock(%v) returned %v, %v but expected ErrBlockNotFound", block, b, err)
    }
}

// AssertListResults asserts that the list results with given prefix return the specified list of names in order.
func AssertListResults(ctx context.Context, t *testing.T, s storage.Storage, prefix string, want ...string) {
    t.Helper()
    var names []string

    if err := s.ListBlocks(ctx, prefix, func(e storage.BlockMetadata) error {
        names = append(names, e.BlockID)
        return nil
    }); err != nil {
        t.Fatalf("err: %v", err)
    }

    names = sorted(names)
    want = sorted(want)

    if !reflect.DeepEqual(names, want) {
        t.Errorf("ListBlocks(%v) returned %v, but wanted %v", prefix, names, want)
    }
}

func sorted(s []string) []string {
    x := append([]string(nil), s...)
    sort.Strings(x)
    return x
}
2
internal/storagetesting/doc.go
Normal file
@@ -0,0 +1,2 @@
// Package storagetesting is used for testing Storage implementations.
package storagetesting
115
internal/storagetesting/faulty.go
Normal file
@@ -0,0 +1,115 @@
package storagetesting

import (
    "context"
    "sync"
    "time"

    "github.com/kopia/repo/internal/repologging"
    "github.com/kopia/repo/storage"
)

var log = repologging.Logger("faulty-storage")

// Fault describes the behavior of a single fault.
type Fault struct {
    Repeat      int           // how many times to repeat this fault
    Sleep       time.Duration // sleep before returning
    ErrCallback func() error
    WaitFor     chan struct{} // waits until the given channel is closed before returning
    Err         error         // error to return (can be nil in combination with Sleep and WaitFor)
}

// FaultyStorage implements fault injection for Storage.
type FaultyStorage struct {
    Base   storage.Storage
    Faults map[string][]*Fault

    mu sync.Mutex
}

// GetBlock implements storage.Storage
func (s *FaultyStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) {
    if err := s.getNextFault("GetBlock", id, offset, length); err != nil {
        return nil, err
    }
    return s.Base.GetBlock(ctx, id, offset, length)
}

// PutBlock implements storage.Storage
func (s *FaultyStorage) PutBlock(ctx context.Context, id string, data []byte) error {
    if err := s.getNextFault("PutBlock", id, len(data)); err != nil {
        return err
    }
    return s.Base.PutBlock(ctx, id, data)
}

// DeleteBlock implements storage.Storage
func (s *FaultyStorage) DeleteBlock(ctx context.Context, id string) error {
    if err := s.getNextFault("DeleteBlock", id); err != nil {
        return err
    }
    return s.Base.DeleteBlock(ctx, id)
}

// ListBlocks implements storage.Storage
func (s *FaultyStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
    if err := s.getNextFault("ListBlocks", prefix); err != nil {
        return err
    }

    return s.Base.ListBlocks(ctx, prefix, func(bm storage.BlockMetadata) error {
        if err := s.getNextFault("ListBlocksItem", prefix); err != nil {
            return err
        }
        return callback(bm)
    })
}

// Close implements storage.Storage
func (s *FaultyStorage) Close(ctx context.Context) error {
    if err := s.getNextFault("Close"); err != nil {
        return err
    }
    return s.Base.Close(ctx)
}

// ConnectionInfo implements storage.Storage
func (s *FaultyStorage) ConnectionInfo() storage.ConnectionInfo {
    return s.Base.ConnectionInfo()
}

func (s *FaultyStorage) getNextFault(method string, args ...interface{}) error {
    s.mu.Lock()
    faults := s.Faults[method]
    if len(faults) == 0 {
        s.mu.Unlock()
        log.Debugf("no faults for %v %v", method, args)
        return nil
    }

    f := faults[0]
    if f.Repeat > 0 {
        f.Repeat--
        log.Debugf("will repeat %v more times the fault for %v %v", f.Repeat, method, args)
    } else {
        s.Faults[method] = faults[1:]
    }
    s.mu.Unlock()
    if f.WaitFor != nil {
        log.Debugf("waiting for channel to be closed in %v %v", method, args)
        <-f.WaitFor
    }
    if f.Sleep > 0 {
        log.Debugf("sleeping for %v in %v %v", f.Sleep, method, args)
        time.Sleep(f.Sleep)
    }
    if f.ErrCallback != nil {
        err := f.ErrCallback()
        log.Debugf("returning %v for %v %v", err, method, args)
        return err
    }
    log.Debugf("returning %v for %v %v", f.Err, method, args)
    return f.Err
}

var _ storage.Storage = (*FaultyStorage)(nil)
133
internal/storagetesting/map.go
Normal file
@@ -0,0 +1,133 @@
package storagetesting

import (
    "context"
    "errors"
    "sort"
    "strings"
    "sync"
    "time"

    "github.com/kopia/repo/storage"
)

type mapStorage struct {
    data    map[string][]byte
    keyTime map[string]time.Time
    timeNow func() time.Time
    mutex   sync.RWMutex
}

func (s *mapStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) {
    s.mutex.RLock()
    defer s.mutex.RUnlock()

    data, ok := s.data[id]
    if ok {
        data = append([]byte(nil), data...)
        if length < 0 {
            return data, nil
        }

        if int(offset) > len(data) || offset < 0 {
            return nil, errors.New("invalid offset")
        }

        data = data[offset:]
        if int(length) > len(data) {
            return nil, errors.New("invalid length")
        }
        return data[0:length], nil
    }

    return nil, storage.ErrBlockNotFound
}

func (s *mapStorage) PutBlock(ctx context.Context, id string, data []byte) error {
    s.mutex.Lock()
    defer s.mutex.Unlock()

    if _, ok := s.data[id]; ok {
        return nil
    }

    s.keyTime[id] = s.timeNow()
    s.data[id] = append([]byte{}, data...)
    return nil
}

func (s *mapStorage) DeleteBlock(ctx context.Context, id string) error {
    s.mutex.Lock()
    defer s.mutex.Unlock()

    delete(s.data, id)
    delete(s.keyTime, id)
    return nil
}

func (s *mapStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
    s.mutex.RLock()

    keys := []string{}
    for k := range s.data {
        if strings.HasPrefix(k, prefix) {
            keys = append(keys, k)
        }
    }
    s.mutex.RUnlock()

    sort.Strings(keys)

    for _, k := range keys {
        s.mutex.RLock()
        v, ok := s.data[k]
        ts := s.keyTime[k]
        s.mutex.RUnlock()
        if !ok {
            continue
        }
        if err := callback(storage.BlockMetadata{
            BlockID:   k,
            Length:    int64(len(v)),
            Timestamp: ts,
        }); err != nil {
            return err
        }
    }
    return nil
}

func (s *mapStorage) Close(ctx context.Context) error {
    return nil
}

func (s *mapStorage) TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error {
    s.mutex.Lock()
    defer s.mutex.Unlock()

    if v, ok := s.keyTime[blockID]; ok {
        n := s.timeNow()
        if n.Sub(v) >= threshold {
            s.keyTime[blockID] = n
        }
    }

    return nil
}

func (s *mapStorage) ConnectionInfo() storage.ConnectionInfo {
    // unsupported
    return storage.ConnectionInfo{}
}

// NewMapStorage returns an implementation of Storage backed by the contents of given map.
// Used primarily for testing.
func NewMapStorage(data map[string][]byte, keyTime map[string]time.Time, timeNow func() time.Time) storage.Storage {
    if keyTime == nil {
        keyTime = make(map[string]time.Time)
    }
    if timeNow == nil {
        timeNow = time.Now
    }
    return &mapStorage{data: data, keyTime: keyTime, timeNow: timeNow}
}
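
// Usage sketch (illustrative; see map_test.go below):
//
//	st := NewMapStorage(map[string][]byte{}, nil, nil) // nil defaults: fresh key times, real clock
//	_ = st.PutBlock(ctx, "some-block", []byte{1, 2, 3})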
15
internal/storagetesting/map_test.go
Normal file
@@ -0,0 +1,15 @@
package storagetesting

import (
    "context"
    "testing"
)

func TestMapStorage(t *testing.T) {
    data := map[string][]byte{}
    r := NewMapStorage(data, nil, nil)
    if r == nil {
        t.Errorf("unexpected result: %v", r)
    }
    VerifyStorage(context.Background(), t, r)
}
84
internal/storagetesting/verify.go
Normal file
@@ -0,0 +1,84 @@
package storagetesting

import (
    "bytes"
    "context"
    "reflect"
    "testing"

    "github.com/kopia/repo/storage"
)

// VerifyStorage verifies the behavior of the specified storage.
func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) {
    blocks := []struct {
        blk      string
        contents []byte
    }{
        {blk: string("abcdbbf4f0507d054ed5a80a5b65086f602b"), contents: []byte{}},
        {blk: string("zxce0e35630770c54668a8cfb4e414c6bf8f"), contents: []byte{1}},
        {blk: string("abff4585856ebf0748fd989e1dd623a8963d"), contents: bytes.Repeat([]byte{1}, 1000)},
        {blk: string("abgc3dca496d510f492c858a2df1eb824e62"), contents: bytes.Repeat([]byte{1}, 10000)},
        {blk: string("kopia.repository"), contents: bytes.Repeat([]byte{2}, 100)},
    }

    // First verify that blocks don't exist.
    for _, b := range blocks {
        AssertGetBlockNotFound(ctx, t, r, b.blk)
    }

    ctx2 := storage.WithUploadProgressCallback(ctx, func(desc string, completed, total int64) {
        log.Infof("progress %v: %v/%v", desc, completed, total)
    })

    // Now add blocks.
    for _, b := range blocks {
        if err := r.PutBlock(ctx2, b.blk, b.contents); err != nil {
            t.Errorf("can't put block: %v", err)
        }

        AssertGetBlock(ctx, t, r, b.blk, b.contents)
    }

    AssertListResults(ctx, t, r, "", blocks[0].blk, blocks[1].blk, blocks[2].blk, blocks[3].blk, blocks[4].blk)
    AssertListResults(ctx, t, r, "ab", blocks[0].blk, blocks[2].blk, blocks[3].blk)

    // Overwrite blocks.
    for _, b := range blocks {
        if err := r.PutBlock(ctx, b.blk, b.contents); err != nil {
            t.Errorf("can't put block: %v", err)
        }

        AssertGetBlock(ctx, t, r, b.blk, b.contents)
    }

    if err := r.DeleteBlock(ctx, blocks[0].blk); err != nil {
        t.Errorf("unable to delete block: %v", err)
    }
    if err := r.DeleteBlock(ctx, blocks[0].blk); err != nil {
        t.Errorf("invalid error when deleting deleted block: %v", err)
    }
    AssertListResults(ctx, t, r, "ab", blocks[2].blk, blocks[3].blk)
    AssertListResults(ctx, t, r, "", blocks[1].blk, blocks[2].blk, blocks[3].blk, blocks[4].blk)
}

// AssertConnectionInfoRoundTrips verifies that the ConnectionInfo returned by a given storage can be used to create
// equivalent storage.
func AssertConnectionInfoRoundTrips(ctx context.Context, t *testing.T, s storage.Storage) {
    t.Helper()

    ci := s.ConnectionInfo()
    s2, err := storage.NewStorage(ctx, ci)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    ci2 := s2.ConnectionInfo()
    if !reflect.DeepEqual(ci, ci2) {
        t.Errorf("connection info does not round-trip: %v vs %v", ci, ci2)
    }

    if err := s2.Close(ctx); err != nil {
        t.Errorf("unable to close storage: %v", err)
    }
}
|
||||
44
internal/throttle/round_tripper.go
Normal file
@@ -0,0 +1,44 @@
package throttle

import (
	"io"
	"net/http"
)

type throttlerPool interface {
	AddReader(io.ReadCloser) (io.ReadCloser, error)
}

type throttlingRoundTripper struct {
	base         http.RoundTripper
	downloadPool throttlerPool
	uploadPool   throttlerPool
}

func (rt *throttlingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	if req.Body != nil && rt.uploadPool != nil {
		var err error
		req.Body, err = rt.uploadPool.AddReader(req.Body)
		if err != nil {
			return nil, err
		}
	}
	resp, err := rt.base.RoundTrip(req)
	if resp != nil && resp.Body != nil && rt.downloadPool != nil {
		resp.Body, err = rt.downloadPool.AddReader(resp.Body)
	}
	return resp, err
}

// NewRoundTripper returns an http.RoundTripper that throttles uploads and downloads.
func NewRoundTripper(base http.RoundTripper, downloadPool throttlerPool, uploadPool throttlerPool) http.RoundTripper {
	if base == nil {
		base = http.DefaultTransport
	}

	return &throttlingRoundTripper{
		base:         base,
		downloadPool: downloadPool,
		uploadPool:   uploadPool,
	}
}
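Because the pool sits behind the small throttlerPool interface, any type with a matching AddReader method can be plugged in. Below is a minimal sketch of wiring the round tripper into a standard http.Client; noopPool is a hypothetical pass-through pool, not part of this package, and since the package lives under internal/ this only compiles from within the repository:

package main

import (
	"io"
	"net/http"

	"github.com/kopia/repo/internal/throttle"
)

// noopPool is a hypothetical pass-through pool; a real implementation
// would wrap the reader to enforce a bandwidth budget.
type noopPool struct{}

func (noopPool) AddReader(r io.ReadCloser) (io.ReadCloser, error) { return r, nil }

func main() {
	client := &http.Client{
		// passing base=nil falls back to http.DefaultTransport.
		Transport: throttle.NewRoundTripper(nil, noopPool{}, noopPool{}),
	}
	if resp, err := client.Get("https://example.com/"); err == nil {
		resp.Body.Close() //nolint:errcheck
	}
}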
103
internal/throttle/round_tripper_test.go
Normal file
@@ -0,0 +1,103 @@
package throttle

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"testing"
)

type baseRoundTripper struct {
	responses map[*http.Request]*http.Response
}

func (rt *baseRoundTripper) add(req *http.Request, resp *http.Response) (*http.Request, *http.Response) {
	rt.responses[req] = resp
	return req, resp
}

func (rt *baseRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	resp := rt.responses[req]
	if resp != nil {
		return resp, nil
	}

	return nil, fmt.Errorf("error occurred")
}

type fakePool struct {
	readers []io.ReadCloser
}

func (fp *fakePool) reset() {
	fp.readers = nil
}

func (fp *fakePool) AddReader(r io.ReadCloser) (io.ReadCloser, error) {
	fp.readers = append(fp.readers, r)
	return r, nil
}

func TestRoundTripper(t *testing.T) {
	downloadBody := ioutil.NopCloser(bytes.NewReader([]byte("data1")))
	uploadBody := ioutil.NopCloser(bytes.NewReader([]byte("data1")))

	base := &baseRoundTripper{
		responses: make(map[*http.Request]*http.Response),
	}
	downloadPool := &fakePool{}
	uploadPool := &fakePool{}
	rt := NewRoundTripper(base, downloadPool, uploadPool)

	// Empty request (no request body, no response body)
	uploadPool.reset()
	downloadPool.reset()
	req1, resp1 := base.add(&http.Request{}, &http.Response{})
	resp, err := rt.RoundTrip(req1)
	if resp != resp1 || err != nil {
		t.Errorf("invalid response or error: %v", err)
	}
	if len(downloadPool.readers) != 0 || len(uploadPool.readers) != 0 {
		t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
	}

	// Upload request
	uploadPool.reset()
	downloadPool.reset()
	req2, resp2 := base.add(&http.Request{
		Body: uploadBody,
	}, &http.Response{})
	resp, err = rt.RoundTrip(req2)
	if resp != resp2 || err != nil {
		t.Errorf("invalid response or error: %v", err)
	}
	if len(downloadPool.readers) != 0 || len(uploadPool.readers) != 1 {
		t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
	}

	// Download request
	uploadPool.reset()
	downloadPool.reset()
	req3, resp3 := base.add(&http.Request{}, &http.Response{Body: downloadBody})
	resp, err = rt.RoundTrip(req3)
	if resp != resp3 || err != nil {
		t.Errorf("invalid response or error: %v", err)
	}
	if len(downloadPool.readers) != 1 || len(uploadPool.readers) != 0 {
		t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
	}

	// Upload/Download request
	uploadPool.reset()
	downloadPool.reset()
	req4, resp4 := base.add(&http.Request{Body: uploadBody}, &http.Response{Body: downloadBody})
	resp, err = rt.RoundTrip(req4)
	if resp != resp4 || err != nil {
		t.Errorf("invalid response or error: %v", err)
	}
	if len(downloadPool.readers) != 1 || len(uploadPool.readers) != 1 {
		t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers)
	}
}
56
local_config.go
Normal file
@@ -0,0 +1,56 @@
package repo

import (
	"encoding/json"
	"io"
	"os"

	"github.com/kopia/repo/block"
	"github.com/kopia/repo/object"
	"github.com/kopia/repo/storage"
)

// LocalConfig is a configuration of Kopia stored in a configuration file.
type LocalConfig struct {
	Storage storage.ConnectionInfo `json:"storage"`
	Caching block.CachingOptions   `json:"caching"`
}

// repositoryObjectFormat describes the format of objects in a repository.
type repositoryObjectFormat struct {
	block.FormattingOptions
	object.Format
}

// Load reads local configuration from the specified reader.
func (lc *LocalConfig) Load(r io.Reader) error {
	*lc = LocalConfig{}
	return json.NewDecoder(r).Decode(lc)
}

// Save writes the configuration to the specified writer.
func (lc *LocalConfig) Save(w io.Writer) error {
	b, err := json.MarshalIndent(lc, "", "  ")
	if err != nil {
		return err
	}
	_, err = w.Write(b)
	return err
}

// loadConfigFromFile reads the local configuration from the specified file.
func loadConfigFromFile(fileName string) (*LocalConfig, error) {
	f, err := os.Open(fileName)
	if err != nil {
		return nil, err
	}
	defer f.Close() //nolint:errcheck

	var lc LocalConfig

	if err := lc.Load(f); err != nil {
		return nil, err
	}

	return &lc, nil
}
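As a quick illustration of the Load/Save pair above, a buffer-based round trip (a sketch using only the types defined in this file, plus an extra "bytes" import):

// Sketch: round-tripping LocalConfig through Save and Load.
func exampleLocalConfigRoundTrip() error {
	var buf bytes.Buffer

	original := LocalConfig{}
	if err := original.Save(&buf); err != nil {
		return err
	}

	var loaded LocalConfig
	return loaded.Load(&buf)
}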
12
manifest/manifest_entry.go
Normal file
@@ -0,0 +1,12 @@
package manifest

import "time"

// EntryMetadata contains metadata about a manifest item. Each manifest item has one or more labels,
// including the required "type" label.
type EntryMetadata struct {
	ID      string
	Length  int
	Labels  map[string]string
	ModTime time.Time
}
516
manifest/manifest_manager.go
Normal file
@@ -0,0 +1,516 @@
// Package manifest implements support for managing JSON-based manifests in repository.
package manifest

import (
	"bytes"
	"compress/gzip"
	"context"
	"crypto/rand"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"sort"
	"sync"
	"time"

	"github.com/kopia/repo/internal/repologging"
	"github.com/kopia/repo/storage"
	"github.com/pkg/errors"
)

var log = repologging.Logger("kopia/manifest")

// ErrNotFound is returned when the metadata item is not found.
var ErrNotFound = errors.New("not found")

const manifestBlockPrefix = "m"
const autoCompactionBlockCount = 16

type blockManager interface {
	GetBlock(ctx context.Context, blockID string) ([]byte, error)
	WriteBlock(ctx context.Context, data []byte, prefix string) (string, error)
	DeleteBlock(blockID string) error
	ListBlocks(prefix string) ([]string, error)
	DisableIndexFlush()
	EnableIndexFlush()
	Flush(ctx context.Context) error
}

// Manager organizes JSON manifests of various kinds, including snapshot manifests.
type Manager struct {
	mu sync.Mutex
	b  blockManager

	initialized    bool
	pendingEntries map[string]*manifestEntry

	committedEntries  map[string]*manifestEntry
	committedBlockIDs map[string]bool
}

// Put serializes the provided payload to JSON and persists it. Returns a unique handle that represents the object.
func (m *Manager) Put(ctx context.Context, labels map[string]string, payload interface{}) (string, error) {
	if labels["type"] == "" {
		return "", fmt.Errorf("'type' label is required")
	}

	if err := m.ensureInitialized(ctx); err != nil {
		return "", err
	}
	m.mu.Lock()
	defer m.mu.Unlock()

	random := make([]byte, 16)
	if _, err := rand.Read(random); err != nil {
		return "", errors.Wrap(err, "can't initialize randomness")
	}

	b, err := json.Marshal(payload)
	if err != nil {
		return "", errors.Wrap(err, "marshal error")
	}

	e := &manifestEntry{
		ID:      hex.EncodeToString(random),
		ModTime: time.Now().UTC(),
		Labels:  copyLabels(labels),
		Content: b,
	}

	m.pendingEntries[e.ID] = e

	return e.ID, nil
}

// GetMetadata returns metadata about the provided manifest item or ErrNotFound if the item can't be found.
func (m *Manager) GetMetadata(ctx context.Context, id string) (*EntryMetadata, error) {
	if err := m.ensureInitialized(ctx); err != nil {
		return nil, err
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	e := m.pendingEntries[id]
	if e == nil {
		e = m.committedEntries[id]
	}

	if e == nil || e.Deleted {
		return nil, ErrNotFound
	}

	return &EntryMetadata{
		ID:      id,
		ModTime: e.ModTime,
		Length:  len(e.Content),
		Labels:  copyLabels(e.Labels),
	}, nil
}

// Get retrieves the contents of the provided manifest item by deserializing it as JSON to provided object.
// If the manifest is not found, returns ErrNotFound.
func (m *Manager) Get(ctx context.Context, id string, data interface{}) error {
	if err := m.ensureInitialized(ctx); err != nil {
		return err
	}

	b, err := m.GetRaw(ctx, id)
	if err != nil {
		return err
	}

	if err := json.Unmarshal(b, data); err != nil {
		return fmt.Errorf("unable to unmarshal %q: %v", id, err)
	}

	return nil
}

// GetRaw returns raw contents of the provided manifest (JSON bytes) or ErrNotFound if not found.
func (m *Manager) GetRaw(ctx context.Context, id string) ([]byte, error) {
	if err := m.ensureInitialized(ctx); err != nil {
		return nil, err
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	e := m.pendingEntries[id]
	if e == nil {
		e = m.committedEntries[id]
	}
	if e == nil || e.Deleted {
		return nil, ErrNotFound
	}

	return e.Content, nil
}

// Find returns the list of EntryMetadata for manifest entries matching all provided labels.
func (m *Manager) Find(ctx context.Context, labels map[string]string) ([]*EntryMetadata, error) {
	if err := m.ensureInitialized(ctx); err != nil {
		return nil, err
	}

	m.mu.Lock()
	defer m.mu.Unlock()

	var matches []*EntryMetadata
	for _, e := range m.pendingEntries {
		if matchesLabels(e.Labels, labels) {
			matches = append(matches, cloneEntryMetadata(e))
		}
	}
	for _, e := range m.committedEntries {
		if m.pendingEntries[e.ID] != nil {
			// ignore committed entries that are also pending
			continue
		}

		if matchesLabels(e.Labels, labels) {
			matches = append(matches, cloneEntryMetadata(e))
		}
	}

	sort.Slice(matches, func(i, j int) bool {
		return matches[i].ModTime.Before(matches[j].ModTime)
	})
	return matches, nil
}

func cloneEntryMetadata(e *manifestEntry) *EntryMetadata {
	return &EntryMetadata{
		ID:      e.ID,
		Labels:  copyLabels(e.Labels),
		Length:  len(e.Content),
		ModTime: e.ModTime,
	}
}

// matchesLabels returns true when all entries in 'b' are found in 'a'.
func matchesLabels(a, b map[string]string) bool {
	for k, v := range b {
		if a[k] != v {
			return false
		}
	}

	return true
}

// Flush persists changes to the manifest manager.
func (m *Manager) Flush(ctx context.Context) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	_, err := m.flushPendingEntriesLocked(ctx)
	return err
}

func (m *Manager) flushPendingEntriesLocked(ctx context.Context) (string, error) {
	if len(m.pendingEntries) == 0 {
		return "", nil
	}

	man := manifest{}

	for _, e := range m.pendingEntries {
		man.Entries = append(man.Entries, e)
	}

	var buf bytes.Buffer
	gz := gzip.NewWriter(&buf)
	mustSucceed(json.NewEncoder(gz).Encode(man))
	mustSucceed(gz.Flush())
	mustSucceed(gz.Close())

	blockID, err := m.b.WriteBlock(ctx, buf.Bytes(), manifestBlockPrefix)
	if err != nil {
		return "", err
	}

	for _, e := range m.pendingEntries {
		m.committedEntries[e.ID] = e
		delete(m.pendingEntries, e.ID)
	}

	m.committedBlockIDs[blockID] = true

	return blockID, nil
}

func mustSucceed(e error) {
	if e != nil {
		panic("unexpected failure: " + e.Error())
	}
}

// Delete marks the specified manifest ID for deletion.
func (m *Manager) Delete(ctx context.Context, id string) error {
	if err := m.ensureInitialized(ctx); err != nil {
		return err
	}

	if m.pendingEntries[id] == nil && m.committedEntries[id] == nil {
		return nil
	}

	m.pendingEntries[id] = &manifestEntry{
		ID:      id,
		ModTime: time.Now().UTC(),
		Deleted: true,
	}
	return nil
}

// Refresh updates the committed blocks from the underlying storage.
func (m *Manager) Refresh(ctx context.Context) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	return m.loadCommittedBlocksLocked(ctx)
}

func (m *Manager) loadCommittedBlocksLocked(ctx context.Context) error {
	log.Debugf("listing manifest blocks")
	for {
		blocks, err := m.b.ListBlocks(manifestBlockPrefix)
		if err != nil {
			return errors.Wrap(err, "unable to list manifest blocks")
		}

		m.committedEntries = map[string]*manifestEntry{}
		m.committedBlockIDs = map[string]bool{}

		log.Debugf("found %v manifest blocks", len(blocks))
		err = m.loadManifestBlocks(ctx, blocks)
		if err == nil {
			// success
			break
		}
		if err == storage.ErrBlockNotFound {
			// try again, lost a race with another manifest manager which just did compaction
			continue
		}
		return errors.Wrap(err, "unable to load manifest blocks")
	}

	if err := m.maybeCompactLocked(ctx); err != nil {
		return errors.Wrap(err, "error auto-compacting blocks")
	}

	return nil
}

func (m *Manager) loadManifestBlocks(ctx context.Context, blockIDs []string) error {
	t0 := time.Now()

	for _, b := range blockIDs {
		m.committedBlockIDs[b] = true
	}

	manifests, err := m.loadBlocksInParallel(ctx, blockIDs)
	if err != nil {
		return err
	}

	for _, man := range manifests {
		for _, e := range man.Entries {
			m.mergeEntry(e)
		}
	}

	// after merging, remove entries marked as deleted.
	for k, e := range m.committedEntries {
		if e.Deleted {
			delete(m.committedEntries, k)
		}
	}

	log.Debugf("finished loading manifest blocks in %v.", time.Since(t0))

	return nil
}

func (m *Manager) loadBlocksInParallel(ctx context.Context, blockIDs []string) ([]manifest, error) {
	errors := make(chan error, len(blockIDs))
	manifests := make(chan manifest, len(blockIDs))
	ch := make(chan string, len(blockIDs))
	var wg sync.WaitGroup

	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func(workerID int) {
			defer wg.Done()

			for blk := range ch {
				t1 := time.Now()
				man, err := m.loadManifestBlock(ctx, blk)

				if err != nil {
					errors <- err
					log.Debugf("block %v failed to be loaded by worker %v in %v: %v.", blk, workerID, time.Since(t1), err)
				} else {
					log.Debugf("block %v loaded by worker %v in %v.", blk, workerID, time.Since(t1))
					manifests <- man
				}
			}
		}(i)
	}

	// feed block IDs to goroutines
	for _, b := range blockIDs {
		ch <- b
	}
	close(ch)

	// wait for workers to complete
	wg.Wait()
	close(errors)
	close(manifests)

	// if there was any error, forward it
	if err := <-errors; err != nil {
		return nil, err
	}

	var man []manifest
	for m := range manifests {
		man = append(man, m)
	}

	return man, nil
}

func (m *Manager) loadManifestBlock(ctx context.Context, blockID string) (manifest, error) {
	man := manifest{}
	blk, err := m.b.GetBlock(ctx, blockID)
	if err != nil {
		// do not wrap the error here, we want to propagate original ErrBlockNotFound
		// which causes a retry if we lose the list/delete race.
		return man, err
	}

	gz, err := gzip.NewReader(bytes.NewReader(blk))
	if err != nil {
		return man, fmt.Errorf("unable to unpack block %q: %v", blockID, err)
	}

	if err := json.NewDecoder(gz).Decode(&man); err != nil {
		return man, fmt.Errorf("unable to parse block %q: %v", blockID, err)
	}

	return man, nil
}

// Compact performs compaction of manifest blocks.
func (m *Manager) Compact(ctx context.Context) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	return m.compactLocked(ctx)
}

func (m *Manager) maybeCompactLocked(ctx context.Context) error {
	if len(m.committedBlockIDs) < autoCompactionBlockCount {
		return nil
	}

	log.Debugf("performing automatic compaction of %v blocks", len(m.committedBlockIDs))
	if err := m.compactLocked(ctx); err != nil {
		return errors.Wrap(err, "unable to compact manifest blocks")
	}

	if err := m.b.Flush(ctx); err != nil {
		return errors.Wrap(err, "unable to flush blocks after auto-compaction")
	}

	return nil
}

func (m *Manager) compactLocked(ctx context.Context) error {
	log.Debugf("compactLocked: pendingEntries=%v blockIDs=%v", len(m.pendingEntries), len(m.committedBlockIDs))

	if len(m.committedBlockIDs) == 1 && len(m.pendingEntries) == 0 {
		return nil
	}

	// compaction needs to be atomic (deletes and rewrite should show up in one index block or not show up at all),
	// which is why we prevent index flushes while the compaction is in progress.
	m.b.DisableIndexFlush()
	defer m.b.EnableIndexFlush()

	for _, e := range m.committedEntries {
		m.pendingEntries[e.ID] = e
	}

	blockID, err := m.flushPendingEntriesLocked(ctx)
	if err != nil {
		return err
	}

	// drop all previously-committed blocks except the newly-written one
	// (whose ID could duplicate an existing block).
	for b := range m.committedBlockIDs {
		if b == blockID {
			// do not delete the block that was just written.
			continue
		}

		if err := m.b.DeleteBlock(b); err != nil {
			return fmt.Errorf("unable to delete block %q: %v", b, err)
		}

		delete(m.committedBlockIDs, b)
	}

	return nil
}

func (m *Manager) mergeEntry(e *manifestEntry) {
	prev := m.committedEntries[e.ID]
	if prev == nil {
		m.committedEntries[e.ID] = e
		return
	}

	if e.ModTime.After(prev.ModTime) {
		m.committedEntries[e.ID] = e
	}
}

func (m *Manager) ensureInitialized(ctx context.Context) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.initialized {
		return nil
	}

	if err := m.loadCommittedBlocksLocked(ctx); err != nil {
		return err
	}

	m.initialized = true
	return nil
}

func copyLabels(m map[string]string) map[string]string {
	r := map[string]string{}
	for k, v := range m {
		r[k] = v
	}
	return r
}

// NewManager returns a new manifest manager for the provided block manager.
func NewManager(ctx context.Context, b blockManager) (*Manager, error) {
	m := &Manager{
		b:                 b,
		pendingEntries:    map[string]*manifestEntry{},
		committedEntries:  map[string]*manifestEntry{},
		committedBlockIDs: map[string]bool{},
	}

	return m, nil
}
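The tests below exercise the manager in depth; as a condensed orientation, the typical lifecycle is Put (buffers an entry in pendingEntries), Flush (persists pending entries as one gzipped JSON block), and Find (matches entries by labels). A sketch, assuming mgr came from NewManager with a working block manager; the label and payload values are illustrative:

// Sketch: basic manifest lifecycle against an initialized *Manager.
func exampleManifestLifecycle(ctx context.Context, mgr *Manager) error {
	// Buffer an entry; "type" is the only required label.
	id, err := mgr.Put(ctx, map[string]string{"type": "snapshot", "host": "laptop"}, map[string]int{"files": 42})
	if err != nil {
		return err
	}

	// Persist all pending entries as a single manifest block.
	if err := mgr.Flush(ctx); err != nil {
		return err
	}

	// Find the entry again by a subset of its labels.
	entries, err := mgr.Find(ctx, map[string]string{"type": "snapshot"})
	if err != nil {
		return err
	}
	log.Debugf("put %v, found %v matching entries", id, len(entries))
	return nil
}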
321
manifest/manifest_manager_test.go
Normal file
@@ -0,0 +1,321 @@
package manifest

import (
	"context"
	"reflect"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/kopia/repo/block"
	"github.com/kopia/repo/internal/storagetesting"
	"github.com/pkg/errors"
)

func TestManifest(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	mgr, setupErr := newManagerForTesting(ctx, t, data)
	if setupErr != nil {
		t.Fatalf("unable to open block manager: %v", setupErr)
	}

	item1 := map[string]int{"foo": 1, "bar": 2}
	item2 := map[string]int{"foo": 2, "bar": 3}
	item3 := map[string]int{"foo": 3, "bar": 4}

	labels1 := map[string]string{"type": "item", "color": "red"}
	labels2 := map[string]string{"type": "item", "color": "blue", "shape": "square"}
	labels3 := map[string]string{"type": "item", "shape": "square", "color": "red"}

	id1 := addAndVerify(ctx, t, mgr, labels1, item1)
	id2 := addAndVerify(ctx, t, mgr, labels2, item2)
	id3 := addAndVerify(ctx, t, mgr, labels3, item3)

	cases := []struct {
		criteria map[string]string
		expected []string
	}{
		{map[string]string{"color": "red"}, []string{id1, id3}},
		{map[string]string{"color": "blue"}, []string{id2}},
		{map[string]string{"color": "green"}, nil},
		{map[string]string{"color": "red", "shape": "square"}, []string{id3}},
		{map[string]string{"color": "blue", "shape": "square"}, []string{id2}},
		{map[string]string{"color": "red", "shape": "circle"}, nil},
	}

	// verify before flush
	for _, tc := range cases {
		verifyMatches(ctx, t, mgr, tc.criteria, tc.expected)
	}
	verifyItem(ctx, t, mgr, id1, labels1, item1)
	verifyItem(ctx, t, mgr, id2, labels2, item2)
	verifyItem(ctx, t, mgr, id3, labels3, item3)

	if err := mgr.Flush(ctx); err != nil {
		t.Errorf("flush error: %v", err)
	}
	if err := mgr.Flush(ctx); err != nil {
		t.Errorf("flush error: %v", err)
	}

	// verify after flush
	for _, tc := range cases {
		verifyMatches(ctx, t, mgr, tc.criteria, tc.expected)
	}
	verifyItem(ctx, t, mgr, id1, labels1, item1)
	verifyItem(ctx, t, mgr, id2, labels2, item2)
	verifyItem(ctx, t, mgr, id3, labels3, item3)

	// flush underlying block manager and verify in new manifest manager.
	mgr.b.Flush(ctx)
	mgr2, setupErr := newManagerForTesting(ctx, t, data)
	if setupErr != nil {
		t.Fatalf("can't open block manager: %v", setupErr)
	}
	for _, tc := range cases {
		verifyMatches(ctx, t, mgr2, tc.criteria, tc.expected)
	}
	verifyItem(ctx, t, mgr2, id1, labels1, item1)
	verifyItem(ctx, t, mgr2, id2, labels2, item2)
	verifyItem(ctx, t, mgr2, id3, labels3, item3)
	if err := mgr2.Flush(ctx); err != nil {
		t.Errorf("flush error: %v", err)
	}

	// delete from one
	time.Sleep(1 * time.Second)
	if err := mgr.Delete(ctx, id3); err != nil {
		t.Errorf("delete error: %v", err)
	}
	verifyItemNotFound(ctx, t, mgr, id3)
	mgr.Flush(ctx)
	verifyItemNotFound(ctx, t, mgr, id3)

	// still found in another
	verifyItem(ctx, t, mgr2, id3, labels3, item3)
	if err := mgr2.loadCommittedBlocksLocked(ctx); err != nil {
		t.Errorf("unable to load: %v", err)
	}

	if err := mgr.Compact(ctx); err != nil {
		t.Errorf("can't compact: %v", err)
	}

	blks, err := mgr.b.ListBlocks(manifestBlockPrefix)
	if err != nil {
		t.Errorf("unable to list manifest blocks: %v", err)
	}
	if got, want := len(blks), 1; got != want {
		t.Errorf("unexpected number of blocks: %v, want %v", got, want)
	}

	mgr.b.Flush(ctx)

	mgr3, err := newManagerForTesting(ctx, t, data)
	if err != nil {
		t.Fatalf("can't open manager: %v", err)
	}

	verifyItem(ctx, t, mgr3, id1, labels1, item1)
	verifyItem(ctx, t, mgr3, id2, labels2, item2)
	verifyItemNotFound(ctx, t, mgr3, id3)
}

func TestManifestInitCorruptedBlock(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	st := storagetesting.NewMapStorage(data, nil, nil)

	f := block.FormattingOptions{
		Hash:        "HMAC-SHA256-128",
		Encryption:  "NONE",
		MaxPackSize: 100000,
	}

	// write some data to storage
	bm, err := block.NewManager(ctx, st, f, block.CachingOptions{}, nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	mgr, err := NewManager(ctx, bm)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	mgr.Put(ctx, map[string]string{"type": "foo"}, map[string]string{"some": "value"}) //nolint:errcheck
	mgr.Flush(ctx)
	bm.Flush(ctx)

	// corrupt data at the storage level.
	for k, v := range data {
		if strings.HasPrefix(k, "p") {
			for i := 0; i < len(v); i++ {
				v[i] ^= 1
			}
		}
	}

	// make a new block manager based on corrupted data.
	bm, err = block.NewManager(ctx, st, f, block.CachingOptions{}, nil)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	mgr, err = NewManager(ctx, bm)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	cases := []struct {
		desc string
		f    func() error
	}{
		{"GetRaw", func() error { _, err := mgr.GetRaw(ctx, "anything"); return err }},
		{"GetMetadata", func() error { _, err := mgr.GetMetadata(ctx, "anything"); return err }},
		{"Get", func() error { return mgr.Get(ctx, "anything", nil) }},
		{"Delete", func() error { return mgr.Delete(ctx, "anything") }},
		{"Find", func() error { _, err := mgr.Find(ctx, nil); return err }},
		{"Put", func() error {
			_, err := mgr.Put(ctx, map[string]string{
				"type": "foo",
			}, map[string]string{
				"some": "value",
			})
			return err
		}},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			err := tc.f()
			if err == nil || !strings.Contains(err.Error(), "invalid checksum") {
				t.Errorf("invalid error when initializing malformed manifest manager: %v", err)
			}
		})
	}
}

func addAndVerify(ctx context.Context, t *testing.T, mgr *Manager, labels map[string]string, data map[string]int) string {
	t.Helper()
	id, err := mgr.Put(ctx, labels, data)
	if err != nil {
		t.Errorf("unable to add %v (%v): %v", labels, data, err)
		return ""
	}

	verifyItem(ctx, t, mgr, id, labels, data)
	return id
}

func verifyItem(ctx context.Context, t *testing.T, mgr *Manager, id string, labels map[string]string, data map[string]int) {
	t.Helper()

	l, err := mgr.GetMetadata(ctx, id)
	if err != nil {
		t.Errorf("unable to retrieve %q: %v", id, err)
		return
	}

	if !reflect.DeepEqual(l.Labels, labels) {
		t.Errorf("invalid labels retrieved %v, wanted %v", l.Labels, labels)
	}

	var d2 map[string]int
	if err := mgr.Get(ctx, id, &d2); err != nil {
		t.Errorf("Get failed: %v", err)
	}

	if !reflect.DeepEqual(d2, data) {
		t.Errorf("invalid data retrieved %v, wanted %v", d2, data)
	}
}

func verifyItemNotFound(ctx context.Context, t *testing.T, mgr *Manager, id string) {
	t.Helper()

	_, err := mgr.GetMetadata(ctx, id)
	if got, want := err, ErrNotFound; got != want {
		t.Errorf("invalid error when getting %q %v, expected %v", id, err, ErrNotFound)
		return
	}
}

func verifyMatches(ctx context.Context, t *testing.T, mgr *Manager, labels map[string]string, expected []string) {
	t.Helper()

	var matches []string
	items, err := mgr.Find(ctx, labels)
	if err != nil {
		t.Errorf("error in Find(): %v", err)
		return
	}
	for _, m := range items {
		matches = append(matches, m.ID)
	}
	sort.Strings(matches)
	sort.Strings(expected)

	if !reflect.DeepEqual(matches, expected) {
		t.Errorf("invalid matches for %v: %v, expected %v", labels, matches, expected)
	}
}

func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]byte) (*Manager, error) {
	st := storagetesting.NewMapStorage(data, nil, nil)

	bm, err := block.NewManager(ctx, st, block.FormattingOptions{
		Hash:        "HMAC-SHA256-128",
		Encryption:  "NONE",
		MaxPackSize: 100000,
	}, block.CachingOptions{}, nil)
	if err != nil {
		return nil, errors.Wrap(err, "can't create block manager")
	}

	return NewManager(ctx, bm)
}

func TestManifestInvalidPut(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	mgr, setupErr := newManagerForTesting(ctx, t, data)
	if setupErr != nil {
		t.Fatalf("unable to open block manager: %v", setupErr)
	}

	cases := []struct {
		labels        map[string]string
		payload       interface{}
		expectedError string
	}{
		{map[string]string{"": ""}, "xxx", "'type' label is required"},
		{map[string]string{"type": "blah"}, complex128(1), "marshal error"},
	}

	for i, tc := range cases {
		_, err := mgr.Put(ctx, tc.labels, tc.payload)
		if err == nil || !strings.Contains(err.Error(), tc.expectedError) {
			t.Errorf("invalid error when putting case %v: %v, expected %v", i, err, tc.expectedError)
		}
	}
}

func TestManifestAutoCompaction(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}

	for i := 0; i < 100; i++ {
		mgr, setupErr := newManagerForTesting(ctx, t, data)
		if setupErr != nil {
			t.Fatalf("unable to open block manager: %v", setupErr)
		}

		item1 := map[string]int{"foo": 1, "bar": 2}
		labels1 := map[string]string{"type": "item", "color": "red"}
		addAndVerify(ctx, t, mgr, labels1, item1)
		mgr.Flush(ctx)
	}
}
18
manifest/serialized.go
Normal file
@@ -0,0 +1,18 @@
package manifest

import (
	"encoding/json"
	"time"
)

type manifest struct {
	Entries []*manifestEntry `json:"entries"`
}

type manifestEntry struct {
	ID      string            `json:"id"`
	Labels  map[string]string `json:"labels"`
	ModTime time.Time         `json:"modified"`
	Deleted bool              `json:"deleted,omitempty"`
	Content json.RawMessage   `json:"data"`
}
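For reference, the wire format these structs produce (before the manifest manager gzips it) looks like the sketch below; the field values are illustrative:

// Sketch: serialized form of a manifest prior to gzip compression.
func exampleSerializedForm() ([]byte, error) {
	m := manifest{
		Entries: []*manifestEntry{{
			ID:      "0123456789abcdef",
			Labels:  map[string]string{"type": "snapshot"},
			ModTime: time.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC),
			Content: json.RawMessage(`{"files":42}`),
		}},
	}
	// Produces:
	// {"entries":[{"id":"0123456789abcdef","labels":{"type":"snapshot"},
	//  "modified":"2018-01-01T00:00:00Z","data":{"files":42}}]}
	return json.Marshal(m)
}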
8
object/indirect.go
Normal file
@@ -0,0 +1,8 @@
package object

// indirectObjectEntry represents an entry in the indirect object stream.
type indirectObjectEntry struct {
	Start  int64 `json:"s,omitempty"`
	Length int64 `json:"l,omitempty"`
	Object ID    `json:"o,omitempty"`
}
245
object/object_manager.go
Normal file
@@ -0,0 +1,245 @@
// Package object implements repository support for content-addressable objects of arbitrary size.
package object

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"

	"github.com/kopia/repo/block"
	"github.com/pkg/errors"
)

// Reader allows reading, seeking, getting the length of, and closing a repository object.
type Reader interface {
	io.Reader
	io.Seeker
	io.Closer
	Length() int64
}

type blockManager interface {
	BlockInfo(ctx context.Context, blockID string) (block.Info, error)
	GetBlock(ctx context.Context, blockID string) ([]byte, error)
	WriteBlock(ctx context.Context, data []byte, prefix string) (string, error)
}

// Format describes the format of objects in a repository.
type Format struct {
	Splitter     string `json:"splitter,omitempty"`     // splitter used to break objects into storage blocks
	MinBlockSize int    `json:"minBlockSize,omitempty"` // minimum block size used with dynamic splitter
	AvgBlockSize int    `json:"avgBlockSize,omitempty"` // approximate size of storage block (used with dynamic splitter)
	MaxBlockSize int    `json:"maxBlockSize,omitempty"` // maximum size of storage block
}

// Manager implements a content-addressable storage on top of blob storage.
type Manager struct {
	Format Format

	blockMgr blockManager
	trace    func(message string, args ...interface{})

	newSplitter func() objectSplitter
}

// NewWriter creates an ObjectWriter for writing to the repository.
func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer {
	return &objectWriter{
		ctx:         ctx,
		repo:        om,
		splitter:    om.newSplitter(),
		description: opt.Description,
		prefix:      opt.Prefix,
	}
}

// Open creates a new ObjectReader for reading the given object from a repository.
func (om *Manager) Open(ctx context.Context, objectID ID) (Reader, error) {
	// log.Printf("Repository::Open %v", objectID.String())
	// defer log.Printf("finished Repository::Open() %v", objectID.String())

	if indexObjectID, ok := objectID.IndexObjectID(); ok {
		rd, err := om.Open(ctx, indexObjectID)
		if err != nil {
			return nil, err
		}
		defer rd.Close() //nolint:errcheck

		seekTable, err := om.flattenListChunk(rd)
		if err != nil {
			return nil, err
		}

		totalLength := seekTable[len(seekTable)-1].endOffset()

		return &objectReader{
			ctx:         ctx,
			repo:        om,
			seekTable:   seekTable,
			totalLength: totalLength,
		}, nil
	}

	return om.newRawReader(ctx, objectID)
}

// VerifyObject ensures that all objects backing ObjectID are present in the repository
// and returns the total length of the object and storage blocks of which it is composed.
func (om *Manager) VerifyObject(ctx context.Context, oid ID) (int64, []string, error) {
	blocks := &blockTracker{}
	l, err := om.verifyObjectInternal(ctx, oid, blocks)
	if err != nil {
		return 0, nil, err
	}

	return l, blocks.blockIDs(), nil
}

func (om *Manager) verifyIndirectObjectInternal(ctx context.Context, indexObjectID ID, blocks *blockTracker) (int64, error) {
	if _, err := om.verifyObjectInternal(ctx, indexObjectID, blocks); err != nil {
		return 0, errors.Wrap(err, "unable to read index")
	}
	rd, err := om.Open(ctx, indexObjectID)
	if err != nil {
		return 0, err
	}
	defer rd.Close() //nolint:errcheck

	seekTable, err := om.flattenListChunk(rd)
	if err != nil {
		return 0, err
	}

	for i, m := range seekTable {
		l, err := om.verifyObjectInternal(ctx, m.Object, blocks)
		if err != nil {
			return 0, err
		}

		if l != m.Length {
			return 0, fmt.Errorf("unexpected length of part %#v of indirect object %q: %v %v, expected %v", i, indexObjectID, m.Object, l, m.Length)
		}
	}

	totalLength := seekTable[len(seekTable)-1].endOffset()
	return totalLength, nil
}

func (om *Manager) verifyObjectInternal(ctx context.Context, oid ID, blocks *blockTracker) (int64, error) {
	if indexObjectID, ok := oid.IndexObjectID(); ok {
		return om.verifyIndirectObjectInternal(ctx, indexObjectID, blocks)
	}

	if blockID, ok := oid.BlockID(); ok {
		p, err := om.blockMgr.BlockInfo(ctx, blockID)
		if err != nil {
			return 0, err
		}
		blocks.addBlock(blockID)
		return int64(p.Length), nil
	}

	return 0, fmt.Errorf("unrecognized object type: %v", oid)
}

func nullTrace(message string, args ...interface{}) {
}

// ManagerOptions specifies object manager options.
type ManagerOptions struct {
	Trace func(message string, args ...interface{})
}

// NewObjectManager creates an ObjectManager with the specified block manager and format.
func NewObjectManager(ctx context.Context, bm blockManager, f Format, opts ManagerOptions) (*Manager, error) {
	om := &Manager{
		blockMgr: bm,
		Format:   f,
		trace:    nullTrace,
	}

	splitterID := f.Splitter
	if splitterID == "" {
		splitterID = "FIXED"
	}

	os := splitterFactories[splitterID]
	if os == nil {
		return nil, fmt.Errorf("unsupported splitter %q", f.Splitter)
	}

	om.newSplitter = func() objectSplitter {
		return os(&f)
	}

	if opts.Trace != nil {
		om.trace = opts.Trace
	} else {
		om.trace = nullTrace
	}

	return om, nil
}

/*

{"stream":"kopia:indirect","entries":[
{"l":1698099,"o":"D13ea27f9ad891ad4a2edfa983906863d"},
{"s":1698099,"l":1302081,"o":"De8ca8327cd3af5f4edbd5ed1009c525e"},
{"s":3000180,"l":4352499,"o":"D6b6eb48ca5361d06d72fe193813e42e1"},
{"s":7352679,"l":1170821,"o":"Dd14653f76b63802ed48be64a0e67fea9"},

{"s":91094118,"l":1645153,"o":"Daa55df764d881a1daadb5ea9de17abbb"}
]}
*/

type indirectObject struct {
	StreamID string                `json:"stream"`
	Entries  []indirectObjectEntry `json:"entries"`
}

func (om *Manager) flattenListChunk(rawReader io.Reader) ([]indirectObjectEntry, error) {
	var ind indirectObject

	if err := json.NewDecoder(rawReader).Decode(&ind); err != nil {
		return nil, errors.Wrap(err, "invalid indirect object")
	}

	return ind.Entries, nil
}

func (om *Manager) newRawReader(ctx context.Context, objectID ID) (Reader, error) {
	if blockID, ok := objectID.BlockID(); ok {
		payload, err := om.blockMgr.GetBlock(ctx, blockID)
		if err != nil {
			return nil, err
		}

		return newObjectReaderWithData(payload), nil
	}

	return nil, fmt.Errorf("unsupported object ID: %v", objectID)
}

type readerWithData struct {
	io.ReadSeeker
	length int64
}

func (rwd *readerWithData) Close() error {
	return nil
}

func (rwd *readerWithData) Length() int64 {
	return rwd.length
}

func newObjectReaderWithData(data []byte) Reader {
	return &readerWithData{
		ReadSeeker: bytes.NewReader(data),
		length:     int64(len(data)),
	}
}
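Before the tests, a compact sketch of the write-then-verify path; it assumes om came from NewObjectManager, and that Writer, WriterOptions, and ID behave as the tests below show (Write, Result, string-typed IDs):

// Sketch: write a payload through the object manager, then verify that
// every backing block is present.
func writeAndVerify(ctx context.Context, om *Manager, payload []byte) (ID, error) {
	w := om.NewWriter(ctx, WriterOptions{Description: "example"})
	if _, err := w.Write(payload); err != nil {
		return "", err
	}

	oid, err := w.Result()
	if err != nil {
		return "", err
	}

	length, blocks, err := om.VerifyObject(ctx, oid)
	if err != nil {
		return "", err
	}
	om.trace("verified %v: %v bytes in %v blocks", oid, length, len(blocks))
	return oid, nil
}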
344
object/object_manager_test.go
Normal file
@@ -0,0 +1,344 @@
package object

import (
	"bytes"
	"context"
	cryptorand "crypto/rand"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"math/rand"
	"reflect"
	"runtime/debug"
	"sync"
	"testing"

	"github.com/kopia/repo/block"
	"github.com/kopia/repo/storage"
)

type fakeBlockManager struct {
	mu   sync.Mutex
	data map[string][]byte
}

func (f *fakeBlockManager) GetBlock(ctx context.Context, blockID string) ([]byte, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	if d, ok := f.data[blockID]; ok {
		return append([]byte(nil), d...), nil
	}

	return nil, storage.ErrBlockNotFound
}

func (f *fakeBlockManager) WriteBlock(ctx context.Context, data []byte, prefix string) (string, error) {
	h := sha256.New()
	h.Write(data) //nolint:errcheck
	blockID := prefix + string(hex.EncodeToString(h.Sum(nil)))

	f.mu.Lock()
	defer f.mu.Unlock()

	f.data[blockID] = append([]byte(nil), data...)
	return blockID, nil
}

func (f *fakeBlockManager) BlockInfo(ctx context.Context, blockID string) (block.Info, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	if d, ok := f.data[blockID]; ok {
		return block.Info{BlockID: blockID, Length: uint32(len(d))}, nil
	}

	return block.Info{}, storage.ErrBlockNotFound
}

func (f *fakeBlockManager) Flush(ctx context.Context) error {
	return nil
}

func setupTest(t *testing.T) (map[string][]byte, *Manager) {
	return setupTestWithData(t, map[string][]byte{}, ManagerOptions{})
}

func setupTestWithData(t *testing.T, data map[string][]byte, opts ManagerOptions) (map[string][]byte, *Manager) {
	r, err := NewObjectManager(context.Background(), &fakeBlockManager{data: data}, Format{
		MaxBlockSize: 400,
		Splitter:     "FIXED",
	}, opts)
	if err != nil {
		t.Fatalf("can't create object manager: %v", err)
	}

	return data, r
}

func TestWriters(t *testing.T) {
	ctx := context.Background()
	cases := []struct {
		data     []byte
		objectID ID
	}{
		{
			[]byte("the quick brown fox jumps over the lazy dog"),
			"05c6e08f1d9fdafa03147fcb8f82f124c76d2f70e3d989dc8aadb5e7d7450bec",
		},
		{make([]byte, 100), "cd00e292c5970d3c5e2f0ffa5171e555bc46bfc4faddfb4a418b6840b86e79a3"}, // 100 zero bytes
	}

	for _, c := range cases {
		data, om := setupTest(t)

		writer := om.NewWriter(ctx, WriterOptions{})

		if _, err := writer.Write(c.data); err != nil {
			t.Errorf("write error: %v", err)
		}

		result, err := writer.Result()
		if err != nil {
			t.Errorf("error getting writer results for %v, expected: %v", c.data, c.objectID.String())
			continue
		}

		if !objectIDsEqual(result, c.objectID) {
			t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String())
		}

		if _, ok := c.objectID.BlockID(); !ok {
			if len(data) != 0 {
				t.Errorf("unexpected data written to the storage: %v", data)
			}
		} else {
			if len(data) != 1 {
				// 1 data block
				t.Errorf("unexpected data written to the storage: %v", data)
			}
		}
	}
}

func objectIDsEqual(o1 ID, o2 ID) bool {
	return reflect.DeepEqual(o1, o2)
}

func TestWriterCompleteChunkInTwoWrites(t *testing.T) {
	ctx := context.Background()
	_, om := setupTest(t)

	bytes := make([]byte, 100)
	writer := om.NewWriter(ctx, WriterOptions{})
	writer.Write(bytes[0:50]) //nolint:errcheck
	writer.Write(bytes[0:50]) //nolint:errcheck
	result, err := writer.Result()
	if !objectIDsEqual(result, "cd00e292c5970d3c5e2f0ffa5171e555bc46bfc4faddfb4a418b6840b86e79a3") {
		t.Errorf("unexpected result: %v err: %v", result, err)
	}
}

func verifyIndirectBlock(ctx context.Context, t *testing.T, r *Manager, oid ID) {
	for indexBlockID, isIndirect := oid.IndexObjectID(); isIndirect; indexBlockID, isIndirect = indexBlockID.IndexObjectID() {
		rd, err := r.Open(ctx, indexBlockID)
		if err != nil {
			t.Errorf("unable to open %v: %v", oid.String(), err)
			return
		}
		defer rd.Close()

		var ind indirectObject
		if err := json.NewDecoder(rd).Decode(&ind); err != nil {
			t.Errorf("cannot parse indirect stream: %v", err)
		}
	}
}

func TestIndirection(t *testing.T) {
	ctx := context.Background()
	cases := []struct {
		dataLength          int
		expectedBlockCount  int
		expectedIndirection int
	}{
		{dataLength: 200, expectedBlockCount: 1, expectedIndirection: 0},
		{dataLength: 1400, expectedBlockCount: 3, expectedIndirection: 1},
		{dataLength: 2000, expectedBlockCount: 4, expectedIndirection: 2},
		{dataLength: 3000, expectedBlockCount: 5, expectedIndirection: 2},
		{dataLength: 4000, expectedBlockCount: 5, expectedIndirection: 2},
		{dataLength: 10000, expectedBlockCount: 10, expectedIndirection: 3},
	}

	for _, c := range cases {
		data, om := setupTest(t)

		contentBytes := make([]byte, c.dataLength)

		writer := om.NewWriter(ctx, WriterOptions{})
		if _, err := writer.Write(contentBytes); err != nil {
			t.Errorf("write error: %v", err)
		}
		result, err := writer.Result()
		if err != nil {
			t.Errorf("error getting writer results: %v", err)
		}

		if indirectionLevel(result) != c.expectedIndirection {
			t.Errorf("incorrect indirection level for size: %v: %v, expected %v", c.dataLength, indirectionLevel(result), c.expectedIndirection)
		}

		if got, want := len(data), c.expectedBlockCount; got != want {
			t.Errorf("unexpected block count for %v: %v, expected %v", c.dataLength, got, want)
		}

		l, b, err := om.VerifyObject(ctx, result)
		if err != nil {
			t.Errorf("error verifying %q: %v", result, err)
		}

		if got, want := int(l), len(contentBytes); got != want {
			t.Errorf("got invalid byte count for %q: %v, wanted %v", result, got, want)
		}

		if got, want := len(b), c.expectedBlockCount; got != want {
			t.Errorf("invalid block count for %v, got %v, wanted %v", result, got, want)
		}

		verifyIndirectBlock(ctx, t, om, result)
	}
}

func indirectionLevel(oid ID) int {
	indexObjectID, ok := oid.IndexObjectID()
	if !ok {
		return 0
	}

	return 1 + indirectionLevel(indexObjectID)
}

func TestHMAC(t *testing.T) {
	ctx := context.Background()
	content := bytes.Repeat([]byte{0xcd}, 50)

	_, om := setupTest(t)

	w := om.NewWriter(ctx, WriterOptions{})
	w.Write(content) //nolint:errcheck
	result, err := w.Result()
	if result.String() != "cad29ff89951a3c085c86cb7ed22b82b51f7bdfda24f932c7f9601f51d5975ba" {
		t.Errorf("unexpected result: %v err: %v", result.String(), err)
	}
}

func TestReader(t *testing.T) {
	ctx := context.Background()
	data, om := setupTest(t)

	storedPayload := []byte("foo\nbar")
	data["a76999788386641a3ec798554f1fe7e6"] = storedPayload

	cases := []struct {
		text    string
		payload []byte
	}{
		{"a76999788386641a3ec798554f1fe7e6", storedPayload},
	}

	for _, c := range cases {
		objectID, err := ParseID(c.text)
		if err != nil {
			t.Errorf("cannot parse object ID: %v", err)
			continue
		}

		reader, err := om.Open(ctx, objectID)
		if err != nil {
			t.Errorf("cannot create reader for %v: %v", objectID, err)
			continue
		}

		d, err := ioutil.ReadAll(reader)
		if err != nil {
			t.Errorf("cannot read all data for %v: %v", objectID, err)
			continue
		}
		if !bytes.Equal(d, c.payload) {
			t.Errorf("incorrect payload for %v: expected: %v got: %v", objectID, c.payload, d)
			continue
		}
	}
}

func TestReaderStoredBlockNotFound(t *testing.T) {
	ctx := context.Background()
	_, om := setupTest(t)

	objectID, err := ParseID("deadbeef")
	if err != nil {
		t.Errorf("cannot parse object ID: %v", err)
	}
	reader, err := om.Open(ctx, objectID)
	if err != storage.ErrBlockNotFound || reader != nil {
		t.Errorf("unexpected result: reader: %v err: %v", reader, err)
	}
}

func TestEndToEndReadAndSeek(t *testing.T) {
	ctx := context.Background()
	_, om := setupTest(t)

	for _, size := range []int{1, 199, 200, 201, 9999, 512434} {
		// Create some random data sample of the specified size.
		randomData := make([]byte, size)
		cryptorand.Read(randomData) //nolint:errcheck

		writer := om.NewWriter(ctx, WriterOptions{})
		if _, err := writer.Write(randomData); err != nil {
			t.Errorf("write error: %v", err)
		}
		objectID, err := writer.Result()
		writer.Close()
		if err != nil {
			t.Errorf("cannot get writer result for %v: %v", size, err)
			continue
		}

		verify(ctx, t, om, objectID, randomData, fmt.Sprintf("%v %v", objectID, size))
	}
}

func verify(ctx context.Context, t *testing.T, om *Manager, objectID ID, expectedData []byte, testCaseID string) {
	t.Helper()
	reader, err := om.Open(ctx, objectID)
	if err != nil {
		t.Errorf("cannot get reader for %v (%v): %v %v", testCaseID, objectID, err, string(debug.Stack()))
		return
	}

	for i := 0; i < 20; i++ {
		sampleSize := int(rand.Int31n(300))
		seekOffset := int(rand.Int31n(int32(len(expectedData))))
		if seekOffset+sampleSize > len(expectedData) {
			sampleSize = len(expectedData) - seekOffset
		}
		if sampleSize > 0 {
			got := make([]byte, sampleSize)
			if offset, err := reader.Seek(int64(seekOffset), 0); err != nil || offset != int64(seekOffset) {
				t.Errorf("seek error: %v offset=%v expected:%v", err, offset, seekOffset)
			}
			if n, err := reader.Read(got); err != nil || n != sampleSize {
				t.Errorf("invalid data: n=%v, expected=%v, err:%v", n, sampleSize, err)
			}

			expected := expectedData[seekOffset : seekOffset+sampleSize]

			if !bytes.Equal(expected, got) {
				t.Errorf("incorrect data read for %v: expected: %x, got: %x", testCaseID, expected, got)
			}
		}
	}
}
159
object/object_reader.go
Normal file
@@ -0,0 +1,159 @@
package object

import (
	"context"
	"fmt"
	"io"
)

func (i *indirectObjectEntry) endOffset() int64 {
	return i.Start + i.Length
}

type objectReader struct {
	ctx  context.Context
	repo *Manager

	seekTable []indirectObjectEntry

	currentPosition int64 // Overall position in the objectReader
	totalLength     int64 // Overall length

	currentChunkIndex    int    // Index of current chunk in the seek table
	currentChunkData     []byte // Current chunk data
	currentChunkPosition int    // Read position in the current chunk
}

func (r *objectReader) Read(buffer []byte) (int, error) {
	readBytes := 0
	remaining := len(buffer)

	for remaining > 0 {
		if r.currentChunkData != nil {
			toCopy := len(r.currentChunkData) - r.currentChunkPosition
			if toCopy == 0 {
				// EOF on current chunk
				r.closeCurrentChunk()
				r.currentChunkIndex++
				continue
			}

			if toCopy > remaining {
				toCopy = remaining
			}

			copy(buffer[readBytes:],
				r.currentChunkData[r.currentChunkPosition:r.currentChunkPosition+toCopy])
			r.currentChunkPosition += toCopy
			r.currentPosition += int64(toCopy)
			readBytes += toCopy
			remaining -= toCopy
		} else if r.currentChunkIndex < len(r.seekTable) {
			err := r.openCurrentChunk()
			if err != nil {
				return 0, err
			}
		} else {
			break
		}
	}

	if readBytes == 0 {
		return readBytes, io.EOF
	}

	return readBytes, nil
}

func (r *objectReader) openCurrentChunk() error {
	st := r.seekTable[r.currentChunkIndex]
	blockData, err := r.repo.Open(r.ctx, st.Object)
	if err != nil {
		return err
	}
	defer blockData.Close() //nolint:errcheck

	b := make([]byte, st.Length)
	if _, err := io.ReadFull(blockData, b); err != nil {
		return err
	}

	r.currentChunkData = b
	r.currentChunkPosition = 0
	return nil
}

func (r *objectReader) closeCurrentChunk() {
	r.currentChunkData = nil
}

// findChunkIndexForOffset binary-searches the seek table for the chunk containing the given offset.
func (r *objectReader) findChunkIndexForOffset(offset int64) (int, error) {
	left := 0
	right := len(r.seekTable) - 1
	for left <= right {
		middle := (left + right) / 2

		if offset < r.seekTable[middle].Start {
			right = middle - 1
			continue
		}

		if offset >= r.seekTable[middle].endOffset() {
			left = middle + 1
			continue
		}

		return middle, nil
	}

	return 0, fmt.Errorf("can't find chunk for offset %v", offset)
}

func (r *objectReader) Seek(offset int64, whence int) (int64, error) {
	if whence == 1 {
		return r.Seek(r.currentPosition+offset, 0)
	}

	if whence == 2 {
		return r.Seek(r.totalLength+offset, 0)
	}

	if offset < 0 {
		return -1, fmt.Errorf("invalid seek %v %v", offset, whence)
	}

	if offset > r.totalLength {
		offset = r.totalLength
	}

	index, err := r.findChunkIndexForOffset(offset)
	if err != nil {
		return -1, fmt.Errorf("invalid seek %v %v: %v", offset, whence, err)
	}

	chunkStartOffset := r.seekTable[index].Start

	if index != r.currentChunkIndex {
		r.closeCurrentChunk()
		r.currentChunkIndex = index
	}

	if r.currentChunkData == nil {
		if err := r.openCurrentChunk(); err != nil {
			return 0, err
		}
	}

	r.currentChunkPosition = int(offset - chunkStartOffset)
	r.currentPosition = offset

	return r.currentPosition, nil
}

func (r *objectReader) Close() error {
	return nil
}

func (r *objectReader) Length() int64 {
	return r.totalLength
}
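objectReader implements io.ReadSeeker on top of the chunked seek table above. A minimal, hypothetical caller sketch (readRange is not part of this package; om must be an *object.Manager obtained elsewhere) that reads a sub-range of a stored object:

package main

import (
	"context"
	"io"

	"github.com/kopia/repo/object"
)

// readRange is a hypothetical helper: it reads length bytes at offset from a
// stored object, relying on the Seek/Read implementation above.
func readRange(ctx context.Context, om *object.Manager, oid object.ID, offset, length int64) ([]byte, error) {
	r, err := om.Open(ctx, oid)
	if err != nil {
		return nil, err
	}
	defer r.Close() //nolint:errcheck

	if _, err := r.Seek(offset, io.SeekStart); err != nil {
		return nil, err
	}

	buf := make([]byte, length)
	_, err = io.ReadFull(r, buf)
	return buf, err
}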
110
object/object_splitter.go
Normal file
@@ -0,0 +1,110 @@
package object

import (
	"math"
	"sort"

	"github.com/silvasur/buzhash"
)

type objectSplitter interface {
	add(b byte) bool
}

// SupportedSplitters is a list of supported object splitters including:
//
//	NEVER   - prevents objects from ever splitting
//	FIXED   - always splits large objects exactly at the maximum block size boundary
//	DYNAMIC - dynamically splits large objects based on rolling hash of contents.
var SupportedSplitters []string

var splitterFactories = map[string]func(*Format) objectSplitter{
	"NEVER": func(f *Format) objectSplitter {
		return newNeverSplitter()
	},
	"FIXED": func(f *Format) objectSplitter {
		return newFixedSplitter(f.MaxBlockSize)
	},
	"DYNAMIC": func(f *Format) objectSplitter {
		return newRollingHashSplitter(buzhash.NewBuzHash(32), f.MinBlockSize, f.AvgBlockSize, f.MaxBlockSize)
	},
}

func init() {
	for k := range splitterFactories {
		SupportedSplitters = append(SupportedSplitters, k)
	}
	sort.Strings(SupportedSplitters)
}

// DefaultSplitter is the name of the splitter used by default for new repositories.
const DefaultSplitter = "DYNAMIC"

type neverSplitter struct{}

func (s *neverSplitter) add(b byte) bool {
	return false
}

func newNeverSplitter() objectSplitter {
	return &neverSplitter{}
}

type fixedSplitter struct {
	cur         int
	chunkLength int
}

func (s *fixedSplitter) add(b byte) bool {
	s.cur++
	if s.cur >= s.chunkLength {
		s.cur = 0
		return true
	}

	return false
}

func newFixedSplitter(chunkLength int) objectSplitter {
	return &fixedSplitter{chunkLength: chunkLength}
}

type rollingHash interface {
	HashByte(b byte) uint32
}

type rollingHashSplitter struct {
	rh   rollingHash
	mask uint32

	currentBlockSize int
	minBlockSize     int
	maxBlockSize     int
}

func (rs *rollingHashSplitter) add(b byte) bool {
	sum := rs.rh.HashByte(b)
	rs.currentBlockSize++
	if rs.currentBlockSize >= rs.maxBlockSize {
		rs.currentBlockSize = 0
		return true
	}
	if sum&rs.mask == 0 && rs.currentBlockSize > rs.minBlockSize && sum != 0 {
		// log.Printf("splitting %v on sum %x mask %x", rs.currentBlockSize, sum, rs.mask)
		rs.currentBlockSize = 0
		return true
	}
	return false
}

func newRollingHashSplitter(rh rollingHash, minBlockSize int, approxBlockSize int, maxBlockSize int) objectSplitter {
	bits := rollingHashBits(approxBlockSize)
	mask := ^(^uint32(0) << bits)
	return &rollingHashSplitter{rh, mask, 0, minBlockSize, maxBlockSize}
}

// rollingHashBits returns the number of low-order hash bits to match, rounding
// log2(n) to the nearest integer, so that splits occur on average every n bytes.
func rollingHashBits(n int) uint {
	e := math.Log2(float64(n))
	exp := math.Floor(e + 0.5)
	return uint(exp)
}
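To make the mask computation concrete: for an average block size of 1024 bytes, rollingHashBits returns 10, so the splitter matches the low 10 bits of the rolling hash, and a uniformly distributed 32-bit hash fires roughly once every 2^10 bytes. A standalone sketch (not part of the package):

package main

import "fmt"

func main() {
	bits := uint(10)                 // rollingHashBits(1024)
	mask := ^(^uint32(0) << bits)    // same expression as newRollingHashSplitter
	fmt.Printf("mask = %#x\n", mask) // mask = 0x3ff
}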
134
object/object_splitter_test.go
Normal file
@@ -0,0 +1,134 @@
package object

import (
	"math"
	"math/rand"
	"testing"

	"github.com/silvasur/buzhash"
)

func TestSplitters(t *testing.T) {
	cases := []struct {
		desc        string
		newSplitter func() objectSplitter
	}{
		{"rolling buzhash with 3 bits", func() objectSplitter { return newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 8, 20) }},
		{"rolling buzhash with 5 bits", func() objectSplitter { return newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, 20) }},
	}

	for _, tc := range cases {
		s1 := tc.newSplitter()
		s2 := tc.newSplitter()

		rnd := make([]byte, 50000000)
		rand.Read(rnd)

		for i, p := range rnd {
			if got, want := s1.add(p), s2.add(p); got != want {
				t.Errorf("incorrect add() result for %v at offset %v", tc.desc, i)
			}
		}
	}
}

func TestSplitterStability(t *testing.T) {
	r := rand.New(rand.NewSource(5))
	rnd := make([]byte, 5000000)
	if n, err := r.Read(rnd); n != len(rnd) || err != nil {
		t.Fatalf("can't initialize random data: %v", err)
	}

	cases := []struct {
		splitter objectSplitter
		count    int
		avg      int
		minSplit int
		maxSplit int
	}{
		{newFixedSplitter(1000), 5000, 1000, 1000, 1000},
		{newFixedSplitter(10000), 500, 10000, 10000, 10000},

		{newNeverSplitter(), 0, 0, math.MaxInt32, 0},

		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, math.MaxInt32), 156262, 31, 1, 404},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 1024, math.MaxInt32), 4933, 1013, 1, 8372},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 2048, math.MaxInt32), 2476, 2019, 1, 19454},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32768, math.MaxInt32), 185, 27027, 1, 177510},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 65536, math.MaxInt32), 99, 50505, 418, 230449},

		// min and max
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, 64), 179921, 27, 1, 64},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 1024, 10000), 4933, 1013, 1, 8372},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 2048, 10000), 2490, 2008, 1, 10000},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 500, 32768, 100000), 183, 27322, 522, 100000},
		{newRollingHashSplitter(buzhash.NewBuzHash(32), 500, 65536, 100000), 113, 44247, 522, 100000},
	}

	for _, tc := range cases {
		s := tc.splitter

		lastSplit := -1
		maxSplit := 0
		minSplit := int(math.MaxInt32)
		count := 0
		for i, p := range rnd {
			if s.add(p) {
				l := i - lastSplit
				if l >= maxSplit {
					maxSplit = l
				}
				if l < minSplit {
					minSplit = l
				}
				count++
				lastSplit = i
			}
		}

		var avg int
		if count > 0 {
			avg = len(rnd) / count
		}

		if got, want := avg, tc.avg; got != want {
			t.Errorf("invalid split average size %v, wanted %v", got, want)
		}

		if got, want := count, tc.count; got != want {
			t.Errorf("invalid split count %v, wanted %v", got, want)
		}
		if got, want := minSplit, tc.minSplit; got != want {
			t.Errorf("min split %v, wanted %v", got, want)
		}
		if got, want := maxSplit, tc.maxSplit; got != want {
			t.Errorf("max split %v, wanted %v", got, want)
		}
	}
}

func TestRollingHashBits(t *testing.T) {
	cases := []struct {
		blockSize int
		bits      uint
	}{
		{256, 8},
		{128, 7},
		{100, 7},
		{500, 9},
		{700, 9},
		{724, 9},
		{725, 10},
		{768, 10},
		{1000, 10},
		{1000000, 20},
		{10000000, 23},
		{20000000, 24},
	}

	for _, tc := range cases {
		if got, want := rollingHashBits(tc.blockSize), tc.bits; got != want {
			t.Errorf("rollingHashBits(%v) = %v, wanted %v", tc.blockSize, got, want)
		}
	}
}
145
object/object_writer.go
Normal file
@@ -0,0 +1,145 @@
package object

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"sync"

	"github.com/pkg/errors"
)

// Writer allows writing content to the storage and supports automatic deduplication and encryption
// of written data.
type Writer interface {
	io.WriteCloser

	Result() (ID, error)
}

type blockTracker struct {
	mu     sync.Mutex
	blocks map[string]bool
}

func (t *blockTracker) addBlock(blockID string) {
	t.mu.Lock()
	defer t.mu.Unlock()

	if t.blocks == nil {
		t.blocks = make(map[string]bool)
	}
	t.blocks[blockID] = true
}

func (t *blockTracker) blockIDs() []string {
	t.mu.Lock()
	defer t.mu.Unlock()

	result := make([]string, 0, len(t.blocks))
	for k := range t.blocks {
		result = append(result, k)
	}
	return result
}

type objectWriter struct {
	ctx  context.Context
	repo *Manager

	prefix      string
	buffer      bytes.Buffer
	totalLength int64

	currentPosition int64
	blockIndex      []indirectObjectEntry

	description string

	splitter objectSplitter
}

func (w *objectWriter) Close() error {
	return nil
}

func (w *objectWriter) Write(data []byte) (n int, err error) {
	dataLen := len(data)
	w.totalLength += int64(dataLen)

	for _, d := range data {
		w.buffer.WriteByte(d)

		if w.splitter.add(d) {
			if err := w.flushBuffer(); err != nil {
				return 0, err
			}
		}
	}

	return dataLen, nil
}

func (w *objectWriter) flushBuffer() error {
	length := w.buffer.Len()
	chunkID := len(w.blockIndex)
	w.blockIndex = append(w.blockIndex, indirectObjectEntry{})
	w.blockIndex[chunkID].Start = w.currentPosition
	w.blockIndex[chunkID].Length = int64(length)
	w.currentPosition += int64(length)

	var b2 bytes.Buffer
	w.buffer.WriteTo(&b2) //nolint:errcheck
	w.buffer.Reset()

	blockID, err := w.repo.blockMgr.WriteBlock(w.ctx, b2.Bytes(), w.prefix)
	w.repo.trace("OBJECT_WRITER(%q) stored %v (%v bytes)", w.description, blockID, length)
	if err != nil {
		return fmt.Errorf("error when flushing chunk %d of %s: %v", chunkID, w.description, err)
	}

	w.blockIndex[chunkID].Object = DirectObjectID(blockID)
	return nil
}

func (w *objectWriter) Result() (ID, error) {
	if w.buffer.Len() > 0 || len(w.blockIndex) == 0 {
		if err := w.flushBuffer(); err != nil {
			return "", err
		}
	}

	if len(w.blockIndex) == 1 {
		return w.blockIndex[0].Object, nil
	}

	iw := &objectWriter{
		ctx:         w.ctx,
		repo:        w.repo,
		description: "LIST(" + w.description + ")",
		splitter:    w.repo.newSplitter(),
		prefix:      w.prefix,
	}

	ind := indirectObject{
		StreamID: "kopia:indirect",
		Entries:  w.blockIndex,
	}

	if err := json.NewEncoder(iw).Encode(ind); err != nil {
		return "", errors.Wrap(err, "unable to write indirect block index")
	}
	oid, err := iw.Result()
	if err != nil {
		return "", err
	}
	return IndirectObjectID(oid), nil
}

// WriterOptions can be passed to Repository.NewWriter()
type WriterOptions struct {
	Description string
	Prefix      string // empty string or a single-character ('g'..'z')
}
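A hypothetical end-to-end sketch of the Writer contract above (storeBytes is not part of the package; om must be an *object.Manager obtained elsewhere):

package main

import (
	"context"

	"github.com/kopia/repo/object"
)

// storeBytes is a hypothetical helper showing the NewWriter/Write/Result flow.
func storeBytes(ctx context.Context, om *object.Manager, data []byte) (object.ID, error) {
	w := om.NewWriter(ctx, object.WriterOptions{Description: "example"})
	defer w.Close() //nolint:errcheck

	if _, err := w.Write(data); err != nil {
		return "", err
	}

	// Result flushes buffered data; single-chunk objects come back as a
	// direct ID, multi-chunk objects as an "I"-prefixed indirect ID.
	return w.Result()
}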
94
object/objectid.go
Normal file
@@ -0,0 +1,94 @@
package object

import (
	"encoding/hex"
	"fmt"
	"strings"
)

// ID is an identifier of a repository object. Repository objects can be stored:
//
// 1. In a single content block; this is the most common case for small objects.
// 2. In a series of content blocks with an indirect block pointing at them (multiple indirections are allowed).
//    This is used for larger files. Object IDs using indirect blocks start with "I".
type ID string

// HasObjectID exposes the identifier of an object.
type HasObjectID interface {
	ObjectID() ID
}

// String returns a string representation of ObjectID that is suitable for displaying in the UI.
func (i ID) String() string {
	return strings.Replace(string(i), "D", "", -1)
}

// IndexObjectID returns the object ID of the underlying index object.
func (i ID) IndexObjectID() (ID, bool) {
	if strings.HasPrefix(string(i), "I") {
		return i[1:], true
	}

	return "", false
}

// BlockID returns the block ID of the underlying content storage block.
func (i ID) BlockID() (string, bool) {
	if strings.HasPrefix(string(i), "D") {
		return string(i[1:]), true
	}
	if strings.HasPrefix(string(i), "I") {
		return "", false
	}

	return string(i), true
}

// Validate checks the ID format for validity and reports any errors.
func (i ID) Validate() error {
	if indexObjectID, ok := i.IndexObjectID(); ok {
		if err := indexObjectID.Validate(); err != nil {
			return fmt.Errorf("invalid indirect object ID %v: %v", i, err)
		}

		return nil
	}

	if blockID, ok := i.BlockID(); ok {
		if len(blockID) < 2 {
			return fmt.Errorf("missing block ID")
		}

		// odd length - the first character must be a single character between 'g' and 'z'
		if len(blockID)%2 == 1 {
			if blockID[0] < 'g' || blockID[0] > 'z' {
				return fmt.Errorf("invalid block ID prefix: %v", blockID)
			}
			blockID = blockID[1:]
		}

		if _, err := hex.DecodeString(blockID); err != nil {
			return fmt.Errorf("invalid blockID suffix, must be base-16 encoded: %v", blockID)
		}

		return nil
	}

	return fmt.Errorf("invalid object ID: %v", i)
}

// DirectObjectID returns a direct object ID based on the provided block ID.
func DirectObjectID(blockID string) ID {
	return ID(blockID)
}

// IndirectObjectID returns an indirect object ID based on the underlying index object ID.
func IndirectObjectID(indexObjectID ID) ID {
	return "I" + indexObjectID
}

// ParseID converts the specified string into an object ID.
func ParseID(s string) (ID, error) {
	i := ID(s)
	return i, i.Validate()
}
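The ID grammar is easiest to see on concrete values; this standalone sketch mirrors cases from the test file below:

package main

import (
	"fmt"

	"github.com/kopia/repo/object"
)

func main() {
	// "Df0f0"  -> valid: "f0f0" is even-length hex.
	// "Dxf0f0" -> valid: odd length, so the 'g'..'z' prefix 'x' is stripped and "f0f0" is hex.
	// "Dxf0f"  -> invalid: even length, so nothing is stripped and 'x' is not a hex digit.
	for _, s := range []string{"Df0f0", "Dxf0f0", "Dxf0f"} {
		_, err := object.ParseID(s)
		fmt.Printf("%-8s -> err=%v\n", s, err)
	}
}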
46
object/objectid_test.go
Normal file
@@ -0,0 +1,46 @@
package object

import (
	"testing"
)

func TestParseObjectID(t *testing.T) {
	cases := []struct {
		text    string
		isValid bool
	}{
		{"Df0f0", true},
		{"IDf0f0", true},
		{"IDf0f0", true},
		{"IIDf0f0", true},
		{"Dxf0f0", true},
		{"IDxf0f0", true},
		{"IDxf0f0", true},
		{"IIDxf0f0", true},
		{"Dxf0f", false},
		{"IDxf0f", false},
		{"Da", false},
		{"Daf0f0", false},
		{"", false},
		{"B!$@#$!@#$", false},
		{"X", false},
		{"I.", false},
		{"I.x", false},
		{"I.af", false},
		{"Ix.ag", false},
		{"Iab.", false},
		{"I1", false},
		{"I1,", false},
		{"I-1,X", false},
		{"Xsomething", false},
	}

	for _, tc := range cases {
		_, err := ParseID(tc.text)
		if err != nil && tc.isValid {
			t.Errorf("error parsing %q: %v", tc.text, err)
		} else if err == nil && !tc.isValid {
			t.Errorf("unexpected success parsing %v", tc.text)
		}
	}
}
209
open.go
Normal file
@@ -0,0 +1,209 @@
package repo

import (
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"path/filepath"

	"github.com/kopia/repo/block"
	"github.com/kopia/repo/internal/repologging"
	"github.com/kopia/repo/manifest"
	"github.com/kopia/repo/object"
	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/logging"
	"github.com/pkg/errors"
)

var (
	log = repologging.Logger("kopia/repo")
)

// Options provides configuration parameters for connection to a repository.
type Options struct {
	TraceStorage         func(f string, args ...interface{}) // Logs all storage access using provided Printf-style function
	ObjectManagerOptions object.ManagerOptions
}

// Open opens a Repository specified in the configuration file.
func Open(ctx context.Context, configFile string, password string, options *Options) (rep *Repository, err error) {
	log.Debugf("opening repository from %v", configFile)
	defer func() {
		if err == nil {
			log.Debugf("opened repository")
		} else {
			log.Errorf("failed to open repository: %v", err)
		}
	}()

	if options == nil {
		options = &Options{}
	}

	configFile, err = filepath.Abs(configFile)
	if err != nil {
		return nil, err
	}

	log.Debugf("loading config from file: %v", configFile)
	lc, err := loadConfigFromFile(configFile)
	if err != nil {
		return nil, err
	}

	log.Debugf("opening storage: %v", lc.Storage.Type)

	st, err := storage.NewStorage(ctx, lc.Storage)
	if err != nil {
		return nil, errors.Wrap(err, "cannot open storage")
	}

	if options.TraceStorage != nil {
		st = logging.NewWrapper(st, logging.Prefix("[STORAGE] "), logging.Output(options.TraceStorage))
	}

	r, err := OpenWithConfig(ctx, st, lc, password, options, lc.Caching)
	if err != nil {
		st.Close(ctx) //nolint:errcheck
		return nil, err
	}

	r.ConfigFile = configFile

	return r, nil
}

// OpenWithConfig opens the repository with a given configuration, avoiding the need for a config file.
func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, password string, options *Options, caching block.CachingOptions) (*Repository, error) {
	log.Debugf("reading encrypted format block")
	// Read the format block, potentially from cache.
	fb, err := readAndCacheFormatBlockBytes(ctx, st, caching.CacheDirectory)
	if err != nil {
		return nil, errors.Wrap(err, "unable to read format block")
	}

	f, err := parseFormatBlock(fb)
	if err != nil {
		return nil, errors.Wrap(err, "can't parse format block")
	}

	fb, err = addFormatBlockChecksumAndLength(fb)
	if err != nil {
		return nil, fmt.Errorf("unable to add checksum")
	}

	masterKey, err := f.deriveMasterKeyFromPassword(password)
	if err != nil {
		return nil, err
	}

	repoConfig, err := f.decryptFormatBytes(masterKey)
	if err != nil {
		return nil, errors.Wrap(err, "unable to decrypt repository config")
	}

	caching.HMACSecret = deriveKeyFromMasterKey(masterKey, f.UniqueID, []byte("local-cache-integrity"), 16)

	fo := repoConfig.FormattingOptions
	if fo.MaxPackSize == 0 {
		fo.MaxPackSize = repoConfig.MaxBlockSize
	}

	log.Debugf("initializing block manager")
	bm, err := block.NewManager(ctx, st, fo, caching, fb)
	if err != nil {
		return nil, errors.Wrap(err, "unable to open block manager")
	}

	log.Debugf("initializing object manager")
	om, err := object.NewObjectManager(ctx, bm, repoConfig.Format, options.ObjectManagerOptions)
	if err != nil {
		return nil, errors.Wrap(err, "unable to open object manager")
	}

	log.Debugf("initializing manifest manager")
	manifests, err := manifest.NewManager(ctx, bm)
	if err != nil {
		return nil, errors.Wrap(err, "unable to open manifests")
	}

	return &Repository{
		Blocks:         bm,
		Objects:        om,
		Storage:        st,
		Manifests:      manifests,
		CacheDirectory: caching.CacheDirectory,
		UniqueID:       f.UniqueID,

		formatBlock: f,
		masterKey:   masterKey,
	}, nil
}

// SetCachingConfig changes the caching configuration for a given repository config file.
func SetCachingConfig(ctx context.Context, configFile string, opt block.CachingOptions) error {
	configFile, err := filepath.Abs(configFile)
	if err != nil {
		return err
	}

	lc, err := loadConfigFromFile(configFile)
	if err != nil {
		return err
	}

	st, err := storage.NewStorage(ctx, lc.Storage)
	if err != nil {
		return errors.Wrap(err, "cannot open storage")
	}

	fb, err := readAndCacheFormatBlockBytes(ctx, st, "")
	if err != nil {
		return errors.Wrap(err, "can't read format block")
	}

	f, err := parseFormatBlock(fb)
	if err != nil {
		return errors.Wrap(err, "can't parse format block")
	}

	if err = setupCaching(configFile, lc, opt, f.UniqueID); err != nil {
		return errors.Wrap(err, "unable to set up caching")
	}

	d, err := json.MarshalIndent(&lc, "", " ")
	if err != nil {
		return err
	}

	if err := ioutil.WriteFile(configFile, d, 0600); err != nil {
		return err
	}

	return nil
}

func readAndCacheFormatBlockBytes(ctx context.Context, st storage.Storage, cacheDirectory string) ([]byte, error) {
	cachedFile := filepath.Join(cacheDirectory, "kopia.repository")
	if cacheDirectory != "" {
		b, err := ioutil.ReadFile(cachedFile)
		if err == nil {
			// read from cache.
			return b, nil
		}
	}

	b, err := st.GetBlock(ctx, FormatBlockID, 0, -1)
	if err != nil {
		return nil, err
	}

	if cacheDirectory != "" {
		if err := ioutil.WriteFile(cachedFile, b, 0600); err != nil {
			log.Warningf("warning: unable to write cache: %v", err)
		}
	}

	return b, nil
}
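A minimal caller sketch for Open; the config path and password are placeholders:

package main

import (
	"context"
	"log"

	"github.com/kopia/repo"
)

func main() {
	ctx := context.Background()

	r, err := repo.Open(ctx, "/home/user/.kopia/repository.config", "my-password", nil)
	if err != nil {
		log.Fatalf("open failed: %v", err)
	}
	defer r.Close(ctx) //nolint:errcheck

	// r.Objects, r.Blocks and r.Manifests are now ready to use.
}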
87
repository.go
Normal file
@@ -0,0 +1,87 @@
package repo

import (
	"context"
	"time"

	"github.com/kopia/repo/block"
	"github.com/kopia/repo/manifest"
	"github.com/kopia/repo/object"
	"github.com/kopia/repo/storage"
	"github.com/pkg/errors"
)

// Repository represents storage where both content-addressable and user-addressable data is kept.
type Repository struct {
	Blocks    *block.Manager
	Objects   *object.Manager
	Storage   storage.Storage
	Manifests *manifest.Manager
	UniqueID  []byte

	ConfigFile     string
	CacheDirectory string

	formatBlock *formatBlock
	masterKey   []byte
}

// Close closes the repository and releases all resources.
func (r *Repository) Close(ctx context.Context) error {
	if err := r.Manifests.Flush(ctx); err != nil {
		return errors.Wrap(err, "error flushing manifests")
	}
	if err := r.Blocks.Flush(ctx); err != nil {
		return errors.Wrap(err, "error closing blocks")
	}
	if err := r.Storage.Close(ctx); err != nil {
		return errors.Wrap(err, "error closing storage")
	}
	return nil
}

// Flush waits for all in-flight writes to complete.
func (r *Repository) Flush(ctx context.Context) error {
	if err := r.Manifests.Flush(ctx); err != nil {
		return err
	}

	return r.Blocks.Flush(ctx)
}

// Refresh makes external changes visible to the repository.
func (r *Repository) Refresh(ctx context.Context) error {
	updated, err := r.Blocks.Refresh(ctx)
	if err != nil {
		return errors.Wrap(err, "error refreshing block index")
	}

	if !updated {
		return nil
	}

	log.Debugf("block index refreshed")

	if err := r.Manifests.Refresh(ctx); err != nil {
		return errors.Wrap(err, "error reloading manifests")
	}

	log.Debugf("manifests refreshed")

	return nil
}

// RefreshPeriodically periodically refreshes the repository to reflect the changes made by other hosts.
func (r *Repository) RefreshPeriodically(ctx context.Context, interval time.Duration) {
	for {
		select {
		case <-ctx.Done():
			return

		case <-time.After(interval):
			if err := r.Refresh(ctx); err != nil {
				log.Warningf("error refreshing repository: %v", err)
			}
		}
	}
}
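RefreshPeriodically blocks until its context is canceled, so callers typically run it on a goroutine. A hypothetical helper (not part of the package):

package main

import (
	"context"
	"time"

	"github.com/kopia/repo"
)

// startBackgroundRefresh launches the refresh loop and returns a function
// that stops it by canceling the context.
func startBackgroundRefresh(r *repo.Repository) (stop func()) {
	ctx, cancel := context.WithCancel(context.Background())
	go r.RefreshPeriodically(ctx, 15*time.Second)
	return cancel
}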
328
repository_test.go
Normal file
@@ -0,0 +1,328 @@
package repo_test

import (
	"bytes"
	"context"
	cryptorand "crypto/rand"
	"fmt"
	"io/ioutil"
	"math/rand"
	"reflect"
	"runtime/debug"
	"testing"

	"github.com/kopia/repo"
	"github.com/kopia/repo/block"
	"github.com/kopia/repo/internal/repotesting"
	"github.com/kopia/repo/object"
	"github.com/kopia/repo/storage"
)

func TestWriters(t *testing.T) {
	cases := []struct {
		data     []byte
		objectID object.ID
	}{
		{
			[]byte("the quick brown fox jumps over the lazy dog"),
			"345acef0bcf82f1daf8e49fab7b7fac7ec296c518501eabea3645b99345a4e08",
		},
		{make([]byte, 100), "1d804f1f69df08f3f59070bf962de69433e3d61ac18522a805a84d8c92741340"}, // 100 zero bytes
	}

	ctx := context.Background()

	for _, c := range cases {
		var env repotesting.Environment
		defer env.Setup(t).Close(t)

		writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
		if _, err := writer.Write(c.data); err != nil {
			t.Fatalf("write error: %v", err)
		}

		result, err := writer.Result()
		if err != nil {
			t.Errorf("error getting writer results for %v, expected: %v", c.data, c.objectID.String())
			continue
		}

		if !objectIDsEqual(result, c.objectID) {
			t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String())
		}

		env.Repository.Blocks.Flush(ctx)
	}
}

func objectIDsEqual(o1 object.ID, o2 object.ID) bool {
	return reflect.DeepEqual(o1, o2)
}

func TestWriterCompleteChunkInTwoWrites(t *testing.T) {
	var env repotesting.Environment
	defer env.Setup(t).Close(t)
	ctx := context.Background()

	bytes := make([]byte, 100)
	writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
	writer.Write(bytes[0:50]) //nolint:errcheck
	writer.Write(bytes[0:50]) //nolint:errcheck
	result, err := writer.Result()
	if result != "1d804f1f69df08f3f59070bf962de69433e3d61ac18522a805a84d8c92741340" {
		t.Errorf("unexpected result: %v err: %v", result, err)
	}
}

func TestPackingSimple(t *testing.T) {
	var env repotesting.Environment
	defer env.Setup(t).Close(t)

	ctx := context.Background()

	content1 := "hello, how do you do?"
	content2 := "hi, how are you?"
	content3 := "thank you!"

	oid1a := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1a")
	oid1b := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1b")
	oid2a := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2a")
	oid2b := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2b")

	oid3a := writeObject(ctx, t, env.Repository, []byte(content3), "packed-object-3a")
	oid3b := writeObject(ctx, t, env.Repository, []byte(content3), "packed-object-3b")
	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
	oid2c := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2c")
	oid1c := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1c")

	env.Repository.Blocks.Flush(ctx)

	if got, want := oid1a.String(), oid1b.String(); got != want {
		t.Errorf("oid1a(%q) != oid1b(%q)", got, want)
	}
	if got, want := oid1a.String(), oid1c.String(); got != want {
		t.Errorf("oid1a(%q) != oid1c(%q)", got, want)
	}
	if got, want := oid2a.String(), oid2b.String(); got != want {
		t.Errorf("oid2a(%q) != oid2b(%q)", got, want)
	}
	if got, want := oid2a.String(), oid2c.String(); got != want {
		t.Errorf("oid2a(%q) != oid2c(%q)", got, want)
	}
	if got, want := oid3a.String(), oid3b.String(); got != want {
		t.Errorf("oid3a(%q) != oid3b(%q)", got, want)
	}

	env.VerifyStorageBlockCount(t, 3)

	env.MustReopen(t)

	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
	verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")

	if err := env.Repository.Blocks.CompactIndexes(ctx, block.CompactOptions{MinSmallBlocks: 1, MaxSmallBlocks: 1}); err != nil {
		t.Errorf("optimize error: %v", err)
	}

	env.MustReopen(t)

	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
	verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")

	if err := env.Repository.Blocks.CompactIndexes(ctx, block.CompactOptions{MinSmallBlocks: 1, MaxSmallBlocks: 1}); err != nil {
		t.Errorf("optimize error: %v", err)
	}

	env.MustReopen(t)

	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
	verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")
}

func TestHMAC(t *testing.T) {
	var env repotesting.Environment
	defer env.Setup(t).Close(t)
	ctx := context.Background()

	content := bytes.Repeat([]byte{0xcd}, 50)

	w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
	w.Write(content) //nolint:errcheck
	result, err := w.Result()
	if result.String() != "367352007ee6ca9fa755ce8352347d092c17a24077fd33c62f655574a8cf906d" {
		t.Errorf("unexpected result: %v err: %v", result.String(), err)
	}
}

func TestUpgrade(t *testing.T) {
	var env repotesting.Environment
	defer env.Setup(t).Close(t)
	ctx := context.Background()

	if err := env.Repository.Upgrade(ctx); err != nil {
		t.Errorf("upgrade error: %v", err)
	}

	if err := env.Repository.Upgrade(ctx); err != nil {
		t.Errorf("2nd upgrade error: %v", err)
	}
}

func TestReaderStoredBlockNotFound(t *testing.T) {
	var env repotesting.Environment
	defer env.Setup(t).Close(t)
	ctx := context.Background()

	objectID, err := object.ParseID("Ddeadbeef")
	if err != nil {
		t.Errorf("cannot parse object ID: %v", err)
	}
	reader, err := env.Repository.Objects.Open(ctx, objectID)
	if err != storage.ErrBlockNotFound || reader != nil {
		t.Errorf("unexpected result: reader: %v err: %v", reader, err)
	}
}

func TestEndToEndReadAndSeek(t *testing.T) {
	var env repotesting.Environment
	defer env.Setup(t).Close(t)
	ctx := context.Background()

	for _, size := range []int{1, 199, 200, 201, 9999, 512434} {
		// Create some random data sample of the specified size.
		randomData := make([]byte, size)
		cryptorand.Read(randomData) //nolint:errcheck

		writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
		writer.Write(randomData) //nolint:errcheck
		objectID, err := writer.Result()
		writer.Close()
		if err != nil {
			t.Errorf("cannot get writer result for %v: %v", size, err)
			continue
		}

		verify(ctx, t, env.Repository, objectID, randomData, fmt.Sprintf("%v %v", objectID, size))
	}
}

func writeObject(ctx context.Context, t *testing.T, rep *repo.Repository, data []byte, testCaseID string) object.ID {
	w := rep.Objects.NewWriter(ctx, object.WriterOptions{})
	if _, err := w.Write(data); err != nil {
		t.Fatalf("can't write object %q - write failed: %v", testCaseID, err)
	}
	oid, err := w.Result()
	if err != nil {
		t.Fatalf("can't write object %q - result failed: %v", testCaseID, err)
	}

	return oid
}

func verify(ctx context.Context, t *testing.T, rep *repo.Repository, objectID object.ID, expectedData []byte, testCaseID string) {
	t.Helper()
	reader, err := rep.Objects.Open(ctx, objectID)
	if err != nil {
		t.Errorf("cannot get reader for %v (%v): %v %v", testCaseID, objectID, err, string(debug.Stack()))
		return
	}

	for i := 0; i < 20; i++ {
		sampleSize := int(rand.Int31n(300))
		seekOffset := int(rand.Int31n(int32(len(expectedData))))
		if seekOffset+sampleSize > len(expectedData) {
			sampleSize = len(expectedData) - seekOffset
		}
		if sampleSize > 0 {
			got := make([]byte, sampleSize)
			if offset, err := reader.Seek(int64(seekOffset), 0); err != nil || offset != int64(seekOffset) {
				t.Errorf("seek error: %v offset=%v expected:%v", err, offset, seekOffset)
			}
			if n, err := reader.Read(got); err != nil || n != sampleSize {
				t.Errorf("invalid data: n=%v, expected=%v, err:%v", n, sampleSize, err)
			}

			expected := expectedData[seekOffset : seekOffset+sampleSize]

			if !bytes.Equal(expected, got) {
				t.Errorf("incorrect data read for %v: expected: %x, got: %x", testCaseID, expected, got)
			}
		}
	}
}

func TestFormats(t *testing.T) {
	ctx := context.Background()
	makeFormat := func(hash, encryption string) func(*repo.NewRepositoryOptions) {
		return func(n *repo.NewRepositoryOptions) {
			n.BlockFormat.Hash = hash
			n.BlockFormat.Encryption = encryption
			n.BlockFormat.HMACSecret = []byte("key")
			n.ObjectFormat.MaxBlockSize = 10000
			n.ObjectFormat.Splitter = "FIXED"
		}
	}

	cases := []struct {
		format func(*repo.NewRepositoryOptions)
		oids   map[string]object.ID
	}{
		{
			format: func(n *repo.NewRepositoryOptions) {
				n.ObjectFormat.MaxBlockSize = 10000
			},
			oids: map[string]object.ID{
				"": "b613679a0814d9ec772f95d778c35fc5ff1697c493715653c6c712144292c5ad",
				"The quick brown fox jumps over the lazy dog": "fb011e6154a19b9a4c767373c305275a5a69e8b68b0b4c9200c383dced19a416",
			},
		},
		{
			format: makeFormat("HMAC-SHA256", "NONE"),
			oids: map[string]object.ID{
				"The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8",
			},
		},
		{
			format: makeFormat("HMAC-SHA256-128", "NONE"),
			oids: map[string]object.ID{
				"The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143",
			},
		},
	}

	for caseIndex, c := range cases {
		var env repotesting.Environment
		defer env.Setup(t, c.format).Close(t)

		for k, v := range c.oids {
			bytesToWrite := []byte(k)
			w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
			w.Write(bytesToWrite) //nolint:errcheck
			oid, err := w.Result()
			if err != nil {
				t.Errorf("error: %v", err)
			}
			if !objectIDsEqual(oid, v) {
				t.Errorf("invalid oid for #%v\ngot:\n%#v\nexpected:\n%#v", caseIndex, oid.String(), v.String())
			}

			rc, err := env.Repository.Objects.Open(ctx, oid)
			if err != nil {
				t.Errorf("open failed: %v", err)
				continue
			}
			bytesRead, err := ioutil.ReadAll(rc)
			if err != nil {
				t.Errorf("error reading: %v", err)
			}
			if !bytes.Equal(bytesRead, bytesToWrite) {
				t.Errorf("data mismatch, read:%x vs written:%x", bytesRead, bytesToWrite)
			}
		}
	}
}
47
storage/config.go
Normal file
@@ -0,0 +1,47 @@
package storage

import (
	"encoding/json"
	"fmt"
)

// ConnectionInfo represents JSON-serializable configuration of a blob storage.
type ConnectionInfo struct {
	Type   string
	Config interface{}
}

// UnmarshalJSON parses the JSON-encoded data into ConnectionInfo.
func (c *ConnectionInfo) UnmarshalJSON(b []byte) error {
	raw := struct {
		Type string          `json:"type"`
		Data json.RawMessage `json:"config"`
	}{}

	if err := json.Unmarshal(b, &raw); err != nil {
		return err
	}

	c.Type = raw.Type
	f := factories[raw.Type]
	if f == nil {
		return fmt.Errorf("storage type '%v' not registered", raw.Type)
	}
	c.Config = f.defaultConfigFunc()
	if err := json.Unmarshal(raw.Data, c.Config); err != nil {
		return fmt.Errorf("unable to unmarshal config: %v", err)
	}

	return nil
}

// MarshalJSON returns JSON-encoded storage configuration.
func (c ConnectionInfo) MarshalJSON() ([]byte, error) {
	return json.Marshal(struct {
		Type string      `json:"type"`
		Data interface{} `json:"config"`
	}{
		Type: c.Type,
		Data: c.Config,
	})
}
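The resulting wire format pairs a registered type name with backend-specific options. A sketch using the filesystem backend defined later in this commit (the path is a placeholder):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ci := storage.ConnectionInfo{
		Type:   "filesystem",
		Config: &filesystem.Options{Path: "/tmp/repo"}, // placeholder path
	}
	b, _ := json.Marshal(ci)
	fmt.Println(string(b))
	// prints something like: {"type":"filesystem","config":{"path":"/tmp/repo","dirShards":null}}
}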
2
storage/doc.go
Normal file
@@ -0,0 +1,2 @@
// Package storage implements simple storage of immutable, unstructured binary large objects (BLOBs).
package storage
40
storage/filesystem/filesystem_options.go
Normal file
@@ -0,0 +1,40 @@
package filesystem

import "os"

// Options defines options for Filesystem-backed storage.
type Options struct {
	Path string `json:"path"`

	DirectoryShards []int `json:"dirShards"`

	FileMode      os.FileMode `json:"fileMode,omitempty"`
	DirectoryMode os.FileMode `json:"dirMode,omitempty"`

	FileUID *int `json:"uid,omitempty"`
	FileGID *int `json:"gid,omitempty"`
}

func (fso *Options) fileMode() os.FileMode {
	if fso.FileMode == 0 {
		return fsDefaultFileMode
	}

	return fso.FileMode
}

func (fso *Options) dirMode() os.FileMode {
	if fso.DirectoryMode == 0 {
		return fsDefaultDirMode
	}

	return fso.DirectoryMode
}

func (fso *Options) shards() []int {
	if fso.DirectoryShards == nil {
		return fsDefaultShards
	}

	return fso.DirectoryShards
}
248
storage/filesystem/filesystem_storage.go
Normal file
@@ -0,0 +1,248 @@
// Package filesystem implements filesystem-based Storage.
package filesystem

import (
	"context"
	"fmt"
	"io"
	"io/ioutil"
	"math/rand"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/kopia/repo/internal/repologging"
	"github.com/kopia/repo/storage"
)

var log = repologging.Logger("repo/filesystem")

const (
	fsStorageType        = "filesystem"
	fsStorageChunkSuffix = ".f"
)

var (
	fsDefaultShards               = []int{3, 3}
	fsDefaultFileMode os.FileMode = 0600
	fsDefaultDirMode  os.FileMode = 0700
)

type fsStorage struct {
	Options
}

func (fs *fsStorage) GetBlock(ctx context.Context, blockID string, offset, length int64) ([]byte, error) {
	_, path := fs.getShardedPathAndFilePath(blockID)

	f, err := os.Open(path)
	if os.IsNotExist(err) {
		return nil, storage.ErrBlockNotFound
	}

	if err != nil {
		return nil, err
	}
	defer f.Close() //nolint:errcheck

	if length < 0 {
		return ioutil.ReadAll(f)
	}

	if _, err = f.Seek(offset, io.SeekStart); err != nil {
		return nil, err
	}
	b, err := ioutil.ReadAll(io.LimitReader(f, length))
	if err != nil {
		return nil, err
	}
	if int64(len(b)) != length {
		return nil, fmt.Errorf("invalid length")
	}
	return b, nil
}

func getstringFromFileName(name string) (string, bool) {
	if strings.HasSuffix(name, fsStorageChunkSuffix) {
		return name[0 : len(name)-len(fsStorageChunkSuffix)], true
	}

	return "", false
}

func makeFileName(blockID string) string {
	return blockID + fsStorageChunkSuffix
}

func (fs *fsStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
	var walkDir func(string, string) error

	walkDir = func(directory string, currentPrefix string) error {
		entries, err := ioutil.ReadDir(directory)
		if err != nil {
			return err
		}

		for _, e := range entries {
			if e.IsDir() {
				newPrefix := currentPrefix + e.Name()
				var match bool

				if len(prefix) > len(newPrefix) {
					match = strings.HasPrefix(prefix, newPrefix)
				} else {
					match = strings.HasPrefix(newPrefix, prefix)
				}

				if match {
					if err := walkDir(directory+"/"+e.Name(), currentPrefix+e.Name()); err != nil {
						return err
					}
				}
			} else if fullID, ok := getstringFromFileName(currentPrefix + e.Name()); ok {
				if strings.HasPrefix(fullID, prefix) {
					if err := callback(storage.BlockMetadata{
						BlockID:   fullID,
						Length:    e.Size(),
						Timestamp: e.ModTime(),
					}); err != nil {
						return err
					}
				}
			}
		}

		return nil
	}

	return walkDir(fs.Path, "")
}

// TouchBlock updates file modification time to current time if it's sufficiently old.
func (fs *fsStorage) TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error {
	_, path := fs.getShardedPathAndFilePath(blockID)
	st, err := os.Stat(path)
	if err != nil {
		return err
	}

	n := time.Now()
	age := n.Sub(st.ModTime())
	if age < threshold {
		return nil
	}

	log.Debugf("updating timestamp on %v to %v", path, n)
	return os.Chtimes(path, n, n)
}

func (fs *fsStorage) PutBlock(ctx context.Context, blockID string, data []byte) error {
	_, path := fs.getShardedPathAndFilePath(blockID)

	tempFile := fmt.Sprintf("%s.tmp.%d", path, rand.Int())
	f, err := fs.createTempFileAndDir(tempFile)
	if err != nil {
		return fmt.Errorf("cannot create temporary file: %v", err)
	}

	if _, err = f.Write(data); err != nil {
		return fmt.Errorf("can't write temporary file: %v", err)
	}
	if err = f.Close(); err != nil {
		return fmt.Errorf("can't close temporary file: %v", err)
	}

	err = os.Rename(tempFile, path)
	if err != nil {
		if removeErr := os.Remove(tempFile); removeErr != nil {
			log.Warningf("can't remove temp file: %v", removeErr)
		}
		return err
	}

	if fs.FileUID != nil && fs.FileGID != nil && os.Geteuid() == 0 {
		if chownErr := os.Chown(path, *fs.FileUID, *fs.FileGID); chownErr != nil {
			log.Warningf("can't change file permissions: %v", chownErr)
		}
	}

	return nil
}

func (fs *fsStorage) createTempFileAndDir(tempFile string) (*os.File, error) {
	flags := os.O_CREATE | os.O_WRONLY | os.O_EXCL
	f, err := os.OpenFile(tempFile, flags, fs.fileMode())
	if os.IsNotExist(err) {
		if err = os.MkdirAll(filepath.Dir(tempFile), fs.dirMode()); err != nil {
			return nil, fmt.Errorf("cannot create directory: %v", err)
		}
		return os.OpenFile(tempFile, flags, fs.fileMode())
	}

	return f, err
}

func (fs *fsStorage) DeleteBlock(ctx context.Context, blockID string) error {
	_, path := fs.getShardedPathAndFilePath(blockID)
	err := os.Remove(path)
	if err == nil || os.IsNotExist(err) {
		return nil
	}

	return err
}

func (fs *fsStorage) getShardDirectory(blockID string) (string, string) {
	shardPath := fs.Path
	if len(blockID) < 20 {
		return shardPath, blockID
	}
	for _, size := range fs.shards() {
		shardPath = filepath.Join(shardPath, blockID[0:size])
		blockID = blockID[size:]
	}

	return shardPath, blockID
}

func (fs *fsStorage) getShardedPathAndFilePath(blockID string) (string, string) {
	shardPath, blockID := fs.getShardDirectory(blockID)
	result := filepath.Join(shardPath, makeFileName(blockID))
	return shardPath, result
}

func (fs *fsStorage) ConnectionInfo() storage.ConnectionInfo {
	return storage.ConnectionInfo{
		Type:   fsStorageType,
		Config: &fs.Options,
	}
}

func (fs *fsStorage) Close(ctx context.Context) error {
	return nil
}

// New creates new filesystem-backed storage in a specified directory.
func New(ctx context.Context, opts *Options) (storage.Storage, error) {
	var err error

	if _, err = os.Stat(opts.Path); err != nil {
		return nil, fmt.Errorf("cannot access storage path: %v", err)
	}

	r := &fsStorage{
		Options: *opts,
	}

	return r, nil
}

func init() {
	storage.AddSupportedStorage(
		fsStorageType,
		func() interface{} { return &Options{} },
		func(ctx context.Context, o interface{}) (storage.Storage, error) {
			return New(ctx, o.(*Options))
		})
}
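To make the sharding concrete: with the default shards of [3, 3], a sufficiently long block ID becomes two 3-character directory levels plus the remainder as the file name. A standalone sketch of the same path logic (the block ID and root are placeholders):

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	blockID := "0123456789abcdef0123456789abcdef" // placeholder block ID
	shardPath := "/var/repo"                      // placeholder root

	for _, size := range []int{3, 3} { // default shard configuration
		shardPath = filepath.Join(shardPath, blockID[0:size])
		blockID = blockID[size:]
	}
	fmt.Println(filepath.Join(shardPath, blockID+".f"))
	// /var/repo/012/345/6789abcdef0123456789abcdef.f
}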
120
storage/filesystem/filesystem_storage_test.go
Normal file
@@ -0,0 +1,120 @@
package filesystem

import (
	"context"
	"io/ioutil"
	"os"
	"reflect"
	"sort"
	"testing"
	"time"

	"github.com/kopia/repo/storage"

	"github.com/kopia/repo/internal/storagetesting"
)

func TestFileStorage(t *testing.T) {
	t.Parallel()
	ctx := context.Background()

	// Test various shard configurations.
	for _, shardSpec := range [][]int{
		{0},
		{1},
		{3, 3},
		{2},
		{1, 1},
		{1, 2},
		{2, 2, 2},
	} {
		path, _ := ioutil.TempDir("", "r-fs")
		defer os.RemoveAll(path)

		r, err := New(ctx, &Options{
			Path:            path,
			DirectoryShards: shardSpec,
		})

		if r == nil || err != nil {
			t.Errorf("unexpected result: %v %v", r, err)
		}

		storagetesting.VerifyStorage(ctx, t, r)
		storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r)
		if err := r.Close(ctx); err != nil {
			t.Fatalf("err: %v", err)
		}
	}
}

func TestFileStorageTouch(t *testing.T) {
	t.Parallel()
	ctx := context.Background()

	t1 := "392ee1bc299db9f235e046a62625afb84902"
	t2 := "2a7ff4f29eddbcd4c18fa9e73fec20bbb71f"
	t3 := "0dae5918f83e6a24c8b3e274ca1026e43f24"

	path, _ := ioutil.TempDir("", "r-fs")
	defer os.RemoveAll(path)

	r, err := New(ctx, &Options{
		Path: path,
	})

	if r == nil || err != nil {
		t.Errorf("unexpected result: %v %v", r, err)
	}

	fs := r.(*fsStorage)
	assertNoError(t, fs.PutBlock(ctx, t1, []byte{1}))
	time.Sleep(1 * time.Second) // sleep a bit to accommodate Apple filesystems with low timestamp resolution
	assertNoError(t, fs.PutBlock(ctx, t2, []byte{1}))
	time.Sleep(1 * time.Second)
	assertNoError(t, fs.PutBlock(ctx, t3, []byte{1}))

	verifyBlockTimestampOrder(t, fs, t1, t2, t3)

	assertNoError(t, fs.TouchBlock(ctx, t2, 1*time.Hour)) // has no effect, all timestamps are very new
	verifyBlockTimestampOrder(t, fs, t1, t2, t3)

	assertNoError(t, fs.TouchBlock(ctx, t1, 0)) // moves t1 to the top of the pile
	verifyBlockTimestampOrder(t, fs, t2, t3, t1)
	time.Sleep(1 * time.Second)

	assertNoError(t, fs.TouchBlock(ctx, t2, 0)) // moves t2 to the top of the pile
	verifyBlockTimestampOrder(t, fs, t3, t1, t2)
	time.Sleep(1 * time.Second)

	assertNoError(t, fs.TouchBlock(ctx, t1, 0)) // moves t1 to the top of the pile
	verifyBlockTimestampOrder(t, fs, t3, t2, t1)
}

func verifyBlockTimestampOrder(t *testing.T, st storage.Storage, want ...string) {
	blocks, err := storage.ListAllBlocks(context.Background(), st, "")
	if err != nil {
		t.Errorf("error listing blocks: %v", err)
		return
	}

	sort.Slice(blocks, func(i, j int) bool {
		return blocks[i].Timestamp.Before(blocks[j].Timestamp)
	})

	var got []string
	for _, b := range blocks {
		got = append(got, b.BlockID)
	}

	if !reflect.DeepEqual(got, want) {
		t.Errorf("incorrect block order: %v, wanted %v", blocks, want)
	}
}

func assertNoError(t *testing.T, err error) {
	t.Helper()
	if err != nil {
		t.Errorf("err: %v", err)
	}
}
20
storage/gcs/gcs_options.go
Normal file
@@ -0,0 +1,20 @@
package gcs

// Options defines options for Google Cloud Storage-backed storage.
type Options struct {
	// BucketName is the name of the GCS bucket where data is stored.
	BucketName string `json:"bucket"`

	// Prefix specifies an additional string to prepend to all objects.
	Prefix string `json:"prefix,omitempty"`

	// ServiceAccountCredentials specifies the name of the file with GCS credentials.
	ServiceAccountCredentials string `json:"credentialsFile,omitempty"`

	// ReadOnly causes the GCS connection to be opened with a read-only scope to prevent accidental mutations.
	ReadOnly bool `json:"readOnly,omitempty"`

	MaxUploadSpeedBytesPerSecond int `json:"maxUploadSpeedBytesPerSecond,omitempty"`

	MaxDownloadSpeedBytesPerSecond int `json:"maxDownloadSpeedBytesPerSecond,omitempty"`
}
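A hypothetical connection configuration using these options; the bucket name, credentials path, and import path are placeholders/assumptions:

package main

import "github.com/kopia/repo/storage/gcs"

// exampleOptions returns a hypothetical GCS configuration.
func exampleOptions() *gcs.Options {
	return &gcs.Options{
		BucketName:                   "my-backup-bucket",
		Prefix:                       "kopia/",
		ServiceAccountCredentials:    "/home/user/gcs-credentials.json",
		MaxUploadSpeedBytesPerSecond: 4 << 20, // throttle uploads to ~4 MiB/s
	}
}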
270
storage/gcs/gcs_storage.go
Normal file
@@ -0,0 +1,270 @@
// Package gcs implements Storage based on Google Cloud Storage bucket.
package gcs

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"io/ioutil"

	"google.golang.org/api/googleapi"

	"github.com/efarrer/iothrottler"
	"github.com/kopia/repo/internal/retry"
	"github.com/kopia/repo/internal/throttle"
	"github.com/kopia/repo/storage"
	"golang.org/x/oauth2"
	"golang.org/x/oauth2/google"
	"google.golang.org/api/iterator"
	"google.golang.org/api/option"

	gcsclient "cloud.google.com/go/storage"
)

const (
	gcsStorageType = "gcs"
)

type gcsStorage struct {
	Options

	ctx           context.Context
	storageClient *gcsclient.Client
	bucket        *gcsclient.BucketHandle

	downloadThrottler *iothrottler.IOThrottlerPool
	uploadThrottler   *iothrottler.IOThrottlerPool
}

func (gcs *gcsStorage) GetBlock(ctx context.Context, b string, offset, length int64) ([]byte, error) {
	if offset < 0 {
		return nil, fmt.Errorf("invalid offset")
	}

	attempt := func() (interface{}, error) {
		reader, err := gcs.bucket.Object(gcs.getObjectNameString(b)).NewRangeReader(gcs.ctx, offset, length)
		if err != nil {
			return nil, err
		}
		defer reader.Close() //nolint:errcheck

		return ioutil.ReadAll(reader)
	}

	v, err := exponentialBackoff(fmt.Sprintf("GetBlock(%q,%v,%v)", b, offset, length), attempt)
	if err != nil {
		return nil, translateError(err)
	}

	fetched := v.([]byte)
	if len(fetched) != int(length) && length >= 0 {
		return nil, fmt.Errorf("invalid offset/length")
	}

	return fetched, nil
}

func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error) {
	return retry.WithExponentialBackoff(desc, att, isRetriableError)
}

func isRetriableError(err error) bool {
	if apiError, ok := err.(*googleapi.Error); ok {
		return apiError.Code >= 500
	}

	switch err {
	case nil:
		return false
	case gcsclient.ErrObjectNotExist:
		return false
	case gcsclient.ErrBucketNotExist:
		return false
	default:
		return true
	}
}

func translateError(err error) error {
	switch err {
	case nil:
		return nil
	case gcsclient.ErrObjectNotExist:
		return storage.ErrBlockNotFound
	case gcsclient.ErrBucketNotExist:
		return storage.ErrBlockNotFound
	default:
		return fmt.Errorf("unexpected GCS error: %v", err)
	}
}

func (gcs *gcsStorage) PutBlock(ctx context.Context, b string, data []byte) error {
	ctx, cancel := context.WithCancel(ctx)

	obj := gcs.bucket.Object(gcs.getObjectNameString(b))
	writer := obj.NewWriter(ctx)
	writer.ChunkSize = 1 << 20
	writer.ContentType = "application/x-kopia"

	progressCallback := storage.ProgressCallback(ctx)

	if progressCallback != nil {
		progressCallback(b, 0, int64(len(data)))
		defer progressCallback(b, int64(len(data)), int64(len(data)))

		writer.ProgressFunc = func(completed int64) {
			if completed != int64(len(data)) {
				progressCallback(b, completed, int64(len(data)))
			}
		}
	}

	_, err := io.Copy(writer, bytes.NewReader(data))
	if err != nil {
		// canceling the context before closing the writer causes it to abandon the upload.
		cancel()
		writer.Close() //nolint:errcheck
		return translateError(err)
	}
	defer cancel()

	// calling Close before cancel() causes it to commit the upload.
	return translateError(writer.Close())
}
|
||||
func (gcs *gcsStorage) DeleteBlock(ctx context.Context, b string) error {
|
||||
attempt := func() (interface{}, error) {
|
||||
return nil, gcs.bucket.Object(gcs.getObjectNameString(b)).Delete(gcs.ctx)
|
||||
}
|
||||
|
||||
_, err := exponentialBackoff(fmt.Sprintf("DeleteBlock(%q)", b), attempt)
|
||||
err = translateError(err)
|
||||
if err == storage.ErrBlockNotFound {
|
||||
return nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (gcs *gcsStorage) getObjectNameString(blockID string) string {
|
||||
return gcs.Prefix + blockID
|
||||
}
|
||||
|
||||
func (gcs *gcsStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
|
||||
lst := gcs.bucket.Objects(gcs.ctx, &gcsclient.Query{
|
||||
Prefix: gcs.getObjectNameString(prefix),
|
||||
})
|
||||
|
||||
oa, err := lst.Next()
|
||||
for err == nil {
|
||||
if err = callback(storage.BlockMetadata{
|
||||
BlockID: oa.Name[len(gcs.Prefix):],
|
||||
Length: oa.Size,
|
||||
Timestamp: oa.Created,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
oa, err = lst.Next()
|
||||
}
|
||||
|
||||
if err != iterator.Done {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (gcs *gcsStorage) ConnectionInfo() storage.ConnectionInfo {
|
||||
return storage.ConnectionInfo{
|
||||
Type: gcsStorageType,
|
||||
Config: &gcs.Options,
|
||||
}
|
||||
}
|
||||
|
||||
func (gcs *gcsStorage) Close(ctx context.Context) error {
|
||||
gcs.storageClient.Close() //nolint:errcheck
|
||||
return nil
|
||||
}
|
||||
|
||||
func toBandwidth(bytesPerSecond int) iothrottler.Bandwidth {
|
||||
if bytesPerSecond <= 0 {
|
||||
return iothrottler.Unlimited
|
||||
}
|
||||
|
||||
return iothrottler.Bandwidth(bytesPerSecond) * iothrottler.BytesPerSecond
|
||||
}
|
||||
|
||||
func tokenSourceFromCredentialsFile(ctx context.Context, fn string, scopes ...string) (oauth2.TokenSource, error) {
|
||||
data, err := ioutil.ReadFile(fn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cfg, err := google.JWTConfigFromJSON(data, scopes...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("google.JWTConfigFromJSON: %v", err)
|
||||
}
|
||||
return cfg.TokenSource(ctx), nil
|
||||
}
|
||||
|
||||
// New creates new Google Cloud Storage-backed storage with specified options:
|
||||
//
|
||||
// - the 'BucketName' field is required and all other parameters are optional.
|
||||
//
|
||||
// By default the connection reuses credentials managed by (https://cloud.google.com/sdk/),
|
||||
// but this can be disabled by setting IgnoreDefaultCredentials to true.
|
||||
func New(ctx context.Context, opt *Options) (storage.Storage, error) {
|
||||
var ts oauth2.TokenSource
|
||||
var err error
|
||||
|
||||
scope := gcsclient.ScopeReadWrite
|
||||
if opt.ReadOnly {
|
||||
scope = gcsclient.ScopeReadOnly
|
||||
}
|
||||
|
||||
if sa := opt.ServiceAccountCredentials; sa != "" {
|
||||
ts, err = tokenSourceFromCredentialsFile(ctx, sa, scope)
|
||||
} else {
|
||||
ts, err = google.DefaultTokenSource(ctx, scope)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
downloadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxDownloadSpeedBytesPerSecond))
|
||||
uploadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxUploadSpeedBytesPerSecond))
|
||||
|
||||
hc := oauth2.NewClient(ctx, ts)
|
||||
hc.Transport = throttle.NewRoundTripper(hc.Transport, downloadThrottler, uploadThrottler)
|
||||
|
||||
cli, err := gcsclient.NewClient(ctx, option.WithHTTPClient(hc))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if opt.BucketName == "" {
|
||||
return nil, errors.New("bucket name must be specified")
|
||||
}
|
||||
|
||||
return &gcsStorage{
|
||||
Options: *opt,
|
||||
ctx: ctx,
|
||||
storageClient: cli,
|
||||
bucket: cli.Bucket(opt.BucketName),
|
||||
downloadThrottler: downloadThrottler,
|
||||
uploadThrottler: uploadThrottler,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
storage.AddSupportedStorage(
|
||||
gcsStorageType,
|
||||
func() interface{} {
|
||||
return &Options{}
|
||||
},
|
||||
func(ctx context.Context, o interface{}) (storage.Storage, error) {
|
||||
return New(ctx, o.(*Options))
|
||||
})
|
||||
}
|
||||
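A usage sketch for the backend above (not part of this change); the bucket name, prefix, credentials path, and throttle value are placeholders:

package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage/gcs"
)

func main() {
	ctx := context.Background()

	// placeholder bucket and credentials file
	st, err := gcs.New(ctx, &gcs.Options{
		BucketName:                   "my-backups",
		Prefix:                       "kopia/",
		ServiceAccountCredentials:    "/etc/kopia/gcs-creds.json",
		MaxUploadSpeedBytesPerSecond: 1 << 20, // throttle uploads to ~1 MiB/s
	})
	if err != nil {
		log.Fatalf("unable to connect to GCS: %v", err)
	}
	defer st.Close(ctx) //nolint:errcheck

	if err := st.PutBlock(ctx, "example", []byte{1, 2, 3}); err != nil {
		log.Fatalf("PutBlock failed: %v", err)
	}
}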
75
storage/gcs/gcs_storage_test.go
Normal file
@@ -0,0 +1,75 @@
package gcs_test

import (
	"context"
	"os"
	"testing"

	"github.com/kopia/repo/internal/storagetesting"

	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/gcs"
)

func TestGCSStorage(t *testing.T) {
	bucket := os.Getenv("KOPIA_GCS_TEST_BUCKET")
	if bucket == "" {
		t.Skip("KOPIA_GCS_TEST_BUCKET not provided")
	}

	credsFile := os.Getenv("KOPIA_GCS_CREDENTIALS_FILE")
	if _, err := os.Stat(credsFile); err != nil {
		t.Skip("skipping test because GCS credentials file can't be opened")
	}

	ctx := context.Background()
	st, err := gcs.New(ctx, &gcs.Options{
		BucketName:                bucket,
		ServiceAccountCredentials: credsFile,
	})

	if err != nil {
		t.Fatalf("unable to connect to GCS: %v", err)
	}

	if err := st.ListBlocks(ctx, "", func(bm storage.BlockMetadata) error {
		return st.DeleteBlock(ctx, bm.BlockID)
	}); err != nil {
		t.Fatalf("unable to clear GCS bucket: %v", err)
	}

	storagetesting.VerifyStorage(ctx, t, st)
	storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st)

	// delete everything again
	if err := st.ListBlocks(ctx, "", func(bm storage.BlockMetadata) error {
		return st.DeleteBlock(ctx, bm.BlockID)
	}); err != nil {
		t.Fatalf("unable to clear GCS bucket: %v", err)
	}
	if err := st.Close(ctx); err != nil {
		t.Fatalf("err: %v", err)
	}
}

func TestGCSStorageInvalid(t *testing.T) {
	bucket := os.Getenv("KOPIA_GCS_TEST_BUCKET")
	if bucket == "" {
		t.Skip("KOPIA_GCS_TEST_BUCKET not provided")
	}

	ctx := context.Background()
	st, err := gcs.New(ctx, &gcs.Options{
		BucketName:                bucket + "-no-such-bucket",
		ServiceAccountCredentials: os.Getenv("KOPIA_GCS_CREDENTIALS_FILE"),
	})

	if err != nil {
		t.Fatalf("unable to connect to GCS: %v", err)
	}

	defer st.Close(ctx)
	if err := st.PutBlock(ctx, "xxx", []byte{1, 2, 3}); err == nil {
		t.Errorf("unexpected success when adding to non-existent bucket")
	}
}
96
storage/logging/logging_storage.go
Normal file
@@ -0,0 +1,96 @@
// Package logging implements a wrapper around Storage that logs all activity.
package logging

import (
	"context"
	"time"

	"github.com/kopia/repo/internal/repologging"
	"github.com/kopia/repo/storage"
)

var log = repologging.Logger("repo/storage")

type loggingStorage struct {
	base   storage.Storage
	printf func(string, ...interface{})
	prefix string
}

func (s *loggingStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) {
	t0 := time.Now()
	result, err := s.base.GetBlock(ctx, id, offset, length)
	dt := time.Since(t0)
	if len(result) < 20 {
		s.printf(s.prefix+"GetBlock(%q,%v,%v)=(%#v, %#v) took %v", id, offset, length, result, err, dt)
	} else {
		s.printf(s.prefix+"GetBlock(%q,%v,%v)=({%#v bytes}, %#v) took %v", id, offset, length, len(result), err, dt)
	}
	return result, err
}

func (s *loggingStorage) PutBlock(ctx context.Context, id string, data []byte) error {
	t0 := time.Now()
	err := s.base.PutBlock(ctx, id, data)
	dt := time.Since(t0)
	s.printf(s.prefix+"PutBlock(%q,len=%v)=%#v took %v", id, len(data), err, dt)
	return err
}

func (s *loggingStorage) DeleteBlock(ctx context.Context, id string) error {
	t0 := time.Now()
	err := s.base.DeleteBlock(ctx, id)
	dt := time.Since(t0)
	s.printf(s.prefix+"DeleteBlock(%q)=%#v took %v", id, err, dt)
	return err
}

func (s *loggingStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
	t0 := time.Now()
	cnt := 0
	err := s.base.ListBlocks(ctx, prefix, func(bi storage.BlockMetadata) error {
		cnt++
		return callback(bi)
	})
	s.printf(s.prefix+"ListBlocks(%q)=%v returned %v items and took %v", prefix, err, cnt, time.Since(t0))
	return err
}

func (s *loggingStorage) Close(ctx context.Context) error {
	t0 := time.Now()
	err := s.base.Close(ctx)
	dt := time.Since(t0)
	s.printf(s.prefix+"Close()=%#v took %v", err, dt)
	return err
}

func (s *loggingStorage) ConnectionInfo() storage.ConnectionInfo {
	return s.base.ConnectionInfo()
}

// Option modifies the behavior of logging storage wrapper.
type Option func(s *loggingStorage)

// NewWrapper returns a Storage wrapper that logs all storage commands.
func NewWrapper(wrapped storage.Storage, options ...Option) storage.Storage {
	s := &loggingStorage{base: wrapped, printf: log.Debugf}
	for _, o := range options {
		o(s)
	}

	return s
}

// Output is a logging storage option that causes all output to be sent to a given function instead of the default logger.
func Output(outputFunc func(fmt string, args ...interface{})) Option {
	return func(s *loggingStorage) {
		s.printf = outputFunc
	}
}

// Prefix specifies prefix to be prepended to all log output.
func Prefix(prefix string) Option {
	return func(s *loggingStorage) {
		s.prefix = prefix
	}
}
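A brief sketch of how the wrapper composes with another backend (not part of this change); the filesystem path is illustrative:

package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage/filesystem"
	"github.com/kopia/repo/storage/logging"
)

func main() {
	ctx := context.Background()

	base, err := filesystem.New(ctx, &filesystem.Options{Path: "/tmp/repo"}) // illustrative path
	if err != nil {
		log.Fatalf("err: %v", err)
	}

	// every call on st is forwarded to base and logged with the given prefix
	st := logging.NewWrapper(base, logging.Prefix("fs: "), logging.Output(log.Printf))

	if err := st.PutBlock(ctx, "hello", []byte("world")); err != nil {
		log.Fatalf("err: %v", err)
	}
	st.Close(ctx) //nolint:errcheck
}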
39
storage/logging/logging_storage_test.go
Normal file
@@ -0,0 +1,39 @@
package logging

import (
	"context"
	"strings"
	"testing"

	"github.com/kopia/repo/internal/storagetesting"
)

func TestLoggingStorage(t *testing.T) {
	var outputCount int
	myPrefix := "myprefix"
	myOutput := func(msg string, args ...interface{}) {
		if !strings.HasPrefix(msg, myPrefix) {
			t.Errorf("unexpected prefix %v", msg)
		}
		outputCount++
	}

	data := map[string][]byte{}
	underlying := storagetesting.NewMapStorage(data, nil, nil)
	st := NewWrapper(underlying, Output(myOutput), Prefix(myPrefix))
	if st == nil {
		t.Fatalf("unexpected result: %v", st)
	}

	ctx := context.Background()
	storagetesting.VerifyStorage(ctx, t, st)
	if err := st.Close(ctx); err != nil {
		t.Fatalf("err: %v", err)
	}
	if outputCount == 0 {
		t.Errorf("did not write any output!")
	}
	if got, want := st.ConnectionInfo().Type, underlying.ConnectionInfo().Type; got != want {
		t.Errorf("unexpected connection info %v, want %v", got, want)
	}
}
21
storage/progress.go
Normal file
@@ -0,0 +1,21 @@
package storage

import "context"

type contextKey string

var progressCallbackContextKey contextKey = "progress-callback"

// ProgressFunc is used to report progress of a long-running storage operation.
type ProgressFunc func(desc string, completed, total int64)

// WithUploadProgressCallback returns a context that carries a callback function to be used for reporting storage upload progress.
func WithUploadProgressCallback(ctx context.Context, callback ProgressFunc) context.Context {
	return context.WithValue(ctx, progressCallbackContextKey, callback)
}

// ProgressCallback gets the progress callback function from the context.
func ProgressCallback(ctx context.Context) ProgressFunc {
	pf, _ := ctx.Value(progressCallbackContextKey).(ProgressFunc)
	return pf
}
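A sketch of attaching the callback (not part of this change); st may be any backend whose PutBlock consults ProgressCallback, such as the gcs and s3 implementations in this commit:

package example

import (
	"context"
	"fmt"

	"github.com/kopia/repo/storage"
)

// uploadWithProgress attaches a progress callback to the context before uploading.
func uploadWithProgress(st storage.Storage, id string, data []byte) error {
	ctx := storage.WithUploadProgressCallback(context.Background(),
		func(desc string, completed, total int64) {
			fmt.Printf("%v: %v/%v bytes\n", desc, completed, total)
		})

	return st.PutBlock(ctx, id, data)
}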
8
storage/providers/providers.go
Normal file
@@ -0,0 +1,8 @@
// Package providers registers all storage providers that are included as part of Kopia.
package providers

import (
	// Register well-known blob storage providers
	_ "github.com/kopia/repo/storage/filesystem"
	_ "github.com/kopia/repo/storage/gcs"
)
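A sketch of the intended use (not part of this change): a blank import of this package registers all bundled providers, after which storage.NewStorage can revive a backend from previously-saved ConnectionInfo:

package example

import (
	"context"

	"github.com/kopia/repo/storage"
	_ "github.com/kopia/repo/storage/providers" // registers filesystem and gcs
)

// openSaved reconstructs a Storage backend from saved connection info.
func openSaved(ctx context.Context, ci storage.ConnectionInfo) (storage.Storage, error) {
	return storage.NewStorage(ctx, ci)
}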
39
storage/registry.go
Normal file
@@ -0,0 +1,39 @@
package storage

import (
	"context"
	"fmt"
)

var (
	factories = map[string]*storageFactory{}
)

// storageFactory allows creation of storage backends in a generic way.
type storageFactory struct {
	defaultConfigFunc func() interface{}
	createStorageFunc func(context.Context, interface{}) (Storage, error)
}

// AddSupportedStorage registers factory function to create storage with a given type name.
func AddSupportedStorage(
	urlScheme string,
	defaultConfigFunc func() interface{},
	createStorageFunc func(context.Context, interface{}) (Storage, error)) {

	f := &storageFactory{
		defaultConfigFunc: defaultConfigFunc,
		createStorageFunc: createStorageFunc,
	}
	factories[urlScheme] = f
}

// NewStorage creates new storage based on ConnectionInfo.
// The storage type must be previously registered using AddSupportedStorage.
func NewStorage(ctx context.Context, cfg ConnectionInfo) (Storage, error) {
	if factory, ok := factories[cfg.Type]; ok {
		return factory.createStorageFunc(ctx, cfg.Config)
	}

	return nil, fmt.Errorf("unknown storage type: %s", cfg.Type)
}
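A sketch of how a third-party backend could hook into this registry (not part of this change); the type name, options struct, and constructor are hypothetical:

package mystorage

import (
	"context"
	"fmt"

	"github.com/kopia/repo/storage"
)

const myStorageType = "mystorage" // hypothetical type name

// myOptions is a hypothetical JSON-serializable configuration.
type myOptions struct {
	Endpoint string `json:"endpoint"`
}

// newMyStorage is a stub standing in for a real constructor.
func newMyStorage(ctx context.Context, opt *myOptions) (storage.Storage, error) {
	return nil, fmt.Errorf("not implemented: would connect to %v", opt.Endpoint)
}

func init() {
	storage.AddSupportedStorage(
		myStorageType,
		func() interface{} { return &myOptions{} },
		func(ctx context.Context, o interface{}) (storage.Storage, error) {
			return newMyStorage(ctx, o.(*myOptions))
		})
}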
20
storage/s3/s3_options.go
Normal file
@@ -0,0 +1,20 @@
package s3

// Options defines options for S3-based storage.
type Options struct {
	// BucketName is the name of the bucket where data is stored.
	BucketName string `json:"bucket"`

	// Prefix specifies additional string to prepend to all objects.
	Prefix string `json:"prefix,omitempty"`

	Endpoint    string `json:"endpoint"`
	DoNotUseTLS bool   `json:"doNotUseTLS,omitempty"`

	AccessKeyID     string `json:"accessKeyID"`
	SecretAccessKey string `json:"secretAccessKey" kopia:"sensitive"`

	MaxUploadSpeedBytesPerSecond int `json:"maxUploadSpeedBytesPerSecond,omitempty"`

	MaxDownloadSpeedBytesPerSecond int `json:"maxDownloadSpeedBytesPerSecond,omitempty"`
}
244
storage/s3/s3_storage.go
Normal file
@@ -0,0 +1,244 @@
// Package s3 implements Storage based on an S3 bucket.
package s3

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"io/ioutil"

	"github.com/efarrer/iothrottler"
	"github.com/kopia/repo/internal/retry"
	"github.com/kopia/repo/storage"
	"github.com/minio/minio-go"
)

const (
	s3storageType = "s3"
)

type s3Storage struct {
	Options

	ctx context.Context

	cli *minio.Client

	downloadThrottler *iothrottler.IOThrottlerPool
	uploadThrottler   *iothrottler.IOThrottlerPool
}

func (s *s3Storage) GetBlock(ctx context.Context, b string, offset, length int64) ([]byte, error) {
	attempt := func() (interface{}, error) {
		var opt minio.GetObjectOptions
		if length > 0 {
			if err := opt.SetRange(offset, offset+length-1); err != nil {
				return nil, fmt.Errorf("unable to set range: %v", err)
			}
		}

		o, err := s.cli.GetObject(s.BucketName, s.getObjectNameString(b), opt)
		if err != nil {
			return nil, err
		}

		defer o.Close() //nolint:errcheck
		throttled, err := s.downloadThrottler.AddReader(o)
		if err != nil {
			return nil, err
		}

		// note: a distinct name avoids shadowing the block ID parameter 'b'.
		v, err := ioutil.ReadAll(throttled)
		if err != nil {
			return nil, err
		}

		if len(v) != int(length) && length > 0 {
			return nil, fmt.Errorf("invalid length, got %v bytes, but expected %v", len(v), length)
		}

		if length == 0 {
			return []byte{}, nil
		}

		return v, nil
	}

	v, err := exponentialBackoff(fmt.Sprintf("GetBlock(%q,%v,%v)", b, offset, length), attempt)
	if err != nil {
		return nil, translateError(err)
	}

	return v.([]byte), nil
}

func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error) {
	return retry.WithExponentialBackoff(desc, att, isRetriableError)
}

func isRetriableError(err error) bool {
	if me, ok := err.(minio.ErrorResponse); ok {
		// retry on server errors, not on client errors
		return me.StatusCode >= 500
	}

	return false
}

func translateError(err error) error {
	if me, ok := err.(minio.ErrorResponse); ok {
		if me.StatusCode == 200 {
			return nil
		}
		if me.StatusCode == 404 {
			return storage.ErrBlockNotFound
		}
	}

	return err
}

func (s *s3Storage) PutBlock(ctx context.Context, b string, data []byte) error {
	throttled, err := s.uploadThrottler.AddReader(ioutil.NopCloser(bytes.NewReader(data)))
	if err != nil {
		return err
	}

	progressCallback := storage.ProgressCallback(ctx)
	if progressCallback != nil {
		progressCallback(b, 0, int64(len(data)))
		defer progressCallback(b, int64(len(data)), int64(len(data)))
	}
	n, err := s.cli.PutObject(s.BucketName, s.getObjectNameString(b), throttled, -1, minio.PutObjectOptions{
		ContentType: "application/x-kopia",
		Progress:    newProgressReader(progressCallback, b, int64(len(data))),
	})
	if err == io.EOF && n == 0 {
		// special case empty stream
		_, err = s.cli.PutObject(s.BucketName, s.getObjectNameString(b), bytes.NewBuffer(nil), 0, minio.PutObjectOptions{
			ContentType: "application/x-kopia",
		})
	}

	return translateError(err)
}

func (s *s3Storage) DeleteBlock(ctx context.Context, b string) error {
	attempt := func() (interface{}, error) {
		return nil, s.cli.RemoveObject(s.BucketName, s.getObjectNameString(b))
	}

	_, err := exponentialBackoff(fmt.Sprintf("DeleteBlock(%q)", b), attempt)
	return translateError(err)
}

func (s *s3Storage) getObjectNameString(b string) string {
	return s.Prefix + b
}

func (s *s3Storage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
	oi := s.cli.ListObjects(s.BucketName, s.Prefix+prefix, false, ctx.Done())
	for o := range oi {
		if err := o.Err; err != nil {
			return err
		}

		bm := storage.BlockMetadata{
			BlockID:   o.Key[len(s.Prefix):],
			Length:    o.Size,
			Timestamp: o.LastModified,
		}

		if err := callback(bm); err != nil {
			return err
		}
	}

	return nil
}

func (s *s3Storage) ConnectionInfo() storage.ConnectionInfo {
	return storage.ConnectionInfo{
		Type:   s3storageType,
		Config: &s.Options,
	}
}

func (s *s3Storage) Close(ctx context.Context) error {
	return nil
}

func (s *s3Storage) String() string {
	return fmt.Sprintf("s3://%v/%v", s.BucketName, s.Prefix)
}

type progressReader struct {
	cb           storage.ProgressFunc
	blockID      string
	completed    int64
	totalLength  int64
	lastReported int64
}

func (r *progressReader) Read(b []byte) (int, error) {
	r.completed += int64(len(b))
	if r.completed >= r.lastReported+1000000 && r.completed < r.totalLength {
		r.cb(r.blockID, r.completed, r.totalLength)
		r.lastReported = r.completed
	}
	return len(b), nil
}

func newProgressReader(cb storage.ProgressFunc, blockID string, totalLength int64) io.Reader {
	if cb == nil {
		return nil
	}

	return &progressReader{cb: cb, blockID: blockID, totalLength: totalLength}
}

func toBandwidth(bytesPerSecond int) iothrottler.Bandwidth {
	if bytesPerSecond <= 0 {
		return iothrottler.Unlimited
	}

	return iothrottler.Bandwidth(bytesPerSecond) * iothrottler.BytesPerSecond
}

// New creates new S3-backed storage with specified options:
//
// - the 'BucketName' field is required and all other parameters are optional.
func New(ctx context.Context, opt *Options) (storage.Storage, error) {
	if opt.BucketName == "" {
		return nil, errors.New("bucket name must be specified")
	}

	cli, err := minio.New(opt.Endpoint, opt.AccessKeyID, opt.SecretAccessKey, !opt.DoNotUseTLS)
	if err != nil {
		return nil, fmt.Errorf("unable to create client: %v", err)
	}

	downloadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxDownloadSpeedBytesPerSecond))
	uploadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxUploadSpeedBytesPerSecond))

	return &s3Storage{
		Options:           *opt,
		ctx:               ctx,
		cli:               cli,
		downloadThrottler: downloadThrottler,
		uploadThrottler:   uploadThrottler,
	}, nil
}

func init() {
	storage.AddSupportedStorage(
		s3storageType,
		func() interface{} {
			return &Options{}
		},
		func(ctx context.Context, o interface{}) (storage.Storage, error) {
			return New(ctx, o.(*Options))
		})
}
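A usage sketch for the backend above (not part of this change); the endpoint and credentials are placeholders for any S3-compatible service:

package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage/s3"
)

func main() {
	ctx := context.Background()

	st, err := s3.New(ctx, &s3.Options{
		BucketName:      "my-bucket",      // placeholder
		Endpoint:        "s3.example.com", // placeholder
		AccessKeyID:     "ACCESS-KEY",     // placeholder
		SecretAccessKey: "SECRET-KEY",     // placeholder
	})
	if err != nil {
		log.Fatalf("unable to connect: %v", err)
	}
	defer st.Close(ctx) //nolint:errcheck

	if err := st.PutBlock(ctx, "example", []byte{1, 2, 3}); err != nil {
		log.Fatalf("PutBlock failed: %v", err)
	}
}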
116
storage/s3/s3_storage_test.go
Normal file
@@ -0,0 +1,116 @@
package s3

import (
	"context"
	"crypto/rand"
	"crypto/sha1"
	"fmt"
	"log"
	"net"
	"os"
	"testing"
	"time"

	"github.com/kopia/repo/internal/storagetesting"
	"github.com/kopia/repo/storage"
	"github.com/minio/minio-go"
)

// https://github.com/minio/minio-go
const (
	endpoint        = "play.minio.io:9000"
	accessKeyID     = "Q3AM3UQ867SPQQA43P2F"
	secretAccessKey = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"
	useSSL          = true

	// the test takes a few seconds, delete stuff older than 1h to avoid accumulating cruft
	cleanupAge = 1 * time.Hour
)

var bucketName = getBucketName()

func getBucketName() string {
	hn, err := os.Hostname()
	if err != nil {
		return "kopia-test-1"
	}
	h := sha1.New()
	fmt.Fprintf(h, "%v", hn)
	return fmt.Sprintf("kopia-test-%x", h.Sum(nil)[0:8])
}

func endpointReachable() bool {
	conn, err := net.DialTimeout("tcp4", endpoint, 5*time.Second)
	if err == nil {
		conn.Close()
		return true
	}

	return false
}

func TestS3Storage(t *testing.T) {
	if !endpointReachable() {
		t.Skip("endpoint not reachable")
	}

	ctx := context.Background()

	// recreate per-host bucket, which sometimes gets cleaned up by play.minio.io
	createBucket(t)
	cleanupOldData(ctx, t)

	data := make([]byte, 8)
	rand.Read(data) //nolint:errcheck

	st, err := New(context.Background(), &Options{
		AccessKeyID:     accessKeyID,
		SecretAccessKey: secretAccessKey,
		Endpoint:        endpoint,
		BucketName:      bucketName,
		Prefix:          fmt.Sprintf("test-%v-%x-", time.Now().Unix(), data),
	})
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	storagetesting.VerifyStorage(ctx, t, st)
	storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st)
	if err := st.Close(ctx); err != nil {
		t.Fatalf("err: %v", err)
	}
}

func createBucket(t *testing.T) {
	minioClient, err := minio.New(endpoint, accessKeyID, secretAccessKey, useSSL)
	if err != nil {
		t.Fatalf("can't initialize minio client: %v", err)
	}
	// ignore error
	_ = minioClient.MakeBucket(bucketName, "us-east-1")
}

func cleanupOldData(ctx context.Context, t *testing.T) {
	// cleanup old data from the bucket
	st, err := New(context.Background(), &Options{
		AccessKeyID:     accessKeyID,
		SecretAccessKey: secretAccessKey,
		Endpoint:        endpoint,
		BucketName:      bucketName,
	})
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	_ = st.ListBlocks(ctx, "", func(it storage.BlockMetadata) error {
		age := time.Since(it.Timestamp)
		if age > cleanupAge {
			if err := st.DeleteBlock(ctx, it.BlockID); err != nil {
				t.Errorf("warning: unable to delete %q: %v", it.BlockID, err)
			}
		} else {
			log.Printf("keeping %v", it.BlockID)
		}
		return nil
	})
}
108
storage/storage.go
Normal file
@@ -0,0 +1,108 @@
package storage

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// CancelFunc requests cancellation of a storage operation.
type CancelFunc func()

// Storage encapsulates API for connecting to blob storage.
//
// The underlying storage system must provide:
//
// * high durability, availability and bit-rot protection
// * read-after-write - block written using PutBlock() must be immediately readable using GetBlock() and ListBlocks()
// * atomicity - it mustn't be possible to observe partial results of PutBlock() via either GetBlock() or ListBlocks()
// * timestamps that don't go back in time (small clock skew up to minutes is allowed)
// * reasonably low latency for retrievals
//
// The required semantics are provided by existing commercial cloud storage products (Google Cloud, AWS, Azure).
type Storage interface {
	// PutBlock uploads the block with given data to the repository or replaces existing block with the provided
	// id with given contents.
	PutBlock(ctx context.Context, id string, data []byte) error

	// DeleteBlock removes the block from storage. Future GetBlock() operations will fail with ErrBlockNotFound.
	DeleteBlock(ctx context.Context, id string) error

	// GetBlock returns full or partial contents of a block with given ID.
	// If length>0, the function retrieves a range of bytes [offset,offset+length).
	// If length<0, the entire block must be fetched.
	GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error)

	// ListBlocks invokes the provided callback with BlockMetadata for each storage block whose name has the given prefix.
	// Iteration continues until all matching blocks have been listed or until the callback returns an error.
	ListBlocks(ctx context.Context, prefix string, cb func(bm BlockMetadata) error) error

	// ConnectionInfo returns JSON-serializable data structure containing information required to
	// connect to storage.
	ConnectionInfo() ConnectionInfo

	// Close releases all resources associated with storage.
	Close(ctx context.Context) error
}

// BlockMetadata represents metadata about a single block in a storage.
type BlockMetadata struct {
	BlockID   string
	Length    int64
	Timestamp time.Time
}

// ErrBlockNotFound is returned when a block cannot be found in storage.
var ErrBlockNotFound = errors.New("block not found")

// ListAllBlocks returns BlockMetadata for all blocks in a given storage that have the provided name prefix.
func ListAllBlocks(ctx context.Context, st Storage, prefix string) ([]BlockMetadata, error) {
	var result []BlockMetadata

	err := st.ListBlocks(ctx, prefix, func(bm BlockMetadata) error {
		result = append(result, bm)
		return nil
	})

	return result, err
}

// ListAllBlocksConsistent lists all blocks with given name prefix in the provided storage until the results are
// consistent. The results are consistent if the list result fetched twice is identical. This guarantees that while
// the first scan was in progress, no new block was added or removed.
// maxAttempts specifies the maximum number of list attempts (must be >= 2).
func ListAllBlocksConsistent(ctx context.Context, st Storage, prefix string, maxAttempts int) ([]BlockMetadata, error) {
	var previous []BlockMetadata

	for i := 0; i < maxAttempts; i++ {
		result, err := ListAllBlocks(ctx, st, prefix)
		if err != nil {
			return nil, err
		}
		if i > 0 && sameBlocks(result, previous) {
			return result, nil
		}

		previous = result
	}

	return nil, fmt.Errorf("unable to achieve consistent snapshot despite %v attempts", maxAttempts)
}

// sameBlocks returns true if b1 & b2 contain the same blocks (ignoring order).
func sameBlocks(b1, b2 []BlockMetadata) bool {
	if len(b1) != len(b2) {
		return false
	}
	m := map[string]BlockMetadata{}
	for _, b := range b1 {
		m[b.BlockID] = b
	}
	for _, b := range b2 {
		if m[b.BlockID] != b {
			return false
		}
	}
	return true
}
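A sketch of calling the consistent-listing helper above (not part of this change); it retries the double scan up to 4 times before giving up:

package example

import (
	"context"
	"fmt"

	"github.com/kopia/repo/storage"
)

// snapshotBlocks returns a listing that did not change between two consecutive scans.
func snapshotBlocks(ctx context.Context, st storage.Storage) ([]storage.BlockMetadata, error) {
	blocks, err := storage.ListAllBlocksConsistent(ctx, st, "", 4)
	if err != nil {
		return nil, err
	}
	for _, bm := range blocks {
		fmt.Printf("%v: %v bytes @ %v\n", bm.BlockID, bm.Length, bm.Timestamp)
	}
	return blocks, nil
}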
57
storage/storage_test.go
Normal file
@@ -0,0 +1,57 @@
package storage_test

import (
	"context"
	"testing"
	"time"

	"github.com/kopia/repo/internal/storagetesting"
	"github.com/kopia/repo/storage"
)

func TestListAllBlocksConsistent(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	st := storagetesting.NewMapStorage(data, nil, time.Now)
	st.PutBlock(ctx, "foo1", []byte{1, 2, 3}) //nolint:errcheck
	st.PutBlock(ctx, "foo2", []byte{1, 2, 3}) //nolint:errcheck
	st.PutBlock(ctx, "foo3", []byte{1, 2, 3}) //nolint:errcheck

	// set up faulty storage that will add a block while a scan is in progress.
	f := &storagetesting.FaultyStorage{
		Base: st,
		Faults: map[string][]*storagetesting.Fault{
			"ListBlocksItem": {
				{ErrCallback: func() error {
					st.PutBlock(ctx, "foo0", []byte{1, 2, 3}) //nolint:errcheck
					return nil
				}},
			},
		},
	}

	r, err := storage.ListAllBlocksConsistent(ctx, f, "foo", 3)
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	// make sure we get the list with 4 items, not 3.
	if got, want := len(r), 4; got != want {
		t.Errorf("unexpected list result count: %v, want %v", got, want)
	}
}

func TestListAllBlocksConsistentEmpty(t *testing.T) {
	ctx := context.Background()
	data := map[string][]byte{}
	st := storagetesting.NewMapStorage(data, nil, time.Now)

	r, err := storage.ListAllBlocksConsistent(ctx, st, "foo", 3)
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	if got, want := len(r), 0; got != want {
		t.Errorf("unexpected list result count: %v, want %v", got, want)
	}
}
17
storage/webdav/webdav_options.go
Normal file
@@ -0,0 +1,17 @@
package webdav

// Options defines options for WebDAV-backed storage.
type Options struct {
	URL             string `json:"url"`
	DirectoryShards []int  `json:"dirShards"`
	Username        string `json:"username,omitempty"`
	Password        string `json:"password,omitempty" kopia:"sensitive"`
}

func (fso *Options) shards() []int {
	if fso.DirectoryShards == nil {
		return fsDefaultShards
	}

	return fso.DirectoryShards
}
210
storage/webdav/webdav_storage.go
Normal file
@@ -0,0 +1,210 @@
// Package webdav implements WebDAV-based Storage.
package webdav

import (
	"context"
	"errors"
	"fmt"
	"math/rand"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/kopia/repo/storage"
	"github.com/studio-b12/gowebdav"
)

const (
	davStorageType       = "webdav"
	fsStorageChunkSuffix = ".f"
)

var (
	fsDefaultShards = []int{3, 3}
)

// davStorage implements storage.Storage on top of a remote WebDAV repository.
// It is very similar to File storage, except it uses HTTP URLs instead of local files.
// Storage formats are compatible (both use sharded directory structure), so a repository
// may be accessed using WebDAV or File interchangeably.
type davStorage struct {
	Options

	cli *gowebdav.Client
}

func (d *davStorage) GetBlock(ctx context.Context, blockID string, offset, length int64) ([]byte, error) {
	_, path := d.getDirPathAndFilePath(blockID)

	data, err := d.cli.Read(path)
	if err != nil {
		return nil, d.translateError(err)
	}
	if length < 0 {
		return data, nil
	}

	if int(offset) > len(data) || offset < 0 {
		return nil, errors.New("invalid offset")
	}

	data = data[offset:]
	if int(length) > len(data) {
		return nil, errors.New("invalid length")
	}

	return data[0:length], nil
}

func (d *davStorage) translateError(err error) error {
	switch err := err.(type) {
	case *os.PathError:
		switch err.Err.Error() {
		case "404":
			return storage.ErrBlockNotFound
		}
		return err
	default:
		return err
	}
}

func getBlockIDFromFileName(name string) (string, bool) {
	if strings.HasSuffix(name, fsStorageChunkSuffix) {
		return name[0 : len(name)-len(fsStorageChunkSuffix)], true
	}

	return "", false
}

func makeFileName(blockID string) string {
	return blockID + fsStorageChunkSuffix
}

func (d *davStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
	var walkDir func(string, string) error

	walkDir = func(path string, currentPrefix string) error {
		entries, err := d.cli.ReadDir(gowebdav.FixSlash(path))
		if err != nil {
			return fmt.Errorf("read dir error on %v: %v", path, err)
		}

		sort.Slice(entries, func(i, j int) bool {
			return entries[i].Name() < entries[j].Name()
		})

		for _, e := range entries {
			if e.IsDir() {
				newPrefix := currentPrefix + e.Name()
				var match bool

				if len(prefix) > len(newPrefix) {
					// looking for 'abcd', got 'ab' so far, worth trying
					match = strings.HasPrefix(prefix, newPrefix)
				} else {
					match = strings.HasPrefix(newPrefix, prefix)
				}

				if match {
					if err := walkDir(path+"/"+e.Name(), currentPrefix+e.Name()); err != nil {
						return err
					}
				}
			} else if fullID, ok := getBlockIDFromFileName(currentPrefix + e.Name()); ok {
				if strings.HasPrefix(fullID, prefix) {
					if err := callback(storage.BlockMetadata{
						BlockID:   fullID,
						Length:    e.Size(),
						Timestamp: e.ModTime(),
					}); err != nil {
						return err
					}
				}
			}
		}

		return nil
	}

	return walkDir("", "")
}

func (d *davStorage) PutBlock(ctx context.Context, blockID string, data []byte) error {
	dirPath, filePath := d.getDirPathAndFilePath(blockID)
	tmpPath := fmt.Sprintf("%v-%v", filePath, rand.Int63())
	if err := d.translateError(d.cli.Write(tmpPath, data, 0600)); err != nil {
		if err != storage.ErrBlockNotFound {
			return err
		}

		d.cli.MkdirAll(dirPath, 0700) //nolint:errcheck
		if err = d.translateError(d.cli.Write(tmpPath, data, 0600)); err != nil {
			return err
		}
	}

	return d.translateError(d.cli.Rename(tmpPath, filePath, true))
}

func (d *davStorage) DeleteBlock(ctx context.Context, blockID string) error {
	_, filePath := d.getDirPathAndFilePath(blockID)
	return d.translateError(d.cli.Remove(filePath))
}

func (d *davStorage) getShardDirectory(blockID string) (string, string) {
	shardPath := "/"
	if len(blockID) < 20 {
		return shardPath, blockID
	}
	for _, size := range d.shards() {
		shardPath = filepath.Join(shardPath, blockID[0:size])
		blockID = blockID[size:]
	}

	return shardPath, blockID
}

func (d *davStorage) getDirPathAndFilePath(blockID string) (string, string) {
	shardPath, blockID := d.getShardDirectory(blockID)
	result := filepath.Join(shardPath, makeFileName(blockID))
	return shardPath, result
}

func (d *davStorage) ConnectionInfo() storage.ConnectionInfo {
	return storage.ConnectionInfo{
		Type:   davStorageType,
		Config: &d.Options,
	}
}

func (d *davStorage) Close(ctx context.Context) error {
	return nil
}

// New creates new WebDAV-backed storage in a specified URL.
func New(ctx context.Context, opts *Options) (storage.Storage, error) {
	r := &davStorage{
		Options: *opts,
		cli:     gowebdav.NewClient(opts.URL, opts.Username, opts.Password),
	}

	for _, s := range r.shards() {
		if s == 0 {
			return nil, fmt.Errorf("invalid shard spec: %v", opts.DirectoryShards)
		}
	}

	r.Options.URL = strings.TrimSuffix(r.Options.URL, "/")
	return r, nil
}

func init() {
	storage.AddSupportedStorage(
		davStorageType,
		func() interface{} { return &Options{} },
		func(ctx context.Context, o interface{}) (storage.Storage, error) {
			return New(ctx, o.(*Options))
		})
}
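A usage sketch for the WebDAV backend (not part of this change); the URL and credentials are placeholders:

package main

import (
	"context"
	"log"

	"github.com/kopia/repo/storage/webdav"
)

func main() {
	ctx := context.Background()

	st, err := webdav.New(ctx, &webdav.Options{
		URL:             "https://dav.example.com/kopia", // placeholder
		Username:        "user",                          // placeholder
		Password:        "secret",                        // placeholder
		DirectoryShards: []int{3, 3},                     // same sharded layout as the filesystem backend
	})
	if err != nil {
		log.Fatalf("unable to connect: %v", err)
	}
	defer st.Close(ctx) //nolint:errcheck

	// with {3,3} shards, block "abcdef0123456789abcdef" is stored as /abc/def/0123456789abcdef.f
	if err := st.PutBlock(ctx, "abcdef0123456789abcdef", []byte{1}); err != nil {
		log.Fatalf("PutBlock failed: %v", err)
	}
}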
65
storage/webdav/webdav_storage_test.go
Normal file
@@ -0,0 +1,65 @@
package webdav

import (
	"context"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/httptest"
	"os"
	"testing"

	"golang.org/x/net/webdav"

	"github.com/kopia/repo/internal/storagetesting"
)

func TestWebDAVStorage(t *testing.T) {
	tmpDir, _ := ioutil.TempDir("", "webdav")
	defer os.RemoveAll(tmpDir)

	t.Logf("tmpDir: %v", tmpDir)

	mux := http.NewServeMux()
	mux.Handle("/", &webdav.Handler{
		FileSystem: webdav.Dir(tmpDir),
		LockSystem: webdav.NewMemLS(),
	})

	server := httptest.NewServer(mux)
	defer server.Close()

	ctx := context.Background()

	// Test various shard configurations.
	for _, shardSpec := range [][]int{
		{1},
		{3, 3},
		{2},
		{1, 1},
		{1, 2},
		{2, 2, 2},
	} {
		t.Run(fmt.Sprintf("shards-%v", shardSpec), func(t *testing.T) {
			if err := os.RemoveAll(tmpDir); err != nil {
				t.Errorf("can't remove all: %q", tmpDir)
			}
			os.MkdirAll(tmpDir, 0700) //nolint:errcheck

			r, err := New(context.Background(), &Options{
				URL:             server.URL,
				DirectoryShards: shardSpec,
			})

			if r == nil || err != nil {
				t.Errorf("unexpected result: %v %v", r, err)
			}

			storagetesting.VerifyStorage(ctx, t, r)
			storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r)
			if err := r.Close(ctx); err != nil {
				t.Fatalf("err: %v", err)
			}
		})
	}
}
BIN
test_service_account.json.enc
Normal file
Binary file not shown.
3
tests/repository_stress_test/repository_stress.go
Normal file
@@ -0,0 +1,3 @@
package repositorystress

// dummy package
319
tests/repository_stress_test/repository_stress_test.go
Normal file
@@ -0,0 +1,319 @@
package repositorystress_test

import (
	"context"
	"fmt"
	"io/ioutil"
	"log"
	"math/rand"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/kopia/repo"
	"github.com/kopia/repo/block"
	"github.com/kopia/repo/storage"
	"github.com/kopia/repo/storage/filesystem"
)

const masterPassword = "foo-bar-baz-1234"

var (
	knownBlocks      []string
	knownBlocksMutex sync.Mutex
)

func TestStressRepository(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping stress test during short tests")
	}
	ctx := block.UsingListCache(context.Background(), false)

	tmpPath, err := ioutil.TempDir("", "kopia")
	if err != nil {
		t.Fatalf("unable to create temp directory")
	}

	defer func() {
		if !t.Failed() {
			os.RemoveAll(tmpPath)
		}
	}()

	t.Logf("path: %v", tmpPath)

	storagePath := filepath.Join(tmpPath, "storage")
	configFile1 := filepath.Join(tmpPath, "kopia1.config")
	configFile2 := filepath.Join(tmpPath, "kopia2.config")

	assertNoError(t, os.MkdirAll(storagePath, 0700))
	st, err := filesystem.New(ctx, &filesystem.Options{
		Path: storagePath,
	})
	if err != nil {
		t.Fatalf("unable to initialize storage: %v", err)
	}

	// create repository
	if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{}, masterPassword); err != nil {
		t.Fatalf("unable to initialize repository: %v", err)
	}

	// set up two parallel kopia connections, each with its own config file and cache.
	if err := repo.Connect(ctx, configFile1, st, masterPassword, repo.ConnectOptions{
		CachingOptions: block.CachingOptions{
			CacheDirectory:    filepath.Join(tmpPath, "cache1"),
			MaxCacheSizeBytes: 2000000000,
		},
	}); err != nil {
		t.Fatalf("unable to connect 1: %v", err)
	}

	if err := repo.Connect(ctx, configFile2, st, masterPassword, repo.ConnectOptions{
		CachingOptions: block.CachingOptions{
			CacheDirectory:    filepath.Join(tmpPath, "cache2"),
			MaxCacheSizeBytes: 2000000000,
		},
	}); err != nil {
		t.Fatalf("unable to connect 2: %v", err)
	}

	cancel := make(chan struct{})

	var wg sync.WaitGroup
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
	wg.Add(1)
	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)

	time.Sleep(5 * time.Second)
	close(cancel)

	wg.Wait()
}

func longLivedRepositoryTest(ctx context.Context, t *testing.T, cancel chan struct{}, configFile string, wg *sync.WaitGroup) {
	defer wg.Done()

	rep, err := repo.Open(ctx, configFile, masterPassword, &repo.Options{})
	if err != nil {
		t.Errorf("error opening repository: %v", err)
		return
	}
	defer rep.Close(ctx)

	var wg2 sync.WaitGroup

	for i := 0; i < 4; i++ {
		wg2.Add(1)
		go func() {
			defer wg2.Done()

			repositoryTest(ctx, t, cancel, rep)
		}()
	}

	wg2.Wait()
}

func repositoryTest(ctx context.Context, t *testing.T, cancel chan struct{}, rep *repo.Repository) {
	// reopen := func(t *testing.T, r *repo.Repository) error {
	// 	if err := rep.Close(ctx); err != nil {
	// 		return fmt.Errorf("error closing: %v", err)
	// 	}

	// 	t0 := time.Now()
	// 	rep, err = repo.Open(ctx, configFile, &repo.Options{})
	// 	log.Printf("reopened in %v", time.Since(t0))
	// 	return err
	// }

	workTypes := []*struct {
		name     string
		fun      func(ctx context.Context, t *testing.T, r *repo.Repository) error
		weight   int
		hitCount int
	}{
		//{"reopen", reopen, 1, 0},
		{"writeRandomBlock", writeRandomBlock, 100, 0},
		{"writeRandomManifest", writeRandomManifest, 100, 0},
		{"readKnownBlock", readKnownBlock, 500, 0},
		{"listBlocks", listBlocks, 50, 0},
		{"listAndReadAllBlocks", listAndReadAllBlocks, 5, 0},
		{"readRandomManifest", readRandomManifest, 50, 0},
		{"compact", compact, 1, 0},
		{"refresh", refresh, 3, 0},
		{"flush", flush, 1, 0},
	}

	var totalWeight int
	for _, w := range workTypes {
		totalWeight += w.weight
	}

	iter := 0
	for {
		select {
		case <-cancel:
			rep.Close(ctx)
			return
		default:
		}

		if iter%1000 == 0 {
			var bits []string
			for _, w := range workTypes {
				bits = append(bits, fmt.Sprintf("%v:%v", w.name, w.hitCount))
			}
			log.Printf("#%v %v %v goroutines", iter, strings.Join(bits, " "), runtime.NumGoroutine())
		}
		iter++

		// weighted roulette-wheel selection of the next operation
		roulette := rand.Intn(totalWeight)
		for _, w := range workTypes {
			if roulette < w.weight {
				w.hitCount++
				//log.Printf("running %v", w.name)
				if err := w.fun(ctx, t, rep); err != nil {
					w.hitCount++
					t.Errorf("error: %v", fmt.Errorf("error running %v: %v", w.name, err))
					return
				}
				break
			}

			roulette -= w.weight
		}
	}
}

func writeRandomBlock(ctx context.Context, t *testing.T, r *repo.Repository) error {
	data := make([]byte, 1000)
	rand.Read(data)
	blockID, err := r.Blocks.WriteBlock(ctx, data, "")
	if err == nil {
		knownBlocksMutex.Lock()
		if len(knownBlocks) >= 1000 {
			n := rand.Intn(len(knownBlocks))
			knownBlocks[n] = blockID
		} else {
			knownBlocks = append(knownBlocks, blockID)
		}
		knownBlocksMutex.Unlock()
	}
	return err
}

func readKnownBlock(ctx context.Context, t *testing.T, r *repo.Repository) error {
	knownBlocksMutex.Lock()
	if len(knownBlocks) == 0 {
		knownBlocksMutex.Unlock()
		return nil
	}
	blockID := knownBlocks[rand.Intn(len(knownBlocks))]
	knownBlocksMutex.Unlock()

	_, err := r.Blocks.GetBlock(ctx, blockID)
	if err == nil || err == storage.ErrBlockNotFound {
		return nil
	}

	return err
}

func listBlocks(ctx context.Context, t *testing.T, r *repo.Repository) error {
	_, err := r.Blocks.ListBlocks("")
	return err
}

func listAndReadAllBlocks(ctx context.Context, t *testing.T, r *repo.Repository) error {
	blocks, err := r.Blocks.ListBlocks("")
	if err != nil {
		return err
	}

	for _, bi := range blocks {
		_, err := r.Blocks.GetBlock(ctx, bi)
		if err != nil {
			if err == storage.ErrBlockNotFound && strings.HasPrefix(bi, "m") {
				// this is ok, sometimes manifest manager will perform compaction and 'm' blocks will be marked as deleted
				continue
			}
			return fmt.Errorf("error reading block %v: %v", bi, err)
		}
	}

	return nil
}

func compact(ctx context.Context, t *testing.T, r *repo.Repository) error {
	return r.Blocks.CompactIndexes(ctx, block.CompactOptions{
		MinSmallBlocks: 1,
		MaxSmallBlocks: 1,
	})
}

func flush(ctx context.Context, t *testing.T, r *repo.Repository) error {
	return r.Flush(ctx)
}

func refresh(ctx context.Context, t *testing.T, r *repo.Repository) error {
	return r.Refresh(ctx)
}

func readRandomManifest(ctx context.Context, t *testing.T, r *repo.Repository) error {
	manifests, err := r.Manifests.Find(ctx, nil)
	if err != nil {
		return err
	}
	if len(manifests) == 0 {
		return nil
	}
	n := rand.Intn(len(manifests))
	_, err = r.Manifests.GetRaw(ctx, manifests[n].ID)
	return err
}

func writeRandomManifest(ctx context.Context, t *testing.T, r *repo.Repository) error {
	key1 := fmt.Sprintf("key-%v", rand.Intn(10))
	key2 := fmt.Sprintf("key-%v", rand.Intn(10))
	val1 := fmt.Sprintf("val1-%v", rand.Intn(10))
	val2 := fmt.Sprintf("val2-%v", rand.Intn(10))
	content1 := fmt.Sprintf("content-%v", rand.Intn(10))
	content2 := fmt.Sprintf("content-%v", rand.Intn(10))
	content1val := fmt.Sprintf("val1-%v", rand.Intn(10))
	content2val := fmt.Sprintf("val2-%v", rand.Intn(10))
	_, err := r.Manifests.Put(ctx, map[string]string{
		"type": key1,
		key1:   val1,
		key2:   val2,
	}, map[string]string{
		content1: content1val,
		content2: content2val,
	})
	return err
}

func assertNoError(t *testing.T, err error) {
	t.Helper()
	if err != nil {
		t.Errorf("err: %v", err)
	}
}
3
tests/stress_test/stress.go
Normal file
@@ -0,0 +1,3 @@
package stress

// dummy package
132
tests/stress_test/stress_test.go
Normal file
@@ -0,0 +1,132 @@
package stress_test

import (
	"context"
	"fmt"
	"math/rand"
	"os"
	"reflect"
	"testing"
	"time"

	"github.com/kopia/repo/block"
	"github.com/kopia/repo/internal/storagetesting"
	"github.com/kopia/repo/storage"
)

const goroutineCount = 16

func TestStressBlockManager(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping stress test during short tests")
	}

	data := map[string][]byte{}
	keyTimes := map[string]time.Time{}
	memst := storagetesting.NewMapStorage(data, keyTimes, time.Now)

	var duration = 3 * time.Second
	if os.Getenv("KOPIA_LONG_STRESS_TEST") != "" {
		duration = 3 * time.Minute
	}

	stressTestWithStorage(t, memst, duration)
}

func stressTestWithStorage(t *testing.T, st storage.Storage, duration time.Duration) {
	ctx := context.Background()

	openMgr := func() (*block.Manager, error) {
		return block.NewManager(ctx, st, block.FormattingOptions{
			Version:     1,
			Hash:        "HMAC-SHA256-128",
			Encryption:  "AES-256-CTR",
			MaxPackSize: 20000000,
			MasterKey:   []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
		}, block.CachingOptions{}, nil)
	}

	seed0 := time.Now().Nanosecond()

	t.Logf("running with seed %v", seed0)

	deadline := time.Now().Add(duration)

	t.Run("workers", func(t *testing.T) {
		for i := 0; i < goroutineCount; i++ {
			i := i
			t.Run(fmt.Sprintf("worker-%v", i), func(t *testing.T) {
				t.Parallel()
				stressWorker(ctx, t, deadline, i, openMgr, int64(seed0+i))
			})
		}
	})
}

func stressWorker(ctx context.Context, t *testing.T, deadline time.Time, workerID int, openMgr func() (*block.Manager, error), seed int64) {
	src := rand.NewSource(seed)
	rand := rand.New(src)

	bm, err := openMgr()
	if err != nil {
		t.Fatalf("error opening manager: %v", err)
	}

	type writtenBlock struct {
		contentID string
		data      []byte
	}

	var workerBlocks []writtenBlock

	for time.Now().Before(deadline) {
		l := rand.Intn(30000)
		data := make([]byte, l)
		if _, err := rand.Read(data); err != nil {
			t.Errorf("err: %v", err)
			return
		}
		dataCopy := append([]byte{}, data...)
		contentID, err := bm.WriteBlock(ctx, data, "")
		if err != nil {
			t.Errorf("err: %v", err)
			return
		}

		switch rand.Intn(20) {
		case 0:
			if err := bm.Flush(ctx); err != nil {
				t.Errorf("flush error: %v", err)
				return
			}
		case 1:
			if err := bm.Flush(ctx); err != nil {
				t.Errorf("flush error: %v", err)
				return
			}
			bm, err = openMgr()
			if err != nil {
				t.Errorf("error opening: %v", err)
				return
			}
		}

		//log.Printf("wrote %v", contentID)
		workerBlocks = append(workerBlocks, writtenBlock{contentID, dataCopy})
		if len(workerBlocks) > 5 {
			pos := rand.Intn(len(workerBlocks))
			previous := workerBlocks[pos]
			//log.Printf("reading %v", previous.contentID)
			d2, err := bm.GetBlock(ctx, previous.contentID)
			if err != nil {
				t.Errorf("error verifying block %q: %v", previous.contentID, err)
				return
			}
			if !reflect.DeepEqual(previous.data, d2) {
				t.Errorf("invalid previous data for %q %x %x", previous.contentID, d2, previous.data)
				return
			}
			workerBlocks = append(workerBlocks[0:pos], workerBlocks[pos+1:]...)
		}
	}
}
35
upgrade.go
Normal file
@@ -0,0 +1,35 @@
package repo

import (
	"context"
	"fmt"

	"github.com/pkg/errors"
)

// Upgrade upgrades repository data structures to the latest version.
func (r *Repository) Upgrade(ctx context.Context) error {
	f := r.formatBlock

	log.Debug("decrypting format...")
	repoConfig, err := f.decryptFormatBytes(r.masterKey)
	if err != nil {
		return errors.Wrap(err, "unable to decrypt repository config")
	}

	var migrated bool

	// TODO(jkowalski): add migration code here
	if !migrated {
		log.Infof("nothing to do")
		return nil
	}

	log.Debug("encrypting format...")
	if err := encryptFormatBytes(f, repoConfig, r.masterKey, f.UniqueID); err != nil {
		return fmt.Errorf("unable to encrypt format bytes")
	}

	log.Infof("writing updated format block...")
	return writeFormatBlock(ctx, r.Storage, f)
}