From 45101e3eca28909e66eaabb5c6d3427742d54c84 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 26 Oct 2018 17:28:49 -0700 Subject: [PATCH 01/74] Initial commit --- .gitignore | 12 ++++ LICENSE | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..f1c181ec9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. From e458ee24d89d5c60ce4aab7bf79bac8c7e32d482 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 26 Oct 2018 17:33:58 -0700 Subject: [PATCH 02/74] imported github.com/kopia/kopia/repo and renamed package path to github.com/kopia/repo/ --- .gitignore | 1 + block/block_cache.go | 216 ++++ block/block_cache_test.go | 130 +++ block/block_formatter.go | 120 ++ block/block_formatter_test.go | 48 + block/block_formatting_options.go | 10 + block/block_index_recovery.go | 227 ++++ block/block_manager.go | 1018 +++++++++++++++++ block/block_manager_compaction.go | 148 +++ block/block_manager_test.go | 819 +++++++++++++ block/cache_hmac.go | 33 + block/caching_options.go | 10 + block/committed_block_index.go | 139 +++ block/committed_block_index_disk_cache.go | 135 +++ block/committed_block_index_mem_cache.go | 50 + block/context.go | 34 + block/list_cache.go | 123 ++ block/stats.go | 25 + connect.go | 111 ++ crypto_key_derivation.go | 37 + doc.go | 2 + format_block.go | 152 +++ initialize.go | 134 +++ internal/jsonstream/doc.go | 2 + internal/jsonstream/reader.go | 112 ++ internal/jsonstream/stream_test.go | 117 ++ internal/jsonstream/writer.go | 59 + internal/packindex/builder.go | 152 +++ internal/packindex/content_id_to_bytes.go | 38 + internal/packindex/format.go | 74 ++ internal/packindex/index.go | 196 ++++ internal/packindex/info.go | 22 + internal/packindex/merged.go | 132 +++ internal/packindex/merged_test.go | 94 ++ internal/packindex/packindex_internal_test.go | 26 + internal/packindex/packindex_test.go | 237 ++++ internal/packindex/subset.go | 28 + internal/packindex/subset_test.go | 60 + internal/repologging/logging.go | 9 + internal/repotesting/repotesting.go | 131 +++ internal/retry/retry.go | 44 + internal/storagetesting/asserts.go | 84 ++ internal/storagetesting/doc.go | 2 + internal/storagetesting/faulty.go | 115 ++ internal/storagetesting/map.go | 118 ++ internal/storagetesting/map_test.go | 15 + internal/storagetesting/verify.go | 58 + internal/throttle/round_tripper.go | 44 + internal/throttle/round_tripper_test.go | 103 ++ local_config.go | 56 + manifest/manifest_entry.go | 21 + manifest/manifest_manager.go | 514 +++++++++ manifest/manifest_manager_test.go | 193 ++++ manifest/serialized.go | 18 + object/indirect.go | 10 + object/object_manager.go | 280 +++++ object/object_manager_test.go | 354 ++++++ 
object/object_reader.go | 159 +++ object/object_splitter.go | 110 ++ object/object_splitter_test.go | 134 +++ object/object_writer.go | 176 +++ object/objectid.go | 94 ++ object/objectid_test.go | 48 + object/semaphore.go | 12 + object/writeback.go | 37 + open.go | 194 ++++ repository.go | 90 ++ storage/config.go | 47 + storage/doc.go | 2 + storage/filesystem/filesystem_options.go | 40 + storage/filesystem/filesystem_storage.go | 241 ++++ storage/filesystem/filesystem_storage_test.go | 110 ++ storage/gcs/gcs_options.go | 20 + storage/gcs/gcs_storage.go | 254 ++++ storage/logging/logging_storage.go | 96 ++ storage/logging/logging_storage_test.go | 17 + storage/progress.go | 21 + storage/providers/providers.go | 8 + storage/registry.go | 39 + storage/s3/s3_options.go | 20 + storage/s3/s3_storage.go | 241 ++++ storage/s3/s3_storage_test.go | 112 ++ storage/storage.go | 108 ++ storage/storage_test.go | 57 + storage/webdav/webdav_options.go | 17 + storage/webdav/webdav_storage.go | 210 ++++ storage/webdav/webdav_storage_test.go | 62 + .../repository_stress.go | 3 + .../repository_stress_test.go | 316 +++++ tests/repository_test/repository.go | 3 + tests/repository_test/repository_test.go | 314 +++++ tests/stress_test/stress.go | 3 + tests/stress_test/stress_test.go | 131 +++ 93 files changed, 10786 insertions(+) create mode 100644 block/block_cache.go create mode 100644 block/block_cache_test.go create mode 100644 block/block_formatter.go create mode 100644 block/block_formatter_test.go create mode 100644 block/block_formatting_options.go create mode 100644 block/block_index_recovery.go create mode 100644 block/block_manager.go create mode 100644 block/block_manager_compaction.go create mode 100644 block/block_manager_test.go create mode 100644 block/cache_hmac.go create mode 100644 block/caching_options.go create mode 100644 block/committed_block_index.go create mode 100644 block/committed_block_index_disk_cache.go create mode 100644 block/committed_block_index_mem_cache.go create mode 100644 block/context.go create mode 100644 block/list_cache.go create mode 100644 block/stats.go create mode 100644 connect.go create mode 100644 crypto_key_derivation.go create mode 100644 doc.go create mode 100644 format_block.go create mode 100644 initialize.go create mode 100644 internal/jsonstream/doc.go create mode 100644 internal/jsonstream/reader.go create mode 100644 internal/jsonstream/stream_test.go create mode 100644 internal/jsonstream/writer.go create mode 100644 internal/packindex/builder.go create mode 100644 internal/packindex/content_id_to_bytes.go create mode 100644 internal/packindex/format.go create mode 100644 internal/packindex/index.go create mode 100644 internal/packindex/info.go create mode 100644 internal/packindex/merged.go create mode 100644 internal/packindex/merged_test.go create mode 100644 internal/packindex/packindex_internal_test.go create mode 100644 internal/packindex/packindex_test.go create mode 100644 internal/packindex/subset.go create mode 100644 internal/packindex/subset_test.go create mode 100644 internal/repologging/logging.go create mode 100644 internal/repotesting/repotesting.go create mode 100644 internal/retry/retry.go create mode 100644 internal/storagetesting/asserts.go create mode 100644 internal/storagetesting/doc.go create mode 100644 internal/storagetesting/faulty.go create mode 100644 internal/storagetesting/map.go create mode 100644 internal/storagetesting/map_test.go create mode 100644 internal/storagetesting/verify.go create mode 100644 
internal/throttle/round_tripper.go create mode 100644 internal/throttle/round_tripper_test.go create mode 100644 local_config.go create mode 100644 manifest/manifest_entry.go create mode 100644 manifest/manifest_manager.go create mode 100644 manifest/manifest_manager_test.go create mode 100644 manifest/serialized.go create mode 100644 object/indirect.go create mode 100644 object/object_manager.go create mode 100644 object/object_manager_test.go create mode 100644 object/object_reader.go create mode 100644 object/object_splitter.go create mode 100644 object/object_splitter_test.go create mode 100644 object/object_writer.go create mode 100644 object/objectid.go create mode 100644 object/objectid_test.go create mode 100644 object/semaphore.go create mode 100644 object/writeback.go create mode 100644 open.go create mode 100644 repository.go create mode 100644 storage/config.go create mode 100644 storage/doc.go create mode 100644 storage/filesystem/filesystem_options.go create mode 100644 storage/filesystem/filesystem_storage.go create mode 100644 storage/filesystem/filesystem_storage_test.go create mode 100644 storage/gcs/gcs_options.go create mode 100644 storage/gcs/gcs_storage.go create mode 100644 storage/logging/logging_storage.go create mode 100644 storage/logging/logging_storage_test.go create mode 100644 storage/progress.go create mode 100644 storage/providers/providers.go create mode 100644 storage/registry.go create mode 100644 storage/s3/s3_options.go create mode 100644 storage/s3/s3_storage.go create mode 100644 storage/s3/s3_storage_test.go create mode 100644 storage/storage.go create mode 100644 storage/storage_test.go create mode 100644 storage/webdav/webdav_options.go create mode 100644 storage/webdav/webdav_storage.go create mode 100644 storage/webdav/webdav_storage_test.go create mode 100644 tests/repository_stress_test/repository_stress.go create mode 100644 tests/repository_stress_test/repository_stress_test.go create mode 100644 tests/repository_test/repository.go create mode 100644 tests/repository_test/repository_test.go create mode 100644 tests/stress_test/stress.go create mode 100644 tests/stress_test/stress_test.go diff --git a/.gitignore b/.gitignore index f1c181ec9..28bb98eeb 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ # Output of the go coverage tool, specifically when used with LiteIDE *.out +*.cov diff --git a/block/block_cache.go b/block/block_cache.go new file mode 100644 index 000000000..d3e8c174e --- /dev/null +++ b/block/block_cache.go @@ -0,0 +1,216 @@ +package block + +import ( + "container/heap" + "context" + "fmt" + "os" + "path/filepath" + "sync" + "time" + + "github.com/kopia/repo/storage" + "github.com/kopia/repo/storage/filesystem" +) + +const ( + sweepCacheFrequency = 1 * time.Minute + touchThreshold = 10 * time.Minute +) + +type blockCache struct { + st storage.Storage + cacheStorage storage.Storage + maxSizeBytes int64 + hmacSecret []byte + + mu sync.Mutex + lastTotalSizeBytes int64 + + closed chan struct{} +} + +type blockToucher interface { + TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error +} + +func adjustCacheKey(cacheKey string) string { + // block IDs with odd length have a single-byte prefix. + // move the prefix to the end of cache key to make sure the top level shard is spread 256 ways. 
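+	// e.g. the odd-length key "xf0f0f1" becomes "f0f0f1x" (see the cache tests),
+	// so the leading hex pair used for sharding comes from the hash itself
+	// rather than from the small set of single-letter prefixes.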
+ if len(cacheKey)%2 == 1 { + return cacheKey[1:] + cacheKey[0:1] + } + + return cacheKey +} + +func (c *blockCache) getContentBlock(ctx context.Context, cacheKey string, physicalBlockID string, offset, length int64) ([]byte, error) { + cacheKey = adjustCacheKey(cacheKey) + + useCache := shouldUseBlockCache(ctx) && c.cacheStorage != nil + if useCache { + if b := c.readAndVerifyCacheBlock(ctx, cacheKey); b != nil { + return b, nil + } + } + + b, err := c.st.GetBlock(ctx, physicalBlockID, offset, length) + if err == storage.ErrBlockNotFound { + // not found in underlying storage + return nil, err + } + + if err == nil && useCache { + if puterr := c.cacheStorage.PutBlock(ctx, cacheKey, appendHMAC(b, c.hmacSecret)); puterr != nil { + log.Warningf("unable to write cache item %v: %v", cacheKey, puterr) + } + } + + return b, err +} + +func (c *blockCache) readAndVerifyCacheBlock(ctx context.Context, cacheKey string) []byte { + b, err := c.cacheStorage.GetBlock(ctx, cacheKey, 0, -1) + if err == nil { + b, err = verifyAndStripHMAC(b, c.hmacSecret) + if err == nil { + if t, ok := c.cacheStorage.(blockToucher); ok { + t.TouchBlock(ctx, cacheKey, touchThreshold) //nolint:errcheck + } + + // retrieved from cache and HMAC valid + return b + } + + // ignore malformed blocks + log.Warningf("malformed block %v: %v", cacheKey, err) + return nil + } + + if err != storage.ErrBlockNotFound { + log.Warningf("unable to read cache %v: %v", cacheKey, err) + } + return nil +} + +func (c *blockCache) close() { + close(c.closed) +} + +func (c *blockCache) sweepDirectoryPeriodically(ctx context.Context) { + for { + select { + case <-c.closed: + return + + case <-time.After(sweepCacheFrequency): + err := c.sweepDirectory(ctx) + if err != nil { + log.Warningf("blockCache sweep failed: %v", err) + } + } + } +} + +// A blockMetadataHeap implements heap.Interface and holds storage.BlockMetadata. 
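+// It is a min-heap ordered by Timestamp, so heap.Pop always yields the oldest
+// cache entry; sweepDirectory relies on this to evict the least recently used
+// blocks once the cache grows beyond maxSizeBytes.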
+type blockMetadataHeap []storage.BlockMetadata
+
+func (h blockMetadataHeap) Len() int { return len(h) }
+
+func (h blockMetadataHeap) Less(i, j int) bool {
+	return h[i].Timestamp.Before(h[j].Timestamp)
+}
+
+func (h blockMetadataHeap) Swap(i, j int) {
+	h[i], h[j] = h[j], h[i]
+}
+
+func (h *blockMetadataHeap) Push(x interface{}) {
+	*h = append(*h, x.(storage.BlockMetadata))
+}
+
+func (h *blockMetadataHeap) Pop() interface{} {
+	old := *h
+	n := len(old)
+	item := old[n-1]
+	*h = old[0 : n-1]
+	return item
+}
+
+func (c *blockCache) sweepDirectory(ctx context.Context) (err error) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if c.cacheStorage == nil {
+		return nil
+	}
+
+	t0 := time.Now()
+
+	var h blockMetadataHeap
+	var totalRetainedSize int64
+
+	err = c.cacheStorage.ListBlocks(ctx, "", func(it storage.BlockMetadata) error {
+		heap.Push(&h, it)
+		totalRetainedSize += it.Length
+
+		if totalRetainedSize > c.maxSizeBytes {
+			oldest := heap.Pop(&h).(storage.BlockMetadata)
+			if delerr := c.cacheStorage.DeleteBlock(ctx, oldest.BlockID); delerr != nil {
+				log.Warningf("unable to remove %v: %v", oldest.BlockID, delerr)
+			} else {
+				totalRetainedSize -= oldest.Length
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return fmt.Errorf("error listing cache: %v", err)
+	}
+
+	log.Debugf("finished sweeping directory in %v and retained %v/%v bytes (%v %%)", time.Since(t0), totalRetainedSize, c.maxSizeBytes, 100*totalRetainedSize/c.maxSizeBytes)
+	c.lastTotalSizeBytes = totalRetainedSize
+	return nil
+}
+
+func newBlockCache(ctx context.Context, st storage.Storage, caching CachingOptions) (*blockCache, error) {
+	var cacheStorage storage.Storage
+	var err error
+
+	if caching.MaxCacheSizeBytes > 0 && caching.CacheDirectory != "" {
+		blockCacheDir := filepath.Join(caching.CacheDirectory, "blocks")
+
+		if _, err = os.Stat(blockCacheDir); os.IsNotExist(err) {
+			if err = os.MkdirAll(blockCacheDir, 0700); err != nil {
+				return nil, err
+			}
+		}
+
+		cacheStorage, err = filesystem.New(context.Background(), &filesystem.Options{
+			Path:            blockCacheDir,
+			DirectoryShards: []int{2},
+		})
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return newBlockCacheWithCacheStorage(ctx, st, cacheStorage, caching)
+}
+
+func newBlockCacheWithCacheStorage(ctx context.Context, st, cacheStorage storage.Storage, caching CachingOptions) (*blockCache, error) {
+	c := &blockCache{
+		st:           st,
+		cacheStorage: cacheStorage,
+		maxSizeBytes: caching.MaxCacheSizeBytes,
+		hmacSecret:   append([]byte(nil), caching.HMACSecret...),
+		closed:       make(chan struct{}),
+	}
+
+	if err := c.sweepDirectory(ctx); err != nil {
+		return nil, err
+	}
+	go c.sweepDirectoryPeriodically(ctx)
+
+	return c, nil
+}
diff --git a/block/block_cache_test.go b/block/block_cache_test.go
new file mode 100644
index 000000000..dc2860654
--- /dev/null
+++ b/block/block_cache_test.go
@@ -0,0 +1,130 @@
+package block
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"reflect"
+	"sort"
+	"testing"
+
+	"github.com/kopia/repo/internal/storagetesting"
+	"github.com/kopia/repo/storage"
+)
+
+func newUnderlyingStorageForBlockCacheTesting() storage.Storage {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	st := storagetesting.NewMapStorage(data, nil, nil)
+	st.PutBlock(ctx, "block-1", []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
+	return st
+}
+
+func TestInMemoryBlockCache(t *testing.T) {
+	cacheData := map[string][]byte{}
+	cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
+
+	cache, err := newBlockCacheWithCacheStorage(context.Background(),
newUnderlyingStorageForBlockCacheTesting(), cacheStorage, CachingOptions{ + MaxCacheSizeBytes: 10000, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + verifyBlockCache(t, cache) +} + +func TestDiskBlockCache(t *testing.T) { + ctx := context.Background() + + tmpDir, err := ioutil.TempDir("", "kopia") + if err != nil { + t.Fatalf("error getting temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + cache, err := newBlockCache(ctx, newUnderlyingStorageForBlockCacheTesting(), CachingOptions{ + MaxCacheSizeBytes: 10000, + CacheDirectory: tmpDir, + }) + + if err != nil { + t.Fatalf("err: %v", err) + } + verifyBlockCache(t, cache) +} +func verifyBlockCache(t *testing.T, cache *blockCache) { + ctx := context.Background() + defer cache.close() + + t.Run("GetContentBlock", func(t *testing.T) { + cases := []struct { + cacheKey string + physicalBlockID string + offset int64 + length int64 + + expected []byte + err error + }{ + {"xf0f0f1", "block-1", 1, 5, []byte{2, 3, 4, 5, 6}, nil}, + {"xf0f0f2", "block-1", 0, -1, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil}, + {"xf0f0f1", "block-1", 1, 5, []byte{2, 3, 4, 5, 6}, nil}, + {"xf0f0f2", "block-1", 0, -1, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil}, + {"xf0f0f3", "no-such-block", 0, -1, nil, storage.ErrBlockNotFound}, + {"xf0f0f4", "no-such-block", 10, 5, nil, storage.ErrBlockNotFound}, + {"f0f0f5", "block-1", 7, 10, []byte{8, 9, 10}, nil}, + {"xf0f0f6", "block-1", 11, 10, nil, fmt.Errorf("invalid offset")}, + {"xf0f0f6", "block-1", -1, 5, nil, fmt.Errorf("invalid offset")}, + } + + for _, tc := range cases { + v, err := cache.getContentBlock(ctx, tc.cacheKey, tc.physicalBlockID, tc.offset, tc.length) + if !reflect.DeepEqual(err, tc.err) { + t.Errorf("unexpected error for %v: %+v, wanted %+v", tc.cacheKey, err, tc.err) + } + if !reflect.DeepEqual(v, tc.expected) { + t.Errorf("unexpected data for %v: %x, wanted %x", tc.cacheKey, v, tc.expected) + } + } + + verifyStorageBlockList(t, cache.cacheStorage, "f0f0f1x", "f0f0f2x", "f0f0f5") + }) + + t.Run("DataCorruption", func(t *testing.T) { + cacheKey := "f0f0f1x" + d, err := cache.cacheStorage.GetBlock(ctx, cacheKey, 0, -1) + if err != nil { + t.Fatalf("unable to retrieve data from cache: %v", err) + } + + // corrupt the data and write back + d[0] ^= 1 + + if err := cache.cacheStorage.PutBlock(ctx, cacheKey, d); err != nil { + t.Fatalf("unable to write corrupted block: %v", err) + } + + v, err := cache.getContentBlock(ctx, "xf0f0f1", "block-1", 1, 5) + if err != nil { + t.Fatalf("error in getContentBlock: %v", err) + } + if got, want := v, []byte{2, 3, 4, 5, 6}; !reflect.DeepEqual(v, want) { + t.Errorf("invalid result when reading corrupted data: %v, wanted %v", got, want) + } + }) +} + +func verifyStorageBlockList(t *testing.T, st storage.Storage, expectedBlocks ...string) { + t.Helper() + var foundBlocks []string + st.ListBlocks(context.Background(), "", func(bm storage.BlockMetadata) error { + foundBlocks = append(foundBlocks, bm.BlockID) + return nil + }) + + sort.Strings(foundBlocks) + if !reflect.DeepEqual(foundBlocks, expectedBlocks) { + t.Errorf("unexpected block list: %v, wanted %v", foundBlocks, expectedBlocks) + } +} diff --git a/block/block_formatter.go b/block/block_formatter.go new file mode 100644 index 000000000..cc7297bdf --- /dev/null +++ b/block/block_formatter.go @@ -0,0 +1,120 @@ +package block + +import ( + "crypto/aes" + "crypto/cipher" + "crypto/hmac" //nolint:gas + "crypto/sha256" + "fmt" + "hash" + "sort" +) + +// Formatter performs data block ID computation and encryption of a 
block of data when storing object in a repository. +type Formatter interface { + // ComputeBlockID computes ID of the storage block for the specified block of data and returns it in ObjectID. + ComputeBlockID(data []byte) []byte + + // Encrypt returns encrypted bytes corresponding to the given plaintext. Must not clobber the input slice. + Encrypt(plainText []byte, blockID []byte) ([]byte, error) + + // Decrypt returns unencrypted bytes corresponding to the given ciphertext. Must not clobber the input slice. + Decrypt(cipherText []byte, blockID []byte) ([]byte, error) +} + +// digestFunction computes the digest (hash, optionally HMAC) of a given block of bytes. +type digestFunction func([]byte) []byte + +// unencryptedFormat implements non-encrypted format. +type unencryptedFormat struct { + digestFunc digestFunction +} + +func (fi *unencryptedFormat) ComputeBlockID(data []byte) []byte { + return fi.digestFunc(data) +} + +func (fi *unencryptedFormat) Encrypt(plainText []byte, blockID []byte) ([]byte, error) { + return cloneBytes(plainText), nil +} + +func (fi *unencryptedFormat) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) { + return cloneBytes(cipherText), nil +} + +// syntheticIVEncryptionFormat implements encrypted format with single master AES key and StorageBlock==IV that's +// derived from HMAC-SHA256(content, secret). +type syntheticIVEncryptionFormat struct { + digestFunc digestFunction + createCipher func(key []byte) (cipher.Block, error) + aesKey []byte +} + +func (fi *syntheticIVEncryptionFormat) ComputeBlockID(data []byte) []byte { + return fi.digestFunc(data) +} + +func (fi *syntheticIVEncryptionFormat) Encrypt(plainText []byte, blockID []byte) ([]byte, error) { + return symmetricEncrypt(fi.createCipher, fi.aesKey, blockID, plainText) +} + +func (fi *syntheticIVEncryptionFormat) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) { + return symmetricEncrypt(fi.createCipher, fi.aesKey, blockID, cipherText) +} + +func symmetricEncrypt(createCipher func(key []byte) (cipher.Block, error), key []byte, iv []byte, b []byte) ([]byte, error) { + blockCipher, err := createCipher(key) + if err != nil { + return nil, err + } + + ctr := cipher.NewCTR(blockCipher, iv[0:blockCipher.BlockSize()]) + result := make([]byte, len(b)) + ctr.XORKeyStream(result, b) + return result, nil +} + +// SupportedFormats is a list of supported object formats including: +// +// UNENCRYPTED_HMAC_SHA256_128 - unencrypted, block IDs are 128-bit (32 characters long) +// UNENCRYPTED_HMAC_SHA256 - unencrypted, block IDs are 256-bit (64 characters long) +// ENCRYPTED_HMAC_SHA256_AES256_SIV - encrypted with AES-256 (shared key), IV==FOLD(HMAC-SHA256(content), 128) +var SupportedFormats []string + +// FormatterFactories maps known block formatters to their factory functions. 
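+//
+// A minimal sketch of constructing a formatter by name (error handling elided;
+// assumes the options carry a valid HMAC secret and a 32-byte master key):
+//
+//	create := FormatterFactories[DefaultFormat]
+//	formatter, _ := create(opts)
+//	blockID := formatter.ComputeBlockID(data)
+//	cipherText, _ := formatter.Encrypt(data, blockID)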
+var FormatterFactories map[string]func(f FormattingOptions) (Formatter, error) + +func init() { + FormatterFactories = map[string]func(f FormattingOptions) (Formatter, error){ + "UNENCRYPTED_HMAC_SHA256": func(f FormattingOptions) (Formatter, error) { + return &unencryptedFormat{computeHMAC(sha256.New, f.HMACSecret, sha256.Size)}, nil + }, + "UNENCRYPTED_HMAC_SHA256_128": func(f FormattingOptions) (Formatter, error) { + return &unencryptedFormat{computeHMAC(sha256.New, f.HMACSecret, 16)}, nil + }, + "ENCRYPTED_HMAC_SHA256_AES256_SIV": func(f FormattingOptions) (Formatter, error) { + if len(f.MasterKey) < 32 { + return nil, fmt.Errorf("master key is not set") + } + return &syntheticIVEncryptionFormat{computeHMAC(sha256.New, f.HMACSecret, aes.BlockSize), aes.NewCipher, f.MasterKey}, nil + }, + } + + for formatName := range FormatterFactories { + SupportedFormats = append(SupportedFormats, formatName) + } + + sort.Strings(SupportedFormats) +} + +// DefaultFormat is the block format that should be used by default when creating new repositories. +const DefaultFormat = "ENCRYPTED_HMAC_SHA256_AES256_SIV" + +// computeHMAC returns a digestFunction that computes HMAC(hash, secret) of a given block of bytes and truncates results to the given size. +func computeHMAC(hf func() hash.Hash, secret []byte, truncate int) digestFunction { + return func(b []byte) []byte { + h := hmac.New(hf, secret) + h.Write(b) // nolint:errcheck + return h.Sum(nil)[0:truncate] + } +} diff --git a/block/block_formatter_test.go b/block/block_formatter_test.go new file mode 100644 index 000000000..59bcbdafe --- /dev/null +++ b/block/block_formatter_test.go @@ -0,0 +1,48 @@ +package block + +import ( + "bytes" + "crypto/rand" + "crypto/sha1" + "testing" +) + +func TestFormatters(t *testing.T) { + secret := []byte("secret") + f := FormattingOptions{HMACSecret: secret, MasterKey: make([]byte, 32)} + + for k, v := range FormatterFactories { + data := make([]byte, 100) + rand.Read(data) + + h0 := sha1.Sum(data) + + of, err := v(f) + if err != nil { + t.Errorf("error creating object formatter for %v: %v", k, err) + continue + } + + t.Logf("testing %v", k) + blockID := of.ComputeBlockID(data) + cipherText, err := of.Encrypt(data, blockID) + if err != nil || cipherText == nil { + t.Errorf("invalid response from Encrypt: %v %v", cipherText, err) + } + + plainText, err := of.Decrypt(cipherText, blockID) + if err != nil || plainText == nil { + t.Errorf("invalid response from Decrypt: %v %v", plainText, err) + } + + h1 := sha1.Sum(plainText) + + if !bytes.Equal(h0[:], h1[:]) { + t.Errorf("Encrypt()/Decrypt() does not round-trip: %x %x", h0, h1) + } + + if len(blockID)%16 != 0 { + t.Errorf("block ID for %v not a multiple of 16: %v", k, blockID) + } + } +} diff --git a/block/block_formatting_options.go b/block/block_formatting_options.go new file mode 100644 index 000000000..674c454e3 --- /dev/null +++ b/block/block_formatting_options.go @@ -0,0 +1,10 @@ +package block + +// FormattingOptions describes the rules for formatting blocks in repository. 
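+//
+// Serialized to JSON this looks roughly like the following (illustrative
+// values only; []byte fields are base64-encoded by encoding/json):
+//
+//	{"version":1,"objectFormat":"ENCRYPTED_HMAC_SHA256_AES256_SIV",
+//	 "secret":"...","masterKey":"...","maxPackSize":20971520}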
+type FormattingOptions struct { + Version int `json:"version,omitempty"` // version number, must be "1" + BlockFormat string `json:"objectFormat,omitempty"` // identifier of the block format + HMACSecret []byte `json:"secret,omitempty"` // HMAC secret used to generate encryption keys + MasterKey []byte `json:"masterKey,omitempty"` // master encryption key (SIV-mode encryption only) + MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object +} diff --git a/block/block_index_recovery.go b/block/block_index_recovery.go new file mode 100644 index 000000000..d9bd6f8fd --- /dev/null +++ b/block/block_index_recovery.go @@ -0,0 +1,227 @@ +package block + +import ( + "bytes" + "context" + "encoding/binary" + "fmt" + "hash/crc32" + "reflect" + + "github.com/kopia/repo/internal/packindex" +) + +// RecoverIndexFromPackFile attempts to recover index block entries from a given pack file. +// Pack file length may be provided (if known) to reduce the number of bytes that are read from the storage. +func (bm *Manager) RecoverIndexFromPackFile(ctx context.Context, packFile string, packFileLength int64, commit bool) ([]Info, error) { + localIndexBytes, err := bm.readPackFileLocalIndex(ctx, packFile, packFileLength) + if err != nil { + return nil, err + } + + ndx, err := packindex.Open(bytes.NewReader(localIndexBytes)) + if err != nil { + return nil, fmt.Errorf("unable to open index in file %v", packFile) + } + + var recovered []Info + + err = ndx.Iterate("", func(i Info) error { + recovered = append(recovered, i) + if commit { + bm.packIndexBuilder.Add(i) + } + return nil + }) + + return recovered, err +} + +type packBlockPostamble struct { + localIndexIV []byte + localIndexOffset uint32 + localIndexLength uint32 +} + +func (p *packBlockPostamble) toBytes() ([]byte, error) { + // 4 varints + IV + 4 bytes of checksum + 1 byte of postamble length + n := 0 + buf := make([]byte, 4*binary.MaxVarintLen64+len(p.localIndexIV)+4+1) + + n += binary.PutUvarint(buf[n:], uint64(1)) // version flag + n += binary.PutUvarint(buf[n:], uint64(len(p.localIndexIV))) // length of local index IV + copy(buf[n:], p.localIndexIV) + n += len(p.localIndexIV) + n += binary.PutUvarint(buf[n:], uint64(p.localIndexOffset)) + n += binary.PutUvarint(buf[n:], uint64(p.localIndexLength)) + + checksum := crc32.ChecksumIEEE(buf[0:n]) + binary.BigEndian.PutUint32(buf[n:], checksum) + n += 4 + if n > 255 { + return nil, fmt.Errorf("postamble too long: %v", n) + } + + buf[n] = byte(n) + return buf[0 : n+1], nil +} + +// findPostamble detects if a given block of bytes contains a possibly valid postamble, and returns it if so +// NOTE, even if this function returns a postamble, it should not be trusted to be correct, since it's not +// cryptographically signed. this is to facilitate data recovery. 
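+//
+// The layout written by toBytes is, in order: a version varint (currently 1),
+// a varint holding the local index IV length, the IV bytes themselves, varints
+// holding the local index offset and length, a big-endian CRC32 of everything
+// so far, and a single trailing byte holding the postamble length.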
+func findPostamble(b []byte) *packBlockPostamble {
+	if len(b) == 0 {
+		// no postamble
+		return nil
+	}
+
+	// length of postamble is the last byte
+	postambleLength := int(b[len(b)-1])
+	if postambleLength < 5 {
+		// too short, must be at least 5 bytes (checksum + own length)
+		return nil
+	}
+	postambleStart := len(b) - 1 - postambleLength
+	postambleEnd := len(b) - 1
+	if postambleStart < 0 {
+		// declared postamble length exceeds the buffer
+		return nil
+	}
+
+	postambleBytes := b[postambleStart:postambleEnd]
+	payload, checksumBytes := postambleBytes[0:len(postambleBytes)-4], postambleBytes[len(postambleBytes)-4:]
+	checksum := binary.BigEndian.Uint32(checksumBytes)
+	validChecksum := crc32.ChecksumIEEE(payload)
+
+	if checksum != validChecksum {
+		// invalid checksum, not a valid postamble
+		return nil
+	}
+
+	return decodePostamble(payload)
+}
+
+func decodePostamble(payload []byte) *packBlockPostamble {
+	flags, n := binary.Uvarint(payload)
+	if n <= 0 {
+		// invalid flags
+		return nil
+	}
+	if flags != 1 {
+		// unsupported flag
+		return nil
+	}
+	payload = payload[n:]
+
+	ivLength, n := binary.Uvarint(payload)
+	if n <= 0 {
+		// invalid IV length
+		return nil
+	}
+	payload = payload[n:]
+	if ivLength > uint64(len(payload)) {
+		// IV length exceeds remaining payload
+		return nil
+	}
+
+	iv := payload[0:ivLength]
+	payload = payload[ivLength:]
+
+	off, n := binary.Uvarint(payload)
+	if n <= 0 {
+		// invalid offset
+		return nil
+	}
+	payload = payload[n:]
+
+	length, n := binary.Uvarint(payload)
+	if n <= 0 {
+		// invalid length
+		return nil
+	}
+
+	return &packBlockPostamble{
+		localIndexIV:     iv,
+		localIndexLength: uint32(length),
+		localIndexOffset: uint32(off),
+	}
+}
+
+func (bm *Manager) buildLocalIndex(pending packindex.Builder) ([]byte, error) {
+	var buf bytes.Buffer
+	if err := pending.Build(&buf); err != nil {
+		return nil, fmt.Errorf("unable to build local index: %v", err)
+	}
+
+	return buf.Bytes(), nil
+}
+
+// appendPackFileIndexRecoveryData appends data designed to help with recovery of pack index in case it gets damaged or lost.
+func (bm *Manager) appendPackFileIndexRecoveryData(blockData []byte, pending packindex.Builder) ([]byte, error) {
+	// build, encrypt and append local index
+	localIndexOffset := len(blockData)
+	localIndex, err := bm.buildLocalIndex(pending)
+	if err != nil {
+		return nil, err
+	}
+
+	localIndexIV := bm.hashData(localIndex)
+	encryptedLocalIndex, err := bm.formatter.Encrypt(localIndex, localIndexIV)
+	if err != nil {
+		return nil, err
+	}
+
+	postamble := packBlockPostamble{
+		localIndexIV:     localIndexIV,
+		localIndexOffset: uint32(localIndexOffset),
+		localIndexLength: uint32(len(localIndex)),
+	}
+
+	blockData = append(blockData, encryptedLocalIndex...)
+	postambleBytes, err := postamble.toBytes()
+	if err != nil {
+		return nil, err
+	}
+
+	blockData = append(blockData, postambleBytes...)
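+
+	// Sanity check: the postamble appended above must immediately decode back
+	// to an identical structure, otherwise later index recovery could not work.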
+	pa2 := findPostamble(blockData)
+	if pa2 == nil {
+		log.Fatalf("invalid postamble written that could not be immediately decoded; this is a bug")
+	}
+
+	if !reflect.DeepEqual(postamble, *pa2) {
+		log.Fatalf("postamble did not round-trip: %v %v", postamble, *pa2)
+	}
+
+	return blockData, nil
+}
+
+func (bm *Manager) readPackFileLocalIndex(ctx context.Context, packFile string, packFileLength int64) ([]byte, error) {
+	payload, err := bm.st.GetBlock(ctx, packFile, 0, -1)
+	if err != nil {
+		return nil, err
+	}
+
+	postamble := findPostamble(payload)
+	if postamble == nil {
+		return nil, fmt.Errorf("unable to find valid postamble in file %v", packFile)
+	}
+
+	if uint64(postamble.localIndexOffset+postamble.localIndexLength) > uint64(len(payload)) {
+		// invalid offset/length
+		return nil, fmt.Errorf("unable to find valid local index in file %v", packFile)
+	}
+
+	encryptedLocalIndexBytes := payload[postamble.localIndexOffset : postamble.localIndexOffset+postamble.localIndexLength]
+	if encryptedLocalIndexBytes == nil {
+		return nil, fmt.Errorf("unable to find valid local index in file %v", packFile)
+	}
+
+	localIndexBytes, err := bm.decryptAndVerify(encryptedLocalIndexBytes, postamble.localIndexIV)
+	if err != nil {
+		return nil, fmt.Errorf("unable to decrypt local index: %v", err)
+	}
+
+	return localIndexBytes, nil
+}
diff --git a/block/block_manager.go b/block/block_manager.go
new file mode 100644
index 000000000..ca1aaa6e7
--- /dev/null
+++ b/block/block_manager.go
@@ -0,0 +1,1018 @@
+// Package block implements repository support for content-addressable storage blocks.
+package block
+
+import (
+	"bytes"
+	"context"
+	"crypto/aes"
+	cryptorand "crypto/rand"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"math"
+	"math/rand"
+	"os"
+	"reflect"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/kopia/repo/internal/packindex"
+	"github.com/kopia/repo/internal/repologging"
+	"github.com/kopia/repo/storage"
+)
+
+var log = repologging.Logger("kopia/block")
+var formatLog = repologging.Logger("kopia/block/format")
+
+// PackBlockPrefix is the prefix for all pack storage blocks.
+const PackBlockPrefix = "p"
+
+const (
+	parallelFetches          = 5                // number of parallel read goroutines
+	flushPackIndexTimeout    = 10 * time.Minute // time after which all pending indexes are flushed
+	newIndexBlockPrefix      = "n"
+	defaultMinPreambleLength = 32
+	defaultMaxPreambleLength = 32
+	defaultPaddingUnit       = 4096
+
+	currentWriteVersion     = 1
+	minSupportedReadVersion = 0
+	maxSupportedReadVersion = currentWriteVersion
+
+	indexLoadAttempts = 10
+)
+
+// Info is information about a single block managed by Manager.
+type Info = packindex.Info
+
+// IndexInfo is information about a single index block managed by Manager.
+type IndexInfo struct {
+	FileName  string
+	Length    int64
+	Timestamp time.Time
+}
+
+// Manager manages storage blocks at a low level with encryption, deduplication and packaging.
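+//
+// A typical round-trip, sketched with error handling elided:
+//
+//	blockID, _ := bm.WriteBlock(ctx, data, "")
+//	_ = bm.Flush(ctx) // commit pending packs and indexes
+//	data2, _ := bm.GetBlock(ctx, blockID)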
+type Manager struct { + Format FormattingOptions + + stats Stats + blockCache *blockCache + listCache *listCache + st storage.Storage + + mu sync.Mutex + locked bool + checkInvariantsOnUnlock bool + + currentPackItems map[string]Info // blocks that are in the pack block currently being built (all inline) + currentPackDataLength int // total length of all items in the current pack block + packIndexBuilder packindex.Builder // blocks that are in index currently being built (current pack and all packs saved but not committed) + committedBlocks *committedBlockIndex + + disableIndexFlushCount int + flushPackIndexesAfter time.Time // time when those indexes should be flushed + + closed chan struct{} + + writeFormatVersion int32 // format version to write + + maxPackSize int + formatter Formatter + + minPreambleLength int + maxPreambleLength int + paddingUnit int + timeNow func() time.Time +} + +// DeleteBlock marks the given blockID as deleted. +// +// NOTE: To avoid race conditions only blocks that cannot be possibly re-created +// should ever be deleted. That means that contents of such blocks should include some element +// of randomness or a contemporaneous timestamp that will never reappear. +func (bm *Manager) DeleteBlock(blockID string) error { + bm.lock() + defer bm.unlock() + + log.Debugf("DeleteBlock(%q)", blockID) + + // We have this block in current pack index and it's already deleted there. + if bi, ok := bm.packIndexBuilder[blockID]; ok { + if !bi.Deleted { + if bi.PackFile == "" { + // added and never committed, just forget about it. + delete(bm.packIndexBuilder, blockID) + delete(bm.currentPackItems, blockID) + return nil + } + + // added and committed. + bi2 := *bi + bi2.Deleted = true + bi2.TimestampSeconds = bm.timeNow().Unix() + bm.setPendingBlock(bi2) + } + return nil + } + + // We have this block in current pack index and it's already deleted there. + bi, err := bm.committedBlocks.getBlock(blockID) + if err != nil { + return err + } + + if bi.Deleted { + // already deleted + return nil + } + + // object present but not deleted, mark for deletion and add to pending + bi2 := bi + bi2.Deleted = true + bi2.TimestampSeconds = bm.timeNow().Unix() + bm.setPendingBlock(bi2) + return nil +} + +func (bm *Manager) setPendingBlock(i Info) { + bm.packIndexBuilder.Add(i) + bm.currentPackItems[i.BlockID] = i +} + +func (bm *Manager) addToPackLocked(ctx context.Context, blockID string, data []byte, isDeleted bool) error { + bm.assertLocked() + + data = cloneBytes(data) + bm.currentPackDataLength += len(data) + bm.setPendingBlock(Info{ + Deleted: isDeleted, + BlockID: blockID, + Payload: data, + Length: uint32(len(data)), + TimestampSeconds: bm.timeNow().Unix(), + }) + + if bm.currentPackDataLength >= bm.maxPackSize { + if err := bm.finishPackAndMaybeFlushIndexesLocked(ctx); err != nil { + return err + } + } + + return nil +} + +func (bm *Manager) finishPackAndMaybeFlushIndexesLocked(ctx context.Context) error { + bm.assertLocked() + if err := bm.finishPackLocked(ctx); err != nil { + return err + } + + if bm.timeNow().After(bm.flushPackIndexesAfter) { + if err := bm.flushPackIndexesLocked(ctx); err != nil { + return err + } + } + + return nil +} + +// Stats returns statistics about block manager operations. +func (bm *Manager) Stats() Stats { + return bm.stats +} + +// ResetStats resets statistics to zero values. +func (bm *Manager) ResetStats() { + bm.stats = Stats{} +} + +// DisableIndexFlush increments the counter preventing automatic index flushes. 
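+// Callers are expected to pair it with EnableIndexFlush, typically:
+//
+//	bm.DisableIndexFlush()
+//	defer bm.EnableIndexFlush()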
+func (bm *Manager) DisableIndexFlush() {
+	bm.lock()
+	defer bm.unlock()
+	log.Debugf("DisableIndexFlush()")
+	bm.disableIndexFlushCount++
+}
+
+// EnableIndexFlush decrements the counter preventing automatic index flushes.
+// Flushes are re-enabled when the counter drops back to zero.
+func (bm *Manager) EnableIndexFlush() {
+	bm.lock()
+	defer bm.unlock()
+	log.Debugf("EnableIndexFlush()")
+	bm.disableIndexFlushCount--
+}
+
+func (bm *Manager) verifyInvariantsLocked() {
+	bm.assertLocked()
+
+	bm.verifyCurrentPackItemsLocked()
+	bm.verifyPackIndexBuilderLocked()
+}
+
+func (bm *Manager) verifyCurrentPackItemsLocked() {
+	for k, cpi := range bm.currentPackItems {
+		if cpi.BlockID != k {
+			bm.invariantViolated("block ID entry has invalid key: %v %v", cpi.BlockID, k)
+		}
+		if cpi.PackFile != "" && !cpi.Deleted {
+			bm.invariantViolated("block ID entry has unexpected pack block ID %v: %v", cpi.BlockID, cpi.PackFile)
+		}
+		if cpi.TimestampSeconds == 0 {
+			bm.invariantViolated("block has no timestamp: %v", cpi.BlockID)
+		}
+		bi, ok := bm.packIndexBuilder[k]
+		if !ok {
+			bm.invariantViolated("block ID entry not present in pack index builder: %v", cpi.BlockID)
+		}
+		if !reflect.DeepEqual(*bi, cpi) {
+			bm.invariantViolated("current pack index does not match pack index builder: %v vs %v", cpi, *bi)
+		}
+	}
+}
+
+func (bm *Manager) verifyPackIndexBuilderLocked() {
+	for k, cpi := range bm.packIndexBuilder {
+		if cpi.BlockID != k {
+			bm.invariantViolated("block ID entry has invalid key: %v %v", cpi.BlockID, k)
+		}
+		if _, ok := bm.currentPackItems[cpi.BlockID]; ok {
+			// ignore blocks also in currentPackItems
+			continue
+		}
+		if cpi.Deleted {
+			if cpi.PackFile != "" {
+				bm.invariantViolated("block can't be both deleted and have a pack block: %v", cpi.BlockID)
+			}
+		} else {
+			if cpi.PackFile == "" {
+				bm.invariantViolated("block that's not deleted must have a pack block: %+v", cpi)
+			}
+			if cpi.FormatVersion != byte(bm.writeFormatVersion) {
+				bm.invariantViolated("block that's not deleted must have a valid format version: %+v", cpi)
+			}
+		}
+		if cpi.TimestampSeconds == 0 {
+			bm.invariantViolated("block has no timestamp: %v", cpi.BlockID)
+		}
+	}
+}
+
+func (bm *Manager) invariantViolated(msg string, arg ...interface{}) {
+	if len(arg) > 0 {
+		msg = fmt.Sprintf(msg, arg...)
+	}
+
+	panic(msg)
+}
+
+func (bm *Manager) startPackIndexLocked() {
+	bm.currentPackItems = make(map[string]Info)
+	bm.currentPackDataLength = 0
+}
+
+func (bm *Manager) flushPackIndexesLocked(ctx context.Context) error {
+	bm.assertLocked()
+
+	if bm.disableIndexFlushCount > 0 {
+		log.Debugf("not flushing index because flushes are currently disabled")
+		return nil
+	}
+
+	if len(bm.packIndexBuilder) > 0 {
+		var buf bytes.Buffer
+
+		if err := bm.packIndexBuilder.Build(&buf); err != nil {
+			return fmt.Errorf("unable to build pack index: %v", err)
+		}
+
+		data := buf.Bytes()
+		dataCopy := append([]byte(nil), data...)
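+		// defensive copy: data is handed to the formatter for encryption inside
+		// writePackIndexesNew; the formatter is not supposed to clobber its input,
+		// but the committed-block cache below must see the plaintext regardless.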
+ + indexBlockID, err := bm.writePackIndexesNew(ctx, data) + if err != nil { + return err + } + + if err := bm.committedBlocks.addBlock(indexBlockID, dataCopy, true); err != nil { + return fmt.Errorf("unable to add committed block: %v", err) + } + bm.packIndexBuilder = packindex.NewBuilder() + } + + bm.flushPackIndexesAfter = bm.timeNow().Add(flushPackIndexTimeout) + return nil +} + +func (bm *Manager) writePackIndexesNew(ctx context.Context, data []byte) (string, error) { + return bm.encryptAndWriteBlockNotLocked(ctx, data, newIndexBlockPrefix) +} + +func (bm *Manager) finishPackLocked(ctx context.Context) error { + if len(bm.currentPackItems) == 0 { + log.Debugf("no current pack entries") + return nil + } + + if err := bm.writePackBlockLocked(ctx); err != nil { + return fmt.Errorf("error writing pack block: %v", err) + } + + bm.startPackIndexLocked() + return nil +} + +func (bm *Manager) writePackBlockLocked(ctx context.Context) error { + bm.assertLocked() + + blockID := make([]byte, 16) + if _, err := cryptorand.Read(blockID); err != nil { + return fmt.Errorf("unable to read crypto bytes: %v", err) + } + + packFile := fmt.Sprintf("%v%x", PackBlockPrefix, blockID) + + blockData, packFileIndex, err := bm.preparePackDataBlock(packFile) + if err != nil { + return fmt.Errorf("error preparing data block: %v", err) + } + + if err := bm.writePackFileNotLocked(ctx, packFile, blockData); err != nil { + return fmt.Errorf("can't save pack data block: %v", err) + } + + formatLog.Debugf("wrote pack file: %v", packFile) + for _, info := range packFileIndex { + bm.packIndexBuilder.Add(*info) + } + + return nil +} + +func (bm *Manager) preparePackDataBlock(packFile string) ([]byte, packindex.Builder, error) { + formatLog.Debugf("preparing block data with %v items", len(bm.currentPackItems)) + + blockData, err := appendRandomBytes(nil, rand.Intn(bm.maxPreambleLength-bm.minPreambleLength+1)+bm.minPreambleLength) + if err != nil { + return nil, nil, fmt.Errorf("unable to prepare block preamble: %v", err) + } + + packFileIndex := packindex.Builder{} + for blockID, info := range bm.currentPackItems { + if info.Payload == nil { + continue + } + + var encrypted []byte + encrypted, err = bm.maybeEncryptBlockDataForPacking(info.Payload, info.BlockID) + if err != nil { + return nil, nil, fmt.Errorf("unable to encrypt %q: %v", blockID, err) + } + + formatLog.Debugf("adding %v length=%v deleted=%v", blockID, len(info.Payload), info.Deleted) + + packFileIndex.Add(Info{ + BlockID: blockID, + Deleted: info.Deleted, + FormatVersion: byte(bm.writeFormatVersion), + PackFile: packFile, + PackOffset: uint32(len(blockData)), + Length: uint32(len(info.Payload)), + TimestampSeconds: info.TimestampSeconds, + }) + + blockData = append(blockData, encrypted...) 
+ } + + if len(packFileIndex) == 0 { + return nil, nil, nil + } + + if bm.paddingUnit > 0 { + if missing := bm.paddingUnit - (len(blockData) % bm.paddingUnit); missing > 0 { + blockData, err = appendRandomBytes(blockData, missing) + if err != nil { + return nil, nil, fmt.Errorf("unable to prepare block postamble: %v", err) + } + } + } + + origBlockLength := len(blockData) + blockData, err = bm.appendPackFileIndexRecoveryData(blockData, packFileIndex) + + formatLog.Debugf("finished block %v bytes (%v bytes index)", len(blockData), len(blockData)-origBlockLength) + return blockData, packFileIndex, err +} + +func (bm *Manager) maybeEncryptBlockDataForPacking(data []byte, blockID string) ([]byte, error) { + if bm.writeFormatVersion == 0 { + // in v0 the entire block is encrypted together later on + return data, nil + } + iv, err := getPackedBlockIV(blockID) + if err != nil { + return nil, fmt.Errorf("unable to get packed block IV for %q: %v", blockID, err) + } + return bm.formatter.Encrypt(data, iv) +} + +func appendRandomBytes(b []byte, count int) ([]byte, error) { + rnd := make([]byte, count) + if _, err := io.ReadFull(cryptorand.Reader, rnd); err != nil { + return nil, err + } + + return append(b, rnd...), nil +} + +// IndexBlocks returns the list of active index blocks. +func (bm *Manager) IndexBlocks(ctx context.Context) ([]IndexInfo, error) { + return bm.listCache.listIndexBlocks(ctx) +} + +func (bm *Manager) loadPackIndexesUnlocked(ctx context.Context) ([]IndexInfo, bool, error) { + nextSleepTime := 100 * time.Millisecond + + for i := 0; i < indexLoadAttempts; i++ { + if err := ctx.Err(); err != nil { + return nil, false, err + } + + if i > 0 { + bm.listCache.deleteListCache(ctx) + log.Debugf("encountered NOT_FOUND when loading, sleeping %v before retrying #%v", nextSleepTime, i) + time.Sleep(nextSleepTime) + nextSleepTime *= 2 + } + + blocks, err := bm.listCache.listIndexBlocks(ctx) + if err != nil { + return nil, false, err + } + + err = bm.tryLoadPackIndexBlocksUnlocked(ctx, blocks) + if err == nil { + var blockIDs []string + for _, b := range blocks { + blockIDs = append(blockIDs, b.FileName) + } + var updated bool + updated, err = bm.committedBlocks.use(blockIDs) + if err != nil { + return nil, false, err + } + return blocks, updated, nil + } + if err != storage.ErrBlockNotFound { + return nil, false, err + } + } + + return nil, false, fmt.Errorf("unable to load pack indexes despite %v retries", indexLoadAttempts) +} + +func (bm *Manager) tryLoadPackIndexBlocksUnlocked(ctx context.Context, blocks []IndexInfo) error { + ch, err := bm.unprocessedIndexBlocksUnlocked(blocks) + if err != nil { + return err + } + if len(ch) == 0 { + return nil + } + + var wg sync.WaitGroup + + errors := make(chan error, parallelFetches) + + for i := 0; i < parallelFetches; i++ { + wg.Add(1) + go func() { + defer wg.Done() + + for indexBlockID := range ch { + data, err := bm.getPhysicalBlockInternal(ctx, indexBlockID) + if err != nil { + errors <- err + return + } + + if err := bm.committedBlocks.addBlock(indexBlockID, data, false); err != nil { + errors <- fmt.Errorf("unable to add to committed block cache: %v", err) + return + } + } + }() + } + + wg.Wait() + close(errors) + + // Propagate async errors, if any. + for err := range errors { + return err + } + + return nil +} + +// unprocessedIndexBlocksUnlocked returns a closed channel filled with block IDs that are not in committedBlocks cache. 
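+// Because the channel is pre-filled and closed here, the parallelFetches worker
+// goroutines in tryLoadPackIndexBlocksUnlocked can simply range over it and
+// exit once it is drained, with no separate termination signal.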
+func (bm *Manager) unprocessedIndexBlocksUnlocked(blocks []IndexInfo) (<-chan string, error) {
+	ch := make(chan string, len(blocks))
+	for _, block := range blocks {
+		has, err := bm.committedBlocks.cache.hasIndexBlockID(block.FileName)
+		if err != nil {
+			return nil, err
+		}
+		if has {
+			log.Debugf("index block %q already in cache, skipping", block.FileName)
+			continue
+		}
+		ch <- block.FileName
+	}
+	close(ch)
+	return ch, nil
+}
+
+// Close closes the block manager.
+func (bm *Manager) Close() {
+	bm.blockCache.close()
+	close(bm.closed)
+}
+
+// ListBlocks returns IDs of blocks matching the given prefix.
+func (bm *Manager) ListBlocks(prefix string) ([]string, error) {
+	bm.lock()
+	defer bm.unlock()
+
+	var result []string
+
+	appendToResult := func(i Info) error {
+		if i.Deleted || !strings.HasPrefix(i.BlockID, prefix) {
+			return nil
+		}
+		if bi, ok := bm.packIndexBuilder[i.BlockID]; ok && bi.Deleted {
+			return nil
+		}
+		result = append(result, i.BlockID)
+		return nil
+	}
+
+	for _, bi := range bm.packIndexBuilder {
+		_ = appendToResult(*bi)
+	}
+
+	_ = bm.committedBlocks.listBlocks(prefix, appendToResult)
+	return result, nil
+}
+
+// ListBlockInfos returns the metadata about blocks with a given prefix, optionally including deleted blocks.
+func (bm *Manager) ListBlockInfos(prefix string, includeDeleted bool) ([]Info, error) {
+	bm.lock()
+	defer bm.unlock()
+
+	var result []Info
+
+	appendToResult := func(i Info) error {
+		if (i.Deleted && !includeDeleted) || !strings.HasPrefix(i.BlockID, prefix) {
+			return nil
+		}
+		if bi, ok := bm.packIndexBuilder[i.BlockID]; ok && bi.Deleted {
+			return nil
+		}
+		result = append(result, i)
+		return nil
+	}
+
+	for _, bi := range bm.packIndexBuilder {
+		_ = appendToResult(*bi)
+	}
+
+	_ = bm.committedBlocks.listBlocks(prefix, appendToResult)
+
+	return result, nil
+}
+
+// Flush completes writing any pending packs and writes pack indexes to the underlying storage.
+func (bm *Manager) Flush(ctx context.Context) error {
+	bm.lock()
+	defer bm.unlock()
+
+	if err := bm.finishPackLocked(ctx); err != nil {
+		return fmt.Errorf("error writing pending block: %v", err)
+	}
+
+	if err := bm.flushPackIndexesLocked(ctx); err != nil {
+		return fmt.Errorf("error flushing indexes: %v", err)
+	}
+
+	return nil
+}
+
+// RewriteBlock reads and re-writes a given block using the most recent format.
+func (bm *Manager) RewriteBlock(ctx context.Context, blockID string) error {
+	bi, err := bm.getBlockInfo(blockID)
+	if err != nil {
+		return err
+	}
+
+	data, err := bm.getBlockContentsUnlocked(ctx, bi)
+	if err != nil {
+		return err
+	}
+
+	bm.lock()
+	defer bm.unlock()
+	return bm.addToPackLocked(ctx, blockID, data, bi.Deleted)
+}
+
+// WriteBlock saves a given block of data to a pack group with a provided name and returns a blockID
+// that's based on the contents of data written.
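+//
+// For example (a sketch; the prefix must be empty or a single letter in
+// 'g'..'z', as enforced by validatePrefix):
+//
+//	blockID, err := bm.WriteBlock(ctx, []byte("hello"), "")
+//
+// Writing the same bytes again yields the same blockID without storing a
+// duplicate, because the ID is derived from a content hash.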
+func (bm *Manager) WriteBlock(ctx context.Context, data []byte, prefix string) (string, error) {
+	if err := validatePrefix(prefix); err != nil {
+		return "", err
+	}
+	blockID := prefix + hex.EncodeToString(bm.hashData(data))
+
+	// block already tracked
+	if bi, err := bm.getBlockInfo(blockID); err == nil {
+		if !bi.Deleted {
+			return blockID, nil
+		}
+	}
+
+	log.Debugf("WriteBlock(%q) - new", blockID)
+	bm.lock()
+	defer bm.unlock()
+	err := bm.addToPackLocked(ctx, blockID, data, false)
+	return blockID, err
+}
+
+func validatePrefix(prefix string) error {
+	switch len(prefix) {
+	case 0:
+		return nil
+	case 1:
+		if prefix[0] >= 'g' && prefix[0] <= 'z' {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("invalid prefix, must be empty or a single letter between 'g' and 'z'")
+}
+
+func (bm *Manager) writePackFileNotLocked(ctx context.Context, packFile string, data []byte) error {
+	atomic.AddInt32(&bm.stats.WrittenBlocks, 1)
+	atomic.AddInt64(&bm.stats.WrittenBytes, int64(len(data)))
+	bm.listCache.deleteListCache(ctx)
+	return bm.st.PutBlock(ctx, packFile, data)
+}
+
+func (bm *Manager) encryptAndWriteBlockNotLocked(ctx context.Context, data []byte, prefix string) (string, error) {
+	hash := bm.hashData(data)
+	physicalBlockID := prefix + hex.EncodeToString(hash)
+
+	// Encrypt the block using its own hash as the IV.
+	atomic.AddInt64(&bm.stats.EncryptedBytes, int64(len(data)))
+	data2, err := bm.formatter.Encrypt(data, hash)
+	if err != nil {
+		return "", err
+	}
+
+	atomic.AddInt32(&bm.stats.WrittenBlocks, 1)
+	atomic.AddInt64(&bm.stats.WrittenBytes, int64(len(data)))
+	bm.listCache.deleteListCache(ctx)
+	if err := bm.st.PutBlock(ctx, physicalBlockID, data2); err != nil {
+		return "", err
+	}
+
+	return physicalBlockID, nil
+}
+
+func (bm *Manager) hashData(data []byte) []byte {
+	// Hash the block and compute encryption key.
+	blockID := bm.formatter.ComputeBlockID(data)
+	atomic.AddInt32(&bm.stats.HashedBlocks, 1)
+	atomic.AddInt64(&bm.stats.HashedBytes, int64(len(data)))
+	return blockID
+}
+
+func cloneBytes(b []byte) []byte {
+	return append([]byte{}, b...)
+}
+
+// GetBlock gets the contents of a given block. If the block is not found returns storage.ErrBlockNotFound.
+func (bm *Manager) GetBlock(ctx context.Context, blockID string) ([]byte, error) {
+	bi, err := bm.getBlockInfo(blockID)
+	if err != nil {
+		return nil, err
+	}
+
+	if bi.Deleted {
+		return nil, storage.ErrBlockNotFound
+	}
+
+	return bm.getBlockContentsUnlocked(ctx, bi)
+}
+
+func (bm *Manager) getBlockInfo(blockID string) (Info, error) {
+	bm.lock()
+	defer bm.unlock()
+
+	// check added blocks, not written to any packs.
+	if bi, ok := bm.currentPackItems[blockID]; ok {
+		return bi, nil
+	}
+
+	// added blocks, written to packs but not yet added to indexes
+	if bi, ok := bm.packIndexBuilder[blockID]; ok {
+		return *bi, nil
+	}
+
+	// read from committed block index
+	return bm.committedBlocks.getBlock(blockID)
+}
+
+// GetIndexBlock gets the contents of a given index block. If the block is not found returns storage.ErrBlockNotFound.
+func (bm *Manager) GetIndexBlock(ctx context.Context, blockID string) ([]byte, error) {
+	bm.lock()
+	defer bm.unlock()
+
+	return bm.getPhysicalBlockInternal(ctx, blockID)
+}
+
+// BlockInfo returns information about a single block.
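+// It consults pending pack items and the in-memory index builder before falling back to the
+// committed block index.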
+func (bm *Manager) BlockInfo(ctx context.Context, blockID string) (Info, error) {
+	bi, err := bm.getBlockInfo(blockID)
+	if err != nil {
+		log.Debugf("BlockInfo(%q) - error %v", blockID, err)
+		return Info{}, err
+	}
+
+	if bi.Deleted {
+		log.Debugf("BlockInfo(%q) - deleted", blockID)
+	} else {
+		log.Debugf("BlockInfo(%q) - exists in %v", blockID, bi.PackFile)
+	}
+
+	return bi, nil
+}
+
+// FindUnreferencedStorageFiles returns the list of unreferenced storage blocks.
+func (bm *Manager) FindUnreferencedStorageFiles(ctx context.Context) ([]storage.BlockMetadata, error) {
+	infos, err := bm.ListBlockInfos("", false)
+	if err != nil {
+		return nil, fmt.Errorf("unable to list index blocks: %v", err)
+	}
+
+	usedPackBlocks := findPackBlocksInUse(infos)
+	var unused []storage.BlockMetadata
+	err = bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error {
+		u := usedPackBlocks[bi.BlockID]
+		if u > 0 {
+			log.Debugf("pack %v, in use by %v blocks", bi.BlockID, u)
+			return nil
+		}
+
+		unused = append(unused, bi)
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("error listing storage blocks: %v", err)
+	}
+
+	return unused, nil
+}
+
+func findPackBlocksInUse(infos []Info) map[string]int {
+	packUsage := map[string]int{}
+
+	for _, bi := range infos {
+		packUsage[bi.PackFile]++
+	}
+
+	return packUsage
+}
+
+func (bm *Manager) getBlockContentsUnlocked(ctx context.Context, bi Info) ([]byte, error) {
+	if bi.Payload != nil {
+		return cloneBytes(bi.Payload), nil
+	}
+
+	payload, err := bm.blockCache.getContentBlock(ctx, bi.BlockID, bi.PackFile, int64(bi.PackOffset), int64(bi.Length))
+	if err != nil {
+		return nil, err
+	}
+
+	atomic.AddInt32(&bm.stats.ReadBlocks, 1)
+	atomic.AddInt64(&bm.stats.ReadBytes, int64(len(payload)))
+
+	iv, err := getPackedBlockIV(bi.BlockID)
+	if err != nil {
+		return nil, err
+	}
+
+	decrypted, err := bm.decryptAndVerify(payload, iv)
+	if err != nil {
+		return nil, fmt.Errorf("invalid checksum at %v offset %v length %v: %v", bi.PackFile, bi.PackOffset, len(payload), err)
+	}
+
+	return decrypted, nil
+}
+
+func (bm *Manager) decryptAndVerify(encrypted []byte, iv []byte) ([]byte, error) {
+	decrypted, err := bm.formatter.Decrypt(encrypted, iv)
+	if err != nil {
+		return nil, err
+	}
+
+	atomic.AddInt64(&bm.stats.DecryptedBytes, int64(len(decrypted)))
+
+	// Since the encryption key is a function of data, we must be able to generate exactly the same key
+	// after decrypting the content. This serves as a checksum.
+	return decrypted, bm.verifyChecksum(decrypted, iv)
+}
+
+func (bm *Manager) getPhysicalBlockInternal(ctx context.Context, blockID string) ([]byte, error) {
+	payload, err := bm.blockCache.getContentBlock(ctx, blockID, blockID, 0, -1)
+	if err != nil {
+		return nil, err
+	}
+
+	iv, err := getPhysicalBlockIV(blockID)
+	if err != nil {
+		return nil, err
+	}
+
+	atomic.AddInt32(&bm.stats.ReadBlocks, 1)
+	atomic.AddInt64(&bm.stats.ReadBytes, int64(len(payload)))
+
+	payload, err = bm.formatter.Decrypt(payload, iv)
+	atomic.AddInt64(&bm.stats.DecryptedBytes, int64(len(payload)))
+	if err != nil {
+		return nil, err
+	}
+
+	// Since the encryption key is a function of data, we must be able to generate exactly the same key
+	// after decrypting the content. This serves as a checksum.
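+	// (verifyChecksum recomputes the hash of the plaintext and compares its suffix against the IV)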
+ if err := bm.verifyChecksum(payload, iv); err != nil { + return nil, err + } + + return payload, nil +} + +func getPackedBlockIV(blockID string) ([]byte, error) { + return hex.DecodeString(blockID[len(blockID)-(aes.BlockSize*2):]) +} + +func getPhysicalBlockIV(s string) ([]byte, error) { + if p := strings.Index(s, "-"); p >= 0 { + s = s[0:p] + } + return hex.DecodeString(s[len(s)-(aes.BlockSize*2):]) +} + +func (bm *Manager) verifyChecksum(data []byte, blockID []byte) error { + expected := bm.formatter.ComputeBlockID(data) + expected = expected[len(expected)-aes.BlockSize:] + if !bytes.HasSuffix(blockID, expected) { + atomic.AddInt32(&bm.stats.InvalidBlocks, 1) + return fmt.Errorf("invalid checksum for blob %x, expected %x", blockID, expected) + } + + atomic.AddInt32(&bm.stats.ValidBlocks, 1) + return nil +} + +func (bm *Manager) lock() { + bm.mu.Lock() + bm.locked = true +} + +func (bm *Manager) unlock() { + if bm.checkInvariantsOnUnlock { + bm.verifyInvariantsLocked() + } + + bm.locked = false + bm.mu.Unlock() +} + +func (bm *Manager) assertLocked() { + if !bm.locked { + panic("must be locked") + } +} + +// Refresh reloads the committed block indexes. +func (bm *Manager) Refresh(ctx context.Context) (bool, error) { + bm.mu.Lock() + defer bm.mu.Unlock() + + log.Debugf("Refresh started") + t0 := time.Now() + _, updated, err := bm.loadPackIndexesUnlocked(ctx) + log.Debugf("Refresh completed in %v and updated=%v", time.Since(t0), updated) + return updated, err +} + +type cachedList struct { + Timestamp time.Time `json:"timestamp"` + Blocks []IndexInfo `json:"blocks"` +} + +// listIndexBlocksFromStorage returns the list of index blocks in the given storage. +// The list of blocks is not guaranteed to be sorted. +func listIndexBlocksFromStorage(ctx context.Context, st storage.Storage) ([]IndexInfo, error) { + snapshot, err := storage.ListAllBlocksConsistent(ctx, st, newIndexBlockPrefix, math.MaxInt32) + if err != nil { + return nil, err + } + + var results []IndexInfo + for _, it := range snapshot { + ii := IndexInfo{ + FileName: it.BlockID, + Timestamp: it.Timestamp, + Length: it.Length, + } + results = append(results, ii) + } + + return results, err +} + +// NewManager creates new block manager with given packing options and a formatter. 
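+// It also runs an initial auto-compaction of index blocks. A minimal usage sketch (ctx, st and
+// formatting options f assumed to be prepared by the caller):
+//
+//	bm, err := block.NewManager(ctx, st, f, block.CachingOptions{})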
+func NewManager(ctx context.Context, st storage.Storage, f FormattingOptions, caching CachingOptions) (*Manager, error) {
+	return newManagerWithOptions(ctx, st, f, caching, time.Now)
+}
+
+func newManagerWithOptions(ctx context.Context, st storage.Storage, f FormattingOptions, caching CachingOptions, timeNow func() time.Time) (*Manager, error) {
+	if f.Version < minSupportedReadVersion || f.Version > currentWriteVersion {
+		return nil, fmt.Errorf("can't handle repositories created using version %v (min supported %v, max supported %v)", f.Version, minSupportedReadVersion, currentWriteVersion)
+	}
+
+	formatter, err := createFormatter(f)
+	if err != nil {
+		return nil, fmt.Errorf("unable to create block formatter: %v", err)
+	}
+
+	blockCache, err := newBlockCache(ctx, st, caching)
+	if err != nil {
+		return nil, fmt.Errorf("unable to initialize block cache: %v", err)
+	}
+
+	listCache, err := newListCache(ctx, st, caching)
+	if err != nil {
+		return nil, fmt.Errorf("unable to initialize list cache: %v", err)
+	}
+
+	blockIndex, err := newCommittedBlockIndex(caching)
+	if err != nil {
+		return nil, fmt.Errorf("unable to initialize committed block index: %v", err)
+	}
+
+	m := &Manager{
+		Format:                  f,
+		timeNow:                 timeNow,
+		flushPackIndexesAfter:   timeNow().Add(flushPackIndexTimeout),
+		maxPackSize:             f.MaxPackSize,
+		formatter:               formatter,
+		currentPackItems:        make(map[string]Info),
+		packIndexBuilder:        packindex.NewBuilder(),
+		committedBlocks:         blockIndex,
+		minPreambleLength:       defaultMinPreambleLength,
+		maxPreambleLength:       defaultMaxPreambleLength,
+		paddingUnit:             defaultPaddingUnit,
+		blockCache:              blockCache,
+		listCache:               listCache,
+		st:                      st,
+
+		writeFormatVersion:      int32(f.Version),
+		closed:                  make(chan struct{}),
+		checkInvariantsOnUnlock: os.Getenv("KOPIA_VERIFY_INVARIANTS") != "",
+	}
+
+	m.startPackIndexLocked()
+
+	if err := m.CompactIndexes(ctx, autoCompactionOptions); err != nil {
+		return nil, fmt.Errorf("error initializing block manager: %v", err)
+	}
+
+	return m, nil
+}
+
+func createFormatter(f FormattingOptions) (Formatter, error) {
+	sf := FormatterFactories[f.BlockFormat]
+	if sf == nil {
+		return nil, fmt.Errorf("unsupported block format: %v", f.BlockFormat)
+	}
+
+	return sf(f)
+}
diff --git a/block/block_manager_compaction.go b/block/block_manager_compaction.go
new file mode 100644
index 000000000..b6306d8b9
--- /dev/null
+++ b/block/block_manager_compaction.go
@@ -0,0 +1,148 @@
+package block
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/kopia/repo/internal/packindex"
+)
+
+var autoCompactionOptions = CompactOptions{
+	MinSmallBlocks: 4 * parallelFetches,
+	MaxSmallBlocks: 64,
+}
+
+// CompactOptions provides options for compaction.
+type CompactOptions struct {
+	MinSmallBlocks       int
+	MaxSmallBlocks       int
+	AllBlocks            bool
+	SkipDeletedOlderThan time.Duration
+}
+
+// CompactIndexes performs compaction of index blocks, ensuring that the number of small blocks is between MinSmallBlocks and MaxSmallBlocks.
+func (bm *Manager) CompactIndexes(ctx context.Context, opt CompactOptions) error {
+	log.Debugf("CompactIndexes(%+v)", opt)
+	if opt.MaxSmallBlocks < opt.MinSmallBlocks {
+		return fmt.Errorf("invalid block counts: MaxSmallBlocks (%v) must not be less than MinSmallBlocks (%v)", opt.MaxSmallBlocks, opt.MinSmallBlocks)
+	}
+
+	indexBlocks, _, err := bm.loadPackIndexesUnlocked(ctx)
+	if err != nil {
+		return fmt.Errorf("error loading indexes: %v", err)
+	}
+
+	blocksToCompact := bm.getBlocksToCompact(indexBlocks, opt)
+
+	if err := bm.compactAndDeleteIndexBlocks(ctx, blocksToCompact, opt); err != nil {
+		log.Warningf("error performing quick compaction: %v", err)
+	}
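+	// compaction failures are deliberately non-fatal - the uncompacted indexes remain fully readable.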
+
+	return nil
+}
+
+func (bm *Manager) getBlocksToCompact(indexBlocks []IndexInfo, opt CompactOptions) []IndexInfo {
+	var nonCompactedBlocks []IndexInfo
+	var totalSizeNonCompactedBlocks int64
+
+	var verySmallBlocks []IndexInfo
+	var totalSizeVerySmallBlocks int64
+
+	var mediumSizedBlocks []IndexInfo
+	var totalSizeMediumSizedBlocks int64
+
+	for _, b := range indexBlocks {
+		if b.Length > int64(bm.maxPackSize) && !opt.AllBlocks {
+			continue
+		}
+
+		nonCompactedBlocks = append(nonCompactedBlocks, b)
+		if b.Length < int64(bm.maxPackSize/20) {
+			verySmallBlocks = append(verySmallBlocks, b)
+			totalSizeVerySmallBlocks += b.Length
+		} else {
+			mediumSizedBlocks = append(mediumSizedBlocks, b)
+			totalSizeMediumSizedBlocks += b.Length
+		}
+		totalSizeNonCompactedBlocks += b.Length
+	}
+
+	if len(nonCompactedBlocks) < opt.MinSmallBlocks {
+		// current count is below min allowed - nothing to do
+		formatLog.Debugf("no small blocks to compact")
+		return nil
+	}
+
+	if len(verySmallBlocks) > len(nonCompactedBlocks)/2 && len(mediumSizedBlocks)+1 < opt.MinSmallBlocks {
+		formatLog.Debugf("compacting %v very small blocks", len(verySmallBlocks))
+		return verySmallBlocks
+	}
+
+	formatLog.Debugf("compacting all %v non-compacted blocks", len(nonCompactedBlocks))
+	return nonCompactedBlocks
+}
+
+func (bm *Manager) compactAndDeleteIndexBlocks(ctx context.Context, indexBlocks []IndexInfo, opt CompactOptions) error {
+	if len(indexBlocks) <= 1 {
+		return nil
+	}
+	formatLog.Debugf("compacting %v blocks", len(indexBlocks))
+	t0 := time.Now()
+
+	bld := packindex.NewBuilder()
+	for _, indexBlock := range indexBlocks {
+		if err := bm.addIndexBlocksToBuilder(ctx, bld, indexBlock, opt); err != nil {
+			return err
+		}
+	}
+
+	var buf bytes.Buffer
+	if err := bld.Build(&buf); err != nil {
+		return fmt.Errorf("unable to build an index: %v", err)
+	}
+
+	compactedIndexBlock, err := bm.writePackIndexesNew(ctx, buf.Bytes())
+	if err != nil {
+		return fmt.Errorf("unable to write compacted indexes: %v", err)
+	}
+
+	formatLog.Debugf("wrote compacted index %v (%v bytes) in %v", compactedIndexBlock, buf.Len(), time.Since(t0))
+
+	for _, indexBlock := range indexBlocks {
+		if indexBlock.FileName == compactedIndexBlock {
+			continue
+		}
+
+		bm.listCache.deleteListCache(ctx)
+		if err := bm.st.DeleteBlock(ctx, indexBlock.FileName); err != nil {
+			log.Warningf("unable to delete compacted block %q: %v", indexBlock.FileName, err)
+		}
+	}
+
+	return nil
+}
+
+func (bm *Manager) addIndexBlocksToBuilder(ctx context.Context, bld packindex.Builder, indexBlock IndexInfo, opt CompactOptions) error {
+	data, err := bm.getPhysicalBlockInternal(ctx, indexBlock.FileName)
+	if err != nil {
+		return err
+	}
+
+	index, err := packindex.Open(bytes.NewReader(data))
+	if err != nil {
+		return fmt.Errorf("unable to open index block %q: %v", indexBlock.FileName, err)
+	}
+
+	_ = index.Iterate("", func(i Info) error {
+		if i.Deleted && opt.SkipDeletedOlderThan > 0 && time.Since(i.Timestamp()) > opt.SkipDeletedOlderThan {
+			log.Debugf("skipping block %v deleted at %v", i.BlockID, i.Timestamp())
+			return nil
+		}
+		bld.Add(i)
+		return nil
+	})
+
+	return nil
+}
diff --git a/block/block_manager_test.go b/block/block_manager_test.go
new file mode 100644
index 000000000..cce7b9e51
--- /dev/null
+++ b/block/block_manager_test.go
@@ -0,0 +1,819 @@
+package block
+
+import (
+	"bytes"
+	"context"
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"math/rand"
+	"reflect"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/kopia/repo/internal/packindex"
+	
"github.com/kopia/repo/internal/storagetesting" + "github.com/kopia/repo/storage" + logging "github.com/op/go-logging" +) + +const ( + maxPackSize = 2000 +) + +var fakeTime = time.Date(2017, 1, 1, 0, 0, 0, 0, time.UTC) +var hmacSecret = []byte{1, 2, 3} + +func init() { + logging.SetLevel(logging.INFO, "") +} + +func TestBlockManagerEmptyFlush(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + bm.Flush(ctx) + if got, want := len(data), 0; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } +} + +func TestBlockZeroBytes1(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + blockID := writeBlockAndVerify(ctx, t, bm, []byte{}) + bm.Flush(ctx) + if got, want := len(data), 2; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } + dumpBlockManagerData(t, data) + bm = newTestBlockManager(data, keyTime, nil) + verifyBlock(ctx, t, bm, blockID, []byte{}) +} + +func TestBlockZeroBytes2(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 10)) + writeBlockAndVerify(ctx, t, bm, []byte{}) + bm.Flush(ctx) + if got, want := len(data), 2; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + dumpBlockManagerData(t, data) + } +} + +func TestBlockManagerSmallBlockWrites(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + + for i := 0; i < 100; i++ { + writeBlockAndVerify(ctx, t, bm, seededRandomData(i, 10)) + } + if got, want := len(data), 0; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } + bm.Flush(ctx) + if got, want := len(data), 2; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } +} + +func TestBlockManagerDedupesPendingBlocks(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + + for i := 0; i < 100; i++ { + writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 999)) + } + if got, want := len(data), 0; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } + bm.Flush(ctx) + if got, want := len(data), 2; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } +} + +func TestBlockManagerDedupesPendingAndUncommittedBlocks(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + + // no writes here, all data fits in a single pack. 
+ writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 950)) + writeBlockAndVerify(ctx, t, bm, seededRandomData(1, 950)) + writeBlockAndVerify(ctx, t, bm, seededRandomData(2, 10)) + if got, want := len(data), 0; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } + + // no writes here + writeBlockAndVerify(ctx, t, bm, seededRandomData(0, 950)) + writeBlockAndVerify(ctx, t, bm, seededRandomData(1, 950)) + writeBlockAndVerify(ctx, t, bm, seededRandomData(2, 10)) + if got, want := len(data), 0; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } + bm.Flush(ctx) + + // this flushes the pack block + index block + if got, want := len(data), 2; got != want { + dumpBlockManagerData(t, data) + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } +} + +func TestBlockManagerEmpty(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + + noSuchBlockID := string(hashValue([]byte("foo"))) + + b, err := bm.GetBlock(ctx, noSuchBlockID) + if err != storage.ErrBlockNotFound { + t.Errorf("unexpected error when getting non-existent block: %v, %v", b, err) + } + + bi, err := bm.BlockInfo(ctx, noSuchBlockID) + if err != storage.ErrBlockNotFound { + t.Errorf("unexpected error when getting non-existent block info: %v, %v", bi, err) + } + + if got, want := len(data), 0; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } +} + +func verifyActiveIndexBlockCount(ctx context.Context, t *testing.T, bm *Manager, expected int) { + t.Helper() + + blks, err := bm.IndexBlocks(ctx) + if err != nil { + t.Errorf("error listing active index blocks: %v", err) + return + } + + if got, want := len(blks), expected; got != want { + t.Errorf("unexpected number of active index blocks %v, expected %v (%v)", got, want, blks) + } +} +func TestBlockManagerInternalFlush(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + + for i := 0; i < 100; i++ { + b := make([]byte, 25) + rand.Read(b) + writeBlockAndVerify(ctx, t, bm, b) + } + + // 1 data block written, but no index yet. + if got, want := len(data), 1; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } + + // do it again - should be 2 blocks + 1000 bytes pending. + for i := 0; i < 100; i++ { + b := make([]byte, 25) + rand.Read(b) + writeBlockAndVerify(ctx, t, bm, b) + } + + // 2 data blocks written, but no index yet. + if got, want := len(data), 2; got != want { + t.Errorf("unexpected number of blocks: %v, wanted %v", got, want) + } + + bm.Flush(ctx) + + // third block gets written, followed by index. 
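+	// (4 storage blocks total: 3 data packs plus 1 index)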
+	if got, want := len(data), 4; got != want {
+		dumpBlockManagerData(t, data)
+		t.Errorf("unexpected number of blocks: %v, wanted %v", got, want)
+	}
+}
+
+func TestBlockManagerWriteMultiple(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	keyTime := map[string]time.Time{}
+	timeFunc := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
+	bm := newTestBlockManager(data, keyTime, timeFunc)
+
+	var blockIDs []string
+
+	for i := 0; i < 5000; i++ {
+		//t.Logf("i=%v", i)
+		b := seededRandomData(i, i%113)
+		blkID, err := bm.WriteBlock(ctx, b, "")
+		if err != nil {
+			t.Errorf("err: %v", err)
+		}
+
+		blockIDs = append(blockIDs, blkID)
+
+		if i%17 == 0 {
+			//t.Logf("flushing %v", i)
+			if err := bm.Flush(ctx); err != nil {
+				t.Fatalf("error flushing: %v", err)
+			}
+			//dumpBlockManagerData(t, data)
+		}
+
+		if i%41 == 0 {
+			//t.Logf("opening new manager: %v", i)
+			if err := bm.Flush(ctx); err != nil {
+				t.Fatalf("error flushing: %v", err)
+			}
+			//t.Logf("data block count: %v", len(data))
+			//dumpBlockManagerData(t, data)
+			bm = newTestBlockManager(data, keyTime, timeFunc)
+		}
+
+		pos := rand.Intn(len(blockIDs))
+		if _, err := bm.GetBlock(ctx, blockIDs[pos]); err != nil {
+			dumpBlockManagerData(t, data)
+			t.Fatalf("can't read block %q: %v", blockIDs[pos], err)
+		}
+	}
+}
+
+// This is a regression test for a bug where we would corrupt data when encryption
+// was done in place and clobbered pending data in memory.
+func TestBlockManagerFailedToWritePack(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	keyTime := map[string]time.Time{}
+	st := storagetesting.NewMapStorage(data, keyTime, nil)
+	faulty := &storagetesting.FaultyStorage{
+		Base: st,
+	}
+	st = faulty
+
+	bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{
+		Version:     1,
+		BlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV",
+		MaxPackSize: maxPackSize,
+		HMACSecret:  []byte("foo"),
+		MasterKey:   []byte("0123456789abcdef0123456789abcdef"),
+	}, CachingOptions{}, fakeTimeNowFrozen(fakeTime))
+	if err != nil {
+		t.Fatalf("can't create bm: %v", err)
+	}
+	logging.SetLevel(logging.DEBUG, "faulty-storage")
+
+	faulty.Faults = map[string][]*storagetesting.Fault{
+		"PutBlock": []*storagetesting.Fault{
+			{Err: errors.New("booboo")},
+		},
+	}
+
+	b1, err := bm.WriteBlock(ctx, seededRandomData(1, 10), "")
+	if err != nil {
+		t.Fatalf("can't create block: %v", err)
+	}
+
+	if err := bm.Flush(ctx); err != nil {
+		t.Logf("expected flush error: %v", err)
+	}
+
+	verifyBlock(ctx, t, bm, b1, seededRandomData(1, 10))
+}
+
+func TestBlockManagerConcurrency(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	keyTime := map[string]time.Time{}
+	bm := newTestBlockManager(data, keyTime, nil)
+	preexistingBlock := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
+	bm.Flush(ctx)
+
+	dumpBlockManagerData(t, data)
+	bm1 := newTestBlockManager(data, keyTime, nil)
+	bm2 := newTestBlockManager(data, keyTime, nil)
+	bm3 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(1), 1*time.Second))
+
+	// all bm* can see pre-existing block
+	verifyBlock(ctx, t, bm1, preexistingBlock, seededRandomData(10, 100))
+	verifyBlock(ctx, t, bm2, preexistingBlock, seededRandomData(10, 100))
+	verifyBlock(ctx, t, bm3, preexistingBlock, seededRandomData(10, 100))
+
+	// write the same block in all managers.
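+	// identical data hashes to the same content-addressed blockID in every manager.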
+ sharedBlock := writeBlockAndVerify(ctx, t, bm1, seededRandomData(20, 100)) + writeBlockAndVerify(ctx, t, bm2, seededRandomData(20, 100)) + writeBlockAndVerify(ctx, t, bm3, seededRandomData(20, 100)) + + // write unique block per manager. + bm1block := writeBlockAndVerify(ctx, t, bm1, seededRandomData(31, 100)) + bm2block := writeBlockAndVerify(ctx, t, bm2, seededRandomData(32, 100)) + bm3block := writeBlockAndVerify(ctx, t, bm3, seededRandomData(33, 100)) + + // make sure they can't see each other's unflushed blocks. + verifyBlockNotFound(ctx, t, bm1, bm2block) + verifyBlockNotFound(ctx, t, bm1, bm3block) + verifyBlockNotFound(ctx, t, bm2, bm1block) + verifyBlockNotFound(ctx, t, bm2, bm3block) + verifyBlockNotFound(ctx, t, bm3, bm1block) + verifyBlockNotFound(ctx, t, bm3, bm2block) + + // now flush all writers, they still can't see each others' data. + bm1.Flush(ctx) + bm2.Flush(ctx) + bm3.Flush(ctx) + verifyBlockNotFound(ctx, t, bm1, bm2block) + verifyBlockNotFound(ctx, t, bm1, bm3block) + verifyBlockNotFound(ctx, t, bm2, bm1block) + verifyBlockNotFound(ctx, t, bm2, bm3block) + verifyBlockNotFound(ctx, t, bm3, bm1block) + verifyBlockNotFound(ctx, t, bm3, bm2block) + + // new block manager at this point can see all data. + bm4 := newTestBlockManager(data, keyTime, nil) + verifyBlock(ctx, t, bm4, preexistingBlock, seededRandomData(10, 100)) + verifyBlock(ctx, t, bm4, sharedBlock, seededRandomData(20, 100)) + verifyBlock(ctx, t, bm4, bm1block, seededRandomData(31, 100)) + verifyBlock(ctx, t, bm4, bm2block, seededRandomData(32, 100)) + verifyBlock(ctx, t, bm4, bm3block, seededRandomData(33, 100)) + + if got, want := getIndexCount(data), 4; got != want { + t.Errorf("unexpected index count before compaction: %v, wanted %v", got, want) + } + + if err := bm4.CompactIndexes(ctx, CompactOptions{ + MinSmallBlocks: 1, + MaxSmallBlocks: 1, + }); err != nil { + t.Errorf("compaction error: %v", err) + } + if got, want := getIndexCount(data), 1; got != want { + t.Errorf("unexpected index count after compaction: %v, wanted %v", got, want) + } + + // new block manager at this point can see all data. 
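+	// (bm5 reads everything through the single compacted index written above)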
+	bm5 := newTestBlockManager(data, keyTime, nil)
+	verifyBlock(ctx, t, bm5, preexistingBlock, seededRandomData(10, 100))
+	verifyBlock(ctx, t, bm5, sharedBlock, seededRandomData(20, 100))
+	verifyBlock(ctx, t, bm5, bm1block, seededRandomData(31, 100))
+	verifyBlock(ctx, t, bm5, bm2block, seededRandomData(32, 100))
+	verifyBlock(ctx, t, bm5, bm3block, seededRandomData(33, 100))
+	if err := bm5.CompactIndexes(ctx, CompactOptions{
+		MinSmallBlocks: 1,
+		MaxSmallBlocks: 1,
+	}); err != nil {
+		t.Errorf("compaction error: %v", err)
+	}
+}
+
+func TestDeleteBlock(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	keyTime := map[string]time.Time{}
+	bm := newTestBlockManager(data, keyTime, nil)
+	block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
+	bm.Flush(ctx)
+	block2 := writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100))
+	if err := bm.DeleteBlock(block1); err != nil {
+		t.Errorf("unable to delete block %q: %v", block1, err)
+	}
+	if err := bm.DeleteBlock(block2); err != nil {
+		t.Errorf("unable to delete block %q: %v", block2, err)
+	}
+	verifyBlockNotFound(ctx, t, bm, block1)
+	verifyBlockNotFound(ctx, t, bm, block2)
+	bm.Flush(ctx)
+	log.Debugf("-----------")
+	bm = newTestBlockManager(data, keyTime, nil)
+	//dumpBlockManagerData(t, data)
+	verifyBlockNotFound(ctx, t, bm, block1)
+	verifyBlockNotFound(ctx, t, bm, block2)
+}
+
+func TestRewriteNonDeleted(t *testing.T) {
+	const stepBehaviors = 3
+
+	// perform a sequence WriteBlock() RewriteBlock() GetBlock()
+	// where actionX can be (0=flush and reopen, 1=flush, 2=nothing)
+	for action1 := 0; action1 < stepBehaviors; action1++ {
+		for action2 := 0; action2 < stepBehaviors; action2++ {
+			t.Run(fmt.Sprintf("case-%v-%v", action1, action2), func(t *testing.T) {
+				ctx := context.Background()
+				data := map[string][]byte{}
+				keyTime := map[string]time.Time{}
+				fakeNow := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
+				bm := newTestBlockManager(data, keyTime, fakeNow)
+
+				applyStep := func(action int) {
+					switch action {
+					case 0:
+						t.Logf("flushing and reopening")
+						bm.Flush(ctx)
+						bm = newTestBlockManager(data, keyTime, fakeNow)
+					case 1:
+						t.Logf("flushing")
+						bm.Flush(ctx)
+					case 2:
+						t.Logf("doing nothing")
+					}
+				}
+
+				block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
+				applyStep(action1)
+				bm.RewriteBlock(ctx, block1)
+				applyStep(action2)
+				verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
+				dumpBlockManagerData(t, data)
+			})
+		}
+	}
+}
+
+func TestDisableFlush(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	keyTime := map[string]time.Time{}
+	bm := newTestBlockManager(data, keyTime, nil)
+	bm.DisableIndexFlush()
+	bm.DisableIndexFlush()
+	for i := 0; i < 500; i++ {
+		writeBlockAndVerify(ctx, t, bm, seededRandomData(i, 100))
+	}
+	bm.Flush(ctx) // flush will not have effect
+	bm.EnableIndexFlush()
+	bm.Flush(ctx) // flush will not have effect
+	bm.EnableIndexFlush()
+
+	verifyActiveIndexBlockCount(ctx, t, bm, 0)
+	bm.EnableIndexFlush()
+	verifyActiveIndexBlockCount(ctx, t, bm, 0)
+	bm.Flush(ctx) // flush will happen now
+	verifyActiveIndexBlockCount(ctx, t, bm, 1)
+}
+
+func TestRewriteDeleted(t *testing.T) {
+	const stepBehaviors = 3
+
+	// perform a sequence WriteBlock() Delete() RewriteBlock() GetBlock()
+	// where actionX can be (0=flush and reopen, 1=flush, 2=nothing)
+	for action1 := 0; action1 < stepBehaviors; action1++ {
+		for action2 := 0; action2 < stepBehaviors; action2++ {
+			for action3 := 0; action3 < stepBehaviors; action3++ {
+
t.Run(fmt.Sprintf("case-%v-%v-%v", action1, action2, action3), func(t *testing.T) {
+					ctx := context.Background()
+					data := map[string][]byte{}
+					keyTime := map[string]time.Time{}
+					fakeNow := fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
+					bm := newTestBlockManager(data, keyTime, fakeNow)
+
+					applyStep := func(action int) {
+						switch action {
+						case 0:
+							t.Logf("flushing and reopening")
+							bm.Flush(ctx)
+							bm = newTestBlockManager(data, keyTime, fakeNow)
+						case 1:
+							t.Logf("flushing")
+							bm.Flush(ctx)
+						case 2:
+							t.Logf("doing nothing")
+						}
+					}
+
+					block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
+					applyStep(action1)
+					bm.DeleteBlock(block1)
+					applyStep(action2)
+					bm.RewriteBlock(ctx, block1)
+					applyStep(action3)
+					verifyBlockNotFound(ctx, t, bm, block1)
+					dumpBlockManagerData(t, data)
+				})
+			}
+		}
+	}
+}
+
+func TestDeleteAndRecreate(t *testing.T) {
+	ctx := context.Background()
+	// simulate race between delete/recreate and delete
+	// delete happens at t0+10, recreate at t0+20 and the second delete time is parameterized.
+	// depending on its timing, the effects of the second delete may or may not be visible.
+	cases := []struct {
+		desc         string
+		deletionTime time.Time
+		isVisible    bool
+	}{
+		{"deleted before delete and-recreate", fakeTime.Add(5 * time.Second), true},
+		//{"deleted after delete and recreate", fakeTime.Add(25 * time.Second), false},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			// write a block
+			data := map[string][]byte{}
+			keyTime := map[string]time.Time{}
+			bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
+			block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100))
+			bm.Flush(ctx)
+
+			// delete at the given timestamp but don't commit yet.
+			bm0 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(tc.deletionTime, 1*time.Second))
+			bm0.DeleteBlock(block1)
+
+			// delete it at t0+10
+			bm1 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(10*time.Second), 1*time.Second))
+			verifyBlock(ctx, t, bm1, block1, seededRandomData(10, 100))
+			bm1.DeleteBlock(block1)
+			bm1.Flush(ctx)
+
+			// recreate at t0+20
+			bm2 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(20*time.Second), 1*time.Second))
+			block2 := writeBlockAndVerify(ctx, t, bm2, seededRandomData(10, 100))
+			bm2.Flush(ctx)
+
+			// commit deletion from bm0 (t0+5)
+			bm0.Flush(ctx)
+
+			//dumpBlockManagerData(t, data)
+
+			if block1 != block2 {
+				t.Errorf("got invalid block %v, expected %v", block2, block1)
+			}
+
+			bm3 := newTestBlockManager(data, keyTime, nil)
+			dumpBlockManagerData(t, data)
+			if tc.isVisible {
+				verifyBlock(ctx, t, bm3, block1, seededRandomData(10, 100))
+			} else {
+				verifyBlockNotFound(ctx, t, bm3, block1)
+			}
+		})
+	}
+}
+
+func TestBlockWriteAliasing(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	keyTime := map[string]time.Time{}
+	bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime))
+
+	blockData := []byte{100, 0, 0}
+	id1 := writeBlockAndVerify(ctx, t, bm, blockData)
+	blockData[0] = 101
+	id2 := writeBlockAndVerify(ctx, t, bm, blockData)
+	bm.Flush(ctx)
+	blockData[0] = 102
+	id3 := writeBlockAndVerify(ctx, t, bm, blockData)
+	blockData[0] = 103
+	id4 := writeBlockAndVerify(ctx, t, bm, blockData)
+	verifyBlock(ctx, t, bm, id1, []byte{100, 0, 0})
+	verifyBlock(ctx, t, bm, id2, []byte{101, 0, 0})
+	verifyBlock(ctx, t, bm, id3, []byte{102, 0, 0})
+	verifyBlock(ctx, t, bm, id4, []byte{103, 0, 0})
+}
+
+func TestBlockReadAliasing(t 
*testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, fakeTimeNowFrozen(fakeTime)) + + blockData := []byte{100, 0, 0} + id1 := writeBlockAndVerify(ctx, t, bm, blockData) + blockData2, err := bm.GetBlock(ctx, id1) + if err != nil { + t.Fatalf("can't get block data: %v", err) + } + + blockData2[0]++ + verifyBlock(ctx, t, bm, id1, blockData) + bm.Flush(ctx) + verifyBlock(ctx, t, bm, id1, blockData) +} + +func TestVersionCompatibility(t *testing.T) { + for writeVer := minSupportedReadVersion; writeVer <= currentWriteVersion; writeVer++ { + t.Run(fmt.Sprintf("version-%v", writeVer), func(t *testing.T) { + verifyVersionCompat(t, writeVer) + }) + } +} + +func verifyVersionCompat(t *testing.T, writeVersion int) { + ctx := context.Background() + + // create block manager that writes 'writeVersion' and reads all versions >= minSupportedReadVersion + data := map[string][]byte{} + keyTime := map[string]time.Time{} + mgr := newTestBlockManager(data, keyTime, nil) + mgr.writeFormatVersion = int32(writeVersion) + + dataSet := map[string][]byte{} + + for i := 0; i < 3000000; i = (i + 1) * 2 { + data := make([]byte, i) + rand.Read(data) + + cid, err := mgr.WriteBlock(ctx, data, "") + if err != nil { + t.Fatalf("unable to write %v bytes: %v", len(data), err) + } + dataSet[cid] = data + } + verifyBlockManagerDataSet(ctx, t, mgr, dataSet) + + // delete random 3 items (map iteration order is random) + cnt := 0 + for blockID := range dataSet { + t.Logf("deleting %v", blockID) + mgr.DeleteBlock(blockID) + delete(dataSet, blockID) + cnt++ + if cnt >= 3 { + break + } + } + if err := mgr.Flush(ctx); err != nil { + t.Fatalf("failed to flush: %v", err) + } + + // create new manager that reads and writes using new version. 
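+	// it must still be able to read all data written using the older write version.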
+	mgr = newTestBlockManager(data, keyTime, nil)
+
+	// make sure we can read everything
+	verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
+
+	if err := mgr.CompactIndexes(ctx, CompactOptions{
+		MinSmallBlocks: 1,
+		MaxSmallBlocks: 1,
+	}); err != nil {
+		t.Fatalf("unable to compact indexes: %v", err)
+	}
+	if err := mgr.Flush(ctx); err != nil {
+		t.Fatalf("failed to flush: %v", err)
+	}
+	verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
+
+	// now open one more manager
+	mgr = newTestBlockManager(data, keyTime, nil)
+	verifyBlockManagerDataSet(ctx, t, mgr, dataSet)
+}
+
+func verifyBlockManagerDataSet(ctx context.Context, t *testing.T, mgr *Manager, dataSet map[string][]byte) {
+	for blockID, originalPayload := range dataSet {
+		v, err := mgr.GetBlock(ctx, blockID)
+		if err != nil {
+			t.Errorf("unable to read block %q: %v", blockID, err)
+			continue
+		}
+
+		if !reflect.DeepEqual(v, originalPayload) {
+			t.Errorf("payload for %q does not match original: got %x, want %x", blockID, v, originalPayload)
+		}
+	}
+}
+
+func newTestBlockManager(data map[string][]byte, keyTime map[string]time.Time, timeFunc func() time.Time) *Manager {
+	//st = logging.NewWrapper(st)
+	if timeFunc == nil {
+		timeFunc = fakeTimeNowWithAutoAdvance(fakeTime, 1*time.Second)
+	}
+	st := storagetesting.NewMapStorage(data, keyTime, timeFunc)
+	bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{
+		BlockFormat: "UNENCRYPTED_HMAC_SHA256",
+		HMACSecret:  hmacSecret,
+		MaxPackSize: maxPackSize,
+	}, CachingOptions{}, timeFunc)
+	if err != nil {
+		panic("can't create block manager: " + err.Error())
+	}
+	bm.checkInvariantsOnUnlock = true
+	return bm
+}
+
+func getIndexCount(d map[string][]byte) int {
+	var cnt int
+
+	for k := range d {
+		if strings.HasPrefix(k, newIndexBlockPrefix) {
+			cnt++
+		}
+	}
+
+	return cnt
+}
+
+func fakeTimeNowFrozen(t time.Time) func() time.Time {
+	return fakeTimeNowWithAutoAdvance(t, 0)
+}
+
+func fakeTimeNowWithAutoAdvance(t time.Time, dt time.Duration) func() time.Time {
+	var mu sync.Mutex
+	return func() time.Time {
+		mu.Lock()
+		defer mu.Unlock()
+		ret := t
+		t = t.Add(dt)
+		return ret
+	}
+}
+
+func verifyBlockNotFound(ctx context.Context, t *testing.T, bm *Manager, blockID string) {
+	t.Helper()
+
+	b, err := bm.GetBlock(ctx, blockID)
+	if err != storage.ErrBlockNotFound {
+		t.Errorf("unexpected response from GetBlock(%q), got %v,%v, expected %v", blockID, b, err, storage.ErrBlockNotFound)
+	}
+}
+
+func verifyBlock(ctx context.Context, t *testing.T, bm *Manager, blockID string, b []byte) {
+	t.Helper()
+
+	b2, err := bm.GetBlock(ctx, blockID)
+	if err != nil {
+		t.Errorf("unable to read block %q: %v", blockID, err)
+		return
+	}
+
+	if got, want := b2, b; !reflect.DeepEqual(got, want) {
+		t.Errorf("block %q data mismatch: got %x (nil:%v), wanted %x (nil:%v)", blockID, got, got == nil, want, want == nil)
+	}
+
+	bi, err := bm.BlockInfo(ctx, blockID)
+	if err != nil {
+		t.Errorf("error getting block info %q: %v", blockID, err)
+	}
+
+	if got, want := bi.Length, uint32(len(b)); got != want {
+		t.Errorf("invalid block size for %q: %v, wanted %v", blockID, got, want)
+	}
+}
+
+func writeBlockAndVerify(ctx context.Context, t *testing.T, bm *Manager, b []byte) string {
+	t.Helper()
+
+	blockID, err := bm.WriteBlock(ctx, b, "")
+	if err != nil {
+		t.Errorf("err: %v", err)
+	}
+
+	if got, want := blockID, hashValue(b); got != want {
+		t.Errorf("invalid block ID for %x, got %v, want %v", b, got, want)
+	}
+
+	verifyBlock(ctx, t, bm, blockID, b)
+
+	return blockID
+}
+
+func 
seededRandomData(seed int, length int) []byte { + b := make([]byte, length) + rnd := rand.New(rand.NewSource(int64(seed))) + rnd.Read(b) + return b +} + +func hashValue(b []byte) string { + h := hmac.New(sha256.New, hmacSecret) + h.Write(b) + return hex.EncodeToString(h.Sum(nil)) +} + +func dumpBlockManagerData(t *testing.T, data map[string][]byte) { + t.Helper() + for k, v := range data { + if k[0] == 'n' { + ndx, err := packindex.Open(bytes.NewReader(v)) + if err == nil { + t.Logf("index %v (%v bytes)", k, len(v)) + ndx.Iterate("", func(i packindex.Info) error { + t.Logf(" %+v\n", i) + return nil + }) + + } + } else { + t.Logf("data %v (%v bytes)\n", k, len(v)) + } + } +} diff --git a/block/cache_hmac.go b/block/cache_hmac.go new file mode 100644 index 000000000..73fb09908 --- /dev/null +++ b/block/cache_hmac.go @@ -0,0 +1,33 @@ +package block + +import "crypto/hmac" +import "crypto/sha256" +import "errors" + +func appendHMAC(data []byte, secret []byte) []byte { + h := hmac.New(sha256.New, secret) + h.Write(data) // nolint:errcheck + return h.Sum(data) +} + +func verifyAndStripHMAC(b []byte, secret []byte) ([]byte, error) { + if len(b) < sha256.Size { + return nil, errors.New("invalid data - too short") + } + + p := len(b) - sha256.Size + data := b[0:p] + signature := b[p:] + + h := hmac.New(sha256.New, secret) + h.Write(data) // nolint:errcheck + validSignature := h.Sum(nil) + if len(signature) != len(validSignature) { + return nil, errors.New("invalid signature length") + } + if hmac.Equal(validSignature, signature) { + return data, nil + } + + return nil, errors.New("invalid data - corrupted") +} diff --git a/block/caching_options.go b/block/caching_options.go new file mode 100644 index 000000000..bd4b92bf1 --- /dev/null +++ b/block/caching_options.go @@ -0,0 +1,10 @@ +package block + +// CachingOptions specifies configuration of local cache. 
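+// Leaving CacheDirectory empty keeps index caching in memory and disables the list cache.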
+type CachingOptions struct { + CacheDirectory string `json:"cacheDirectory,omitempty"` + MaxCacheSizeBytes int64 `json:"maxCacheSize,omitempty"` + MaxListCacheDurationSec int `json:"maxListCacheDuration,omitempty"` + IgnoreListCache bool `json:"-"` + HMACSecret []byte `json:"-"` +} diff --git a/block/committed_block_index.go b/block/committed_block_index.go new file mode 100644 index 000000000..ddc6dac86 --- /dev/null +++ b/block/committed_block_index.go @@ -0,0 +1,139 @@ +package block + +import ( + "fmt" + "path/filepath" + "sync" + + "github.com/kopia/repo/internal/packindex" + "github.com/kopia/repo/storage" +) + +type committedBlockIndex struct { + cache committedBlockIndexCache + + mu sync.Mutex + inUse map[string]packindex.Index + merged packindex.Merged +} + +type committedBlockIndexCache interface { + hasIndexBlockID(indexBlockID string) (bool, error) + addBlockToCache(indexBlockID string, data []byte) error + openIndex(indexBlockID string) (packindex.Index, error) + expireUnused(used []string) error +} + +func (b *committedBlockIndex) getBlock(blockID string) (Info, error) { + b.mu.Lock() + defer b.mu.Unlock() + + info, err := b.merged.GetInfo(blockID) + if info != nil { + return *info, nil + } + if err == nil { + return Info{}, storage.ErrBlockNotFound + } + return Info{}, err +} + +func (b *committedBlockIndex) addBlock(indexBlockID string, data []byte, use bool) error { + if err := b.cache.addBlockToCache(indexBlockID, data); err != nil { + return err + } + + if !use { + return nil + } + + b.mu.Lock() + defer b.mu.Unlock() + + if b.inUse[indexBlockID] != nil { + return nil + } + + ndx, err := b.cache.openIndex(indexBlockID) + if err != nil { + return fmt.Errorf("unable to open pack index %q: %v", indexBlockID, err) + } + b.inUse[indexBlockID] = ndx + b.merged = append(b.merged, ndx) + return nil +} + +func (b *committedBlockIndex) listBlocks(prefix string, cb func(i Info) error) error { + b.mu.Lock() + m := append(packindex.Merged(nil), b.merged...) 
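+	// m is a private copy of the merged index list, so Iterate below can run without holding the lock.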
+ b.mu.Unlock() + + return m.Iterate(prefix, cb) +} + +func (b *committedBlockIndex) packFilesChanged(packFiles []string) bool { + if len(packFiles) != len(b.inUse) { + return true + } + + for _, packFile := range packFiles { + if b.inUse[packFile] == nil { + return true + } + } + + return false +} + +func (b *committedBlockIndex) use(packFiles []string) (bool, error) { + b.mu.Lock() + defer b.mu.Unlock() + + if !b.packFilesChanged(packFiles) { + return false, nil + } + log.Debugf("set of index files has changed (had %v, now %v)", len(b.inUse), len(packFiles)) + + var newMerged packindex.Merged + newInUse := map[string]packindex.Index{} + defer func() { + newMerged.Close() //nolint:errcheck + }() + + for _, e := range packFiles { + ndx, err := b.cache.openIndex(e) + if err != nil { + return false, fmt.Errorf("unable to open pack index %q: %v", e, err) + } + + newMerged = append(newMerged, ndx) + newInUse[e] = ndx + } + b.merged = newMerged + b.inUse = newInUse + + if err := b.cache.expireUnused(packFiles); err != nil { + log.Warningf("unable to expire unused block index files: %v", err) + } + newMerged = nil + + return true, nil +} + +func newCommittedBlockIndex(caching CachingOptions) (*committedBlockIndex, error) { + var cache committedBlockIndexCache + + if caching.CacheDirectory != "" { + dirname := filepath.Join(caching.CacheDirectory, "indexes") + cache = &diskCommittedBlockIndexCache{dirname} + } else { + cache = &memoryCommittedBlockIndexCache{ + blocks: map[string]packindex.Index{}, + } + } + + return &committedBlockIndex{ + cache: cache, + inUse: map[string]packindex.Index{}, + }, nil +} diff --git a/block/committed_block_index_disk_cache.go b/block/committed_block_index_disk_cache.go new file mode 100644 index 000000000..909a666bf --- /dev/null +++ b/block/committed_block_index_disk_cache.go @@ -0,0 +1,135 @@ +package block + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strings" + "time" + + "github.com/kopia/repo/internal/packindex" + "golang.org/x/exp/mmap" +) + +const ( + simpleIndexSuffix = ".sndx" + unusedCommittedBlockIndexCleanupTime = 1 * time.Hour // delete unused committed index blocks after 1 hour +) + +type diskCommittedBlockIndexCache struct { + dirname string +} + +func (c *diskCommittedBlockIndexCache) indexBlockPath(indexBlockID string) string { + return filepath.Join(c.dirname, indexBlockID+simpleIndexSuffix) +} + +func (c *diskCommittedBlockIndexCache) openIndex(indexBlockID string) (packindex.Index, error) { + fullpath := c.indexBlockPath(indexBlockID) + + f, err := mmap.Open(fullpath) + if err != nil { + return nil, err + } + + return packindex.Open(f) +} + +func (c *diskCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) { + _, err := os.Stat(c.indexBlockPath(indexBlockID)) + if err == nil { + return true, nil + } + if os.IsNotExist(err) { + return false, nil + } + + return false, err +} + +func (c *diskCommittedBlockIndexCache) addBlockToCache(indexBlockID string, data []byte) error { + exists, err := c.hasIndexBlockID(indexBlockID) + if err != nil { + return err + } + + if exists { + return nil + } + + tmpFile, err := writeTempFileAtomic(c.dirname, data) + if err != nil { + return err + } + + // rename() is atomic, so one process will succeed, but the other will fail + if err := os.Rename(tmpFile, c.indexBlockPath(indexBlockID)); err != nil { + // verify that the block exists + exists, err := c.hasIndexBlockID(indexBlockID) + if err != nil { + return err + } + if !exists { + return fmt.Errorf("unsuccessful index 
write of block %q", indexBlockID)
+		}
+	}
+
+	return nil
+}
+
+func writeTempFileAtomic(dirname string, data []byte) (string, error) {
+	// write to a temp file to avoid race where two processes are writing at the same time.
+	tf, err := ioutil.TempFile(dirname, "tmp")
+	if err != nil {
+		if os.IsNotExist(err) {
+			os.MkdirAll(dirname, 0700) //nolint:errcheck
+			tf, err = ioutil.TempFile(dirname, "tmp")
+		}
+	}
+	if err != nil {
+		return "", fmt.Errorf("can't create tmp file: %v", err)
+	}
+
+	if _, err := tf.Write(data); err != nil {
+		tf.Close() //nolint:errcheck
+		return "", fmt.Errorf("can't write to temp file: %v", err)
+	}
+	if err := tf.Close(); err != nil {
+		return "", fmt.Errorf("can't close tmp file: %v", err)
+	}
+
+	return tf.Name(), nil
+}
+
+func (c *diskCommittedBlockIndexCache) expireUnused(used []string) error {
+	entries, err := ioutil.ReadDir(c.dirname)
+	if err != nil {
+		return fmt.Errorf("can't list cache: %v", err)
+	}
+
+	remaining := map[string]os.FileInfo{}
+
+	for _, ent := range entries {
+		if strings.HasSuffix(ent.Name(), simpleIndexSuffix) {
+			n := strings.TrimSuffix(ent.Name(), simpleIndexSuffix)
+			remaining[n] = ent
+		}
+	}
+
+	for _, u := range used {
+		delete(remaining, u)
+	}
+
+	for _, rem := range remaining {
+		if time.Since(rem.ModTime()) > unusedCommittedBlockIndexCleanupTime {
+			log.Debugf("removing unused %v %v", rem.Name(), rem.ModTime())
+			if err := os.Remove(filepath.Join(c.dirname, rem.Name())); err != nil {
+				log.Warningf("unable to remove unused index file: %v", err)
+			}
+		} else {
+			log.Debugf("keeping unused %v because it's too new %v", rem.Name(), rem.ModTime())
+		}
+	}
+
+	return nil
+}
diff --git a/block/committed_block_index_mem_cache.go b/block/committed_block_index_mem_cache.go
new file mode 100644
index 000000000..b68d59a5c
--- /dev/null
+++ b/block/committed_block_index_mem_cache.go
@@ -0,0 +1,50 @@
+package block
+
+import (
+	"bytes"
+	"fmt"
+	"sync"
+
+	"github.com/kopia/repo/internal/packindex"
+)
+
+type memoryCommittedBlockIndexCache struct {
+	mu     sync.Mutex
+	blocks map[string]packindex.Index
+}
+
+func (m *memoryCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	return m.blocks[indexBlockID] != nil, nil
+}
+
+func (m *memoryCommittedBlockIndexCache) addBlockToCache(indexBlockID string, data []byte) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	ndx, err := packindex.Open(bytes.NewReader(data))
+	if err != nil {
+		return err
+	}
+
+	m.blocks[indexBlockID] = ndx
+	return nil
+}
+
+func (m *memoryCommittedBlockIndexCache) openIndex(indexBlockID string) (packindex.Index, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	v := m.blocks[indexBlockID]
+	if v == nil {
+		return nil, fmt.Errorf("block not found in cache: %v", indexBlockID)
+	}
+
+	return v, nil
+}
+
+func (m *memoryCommittedBlockIndexCache) expireUnused(used []string) error {
+	return nil
+}
diff --git a/block/context.go b/block/context.go
new file mode 100644
index 000000000..b7f22abd2
--- /dev/null
+++ b/block/context.go
@@ -0,0 +1,34 @@
+package block
+
+import "context"
+
+type contextKey string
+
+var useBlockCacheContextKey contextKey = "use-block-cache"
+var useListCacheContextKey contextKey = "use-list-cache"
+
+// UsingBlockCache returns a derived context that controls whether the block manager uses the block cache.
+func UsingBlockCache(ctx context.Context, enabled bool) context.Context {
+	return context.WithValue(ctx, useBlockCacheContextKey, enabled)
+}
+
+// UsingListCache returns a derived context that controls whether the block manager uses the list cache.
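+// Passing enabled=false forces the next listing to bypass the cache and read directly from
+// storage; a minimal sketch (assuming an open manager bm):
+//
+//	blocks, err := bm.IndexBlocks(block.UsingListCache(ctx, false))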
+func UsingListCache(ctx context.Context, enabled bool) context.Context { + return context.WithValue(ctx, useListCacheContextKey, enabled) +} + +func shouldUseBlockCache(ctx context.Context) bool { + if enabled, ok := ctx.Value(useBlockCacheContextKey).(bool); ok { + return enabled + } + + return true +} + +func shouldUseListCache(ctx context.Context) bool { + if enabled, ok := ctx.Value(useListCacheContextKey).(bool); ok { + return enabled + } + + return true +} diff --git a/block/list_cache.go b/block/list_cache.go new file mode 100644 index 000000000..f5a44cdcf --- /dev/null +++ b/block/list_cache.go @@ -0,0 +1,123 @@ +package block + +import ( + "context" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "time" + + "github.com/kopia/repo/storage" +) + +type listCache struct { + st storage.Storage + cacheFile string + listCacheDuration time.Duration + hmacSecret []byte +} + +func (c *listCache) listIndexBlocks(ctx context.Context) ([]IndexInfo, error) { + if c.cacheFile != "" { + ci, err := c.readBlocksFromCache(ctx) + if err == nil { + expirationTime := ci.Timestamp.Add(c.listCacheDuration) + if time.Now().Before(expirationTime) { + log.Debugf("retrieved list of index blocks from cache") + return ci.Blocks, nil + } + } else if err != storage.ErrBlockNotFound { + log.Warningf("unable to open cache file: %v", err) + } + } + + blocks, err := listIndexBlocksFromStorage(ctx, c.st) + if err == nil { + c.saveListToCache(ctx, &cachedList{ + Blocks: blocks, + Timestamp: time.Now(), + }) + } + log.Debugf("found %v index blocks from source", len(blocks)) + + return blocks, err +} + +func (c *listCache) saveListToCache(ctx context.Context, ci *cachedList) { + if c.cacheFile == "" { + return + } + log.Debugf("saving index blocks to cache: %v", len(ci.Blocks)) + if data, err := json.Marshal(ci); err == nil { + mySuffix := fmt.Sprintf(".tmp-%v-%v", os.Getpid(), time.Now().UnixNano()) + if err := ioutil.WriteFile(c.cacheFile+mySuffix, appendHMAC(data, c.hmacSecret), 0600); err != nil { + log.Warningf("unable to write list cache: %v", err) + } + os.Rename(c.cacheFile+mySuffix, c.cacheFile) //nolint:errcheck + os.Remove(c.cacheFile + mySuffix) //nolint:errcheck + } +} + +func (c *listCache) deleteListCache(ctx context.Context) { + if c.cacheFile != "" { + os.Remove(c.cacheFile) //nolint:errcheck + } +} + +func (c *listCache) readBlocksFromCache(ctx context.Context) (*cachedList, error) { + if !shouldUseListCache(ctx) { + return nil, storage.ErrBlockNotFound + } + + ci := &cachedList{} + + data, err := ioutil.ReadFile(c.cacheFile) + if err != nil { + if os.IsNotExist(err) { + return nil, storage.ErrBlockNotFound + } + + return nil, err + } + + data, err = verifyAndStripHMAC(data, c.hmacSecret) + if err != nil { + return nil, fmt.Errorf("invalid file %v: %v", c.cacheFile, err) + } + + if err := json.Unmarshal(data, &ci); err != nil { + return nil, fmt.Errorf("can't unmarshal cached list results: %v", err) + } + + return ci, nil + +} + +func newListCache(ctx context.Context, st storage.Storage, caching CachingOptions) (*listCache, error) { + var listCacheFile string + + if caching.CacheDirectory != "" { + listCacheFile = filepath.Join(caching.CacheDirectory, "list") + + if _, err := os.Stat(caching.CacheDirectory); os.IsNotExist(err) { + if err := os.MkdirAll(caching.CacheDirectory, 0700); err != nil { + return nil, err + } + } + } + + c := &listCache{ + st: st, + cacheFile: listCacheFile, + hmacSecret: caching.HMACSecret, + listCacheDuration: time.Duration(caching.MaxListCacheDurationSec) 
* time.Second, + } + + if caching.IgnoreListCache { + c.deleteListCache(ctx) + } + + return c, nil +} diff --git a/block/stats.go b/block/stats.go new file mode 100644 index 000000000..b1483506f --- /dev/null +++ b/block/stats.go @@ -0,0 +1,25 @@ +package block + +// Stats exposes statistics about block operation. +type Stats struct { + // Keep int64 fields first to ensure they get aligned to at least 64-bit boundaries + // which is required for atomic access on ARM and x86-32. + ReadBytes int64 `json:"readBytes,omitempty"` + WrittenBytes int64 `json:"writtenBytes,omitempty"` + DecryptedBytes int64 `json:"decryptedBytes,omitempty"` + EncryptedBytes int64 `json:"encryptedBytes,omitempty"` + HashedBytes int64 `json:"hashedBytes,omitempty"` + + ReadBlocks int32 `json:"readBlocks,omitempty"` + WrittenBlocks int32 `json:"writtenBlocks,omitempty"` + CheckedBlocks int32 `json:"checkedBlocks,omitempty"` + HashedBlocks int32 `json:"hashedBlocks,omitempty"` + InvalidBlocks int32 `json:"invalidBlocks,omitempty"` + PresentBlocks int32 `json:"presentBlocks,omitempty"` + ValidBlocks int32 `json:"validBlocks,omitempty"` +} + +// Reset clears all repository statistics. +func (s *Stats) Reset() { + *s = Stats{} +} diff --git a/connect.go b/connect.go new file mode 100644 index 000000000..948218975 --- /dev/null +++ b/connect.go @@ -0,0 +1,111 @@ +package repo + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/kopia/repo/block" + "github.com/kopia/repo/storage" +) + +// ConnectOptions specifies options when persisting configuration to connect to a repository. +type ConnectOptions struct { + block.CachingOptions +} + +// Connect connects to the repository in the specified storage and persists the configuration and credentials in the file provided. +func Connect(ctx context.Context, configFile string, st storage.Storage, password string, opt ConnectOptions) error { + formatBytes, err := st.GetBlock(ctx, FormatBlockID, 0, -1) + if err != nil { + return fmt.Errorf("unable to read format block: %v", err) + } + + f, err := parseFormatBlock(formatBytes) + if err != nil { + return err + } + + var lc LocalConfig + lc.Storage = st.ConnectionInfo() + + if err = setupCaching(configFile, &lc, opt.CachingOptions, f.UniqueID); err != nil { + return fmt.Errorf("unable to set up caching: %v", err) + } + + d, err := json.MarshalIndent(&lc, "", " ") + if err != nil { + return err + } + + if err = os.MkdirAll(filepath.Dir(configFile), 0700); err != nil { + return fmt.Errorf("unable to create config directory: %v", err) + } + + if err = ioutil.WriteFile(configFile, d, 0600); err != nil { + return fmt.Errorf("unable to write config file: %v", err) + } + + // now verify that the repository can be opened with the provided config file. 
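+	// (this catches bad credentials or unreachable storage before reporting success)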
+	r, err := Open(ctx, configFile, password, nil)
+	if err != nil {
+		return err
+	}
+
+	return r.Close(ctx)
+}
+
+func setupCaching(configPath string, lc *LocalConfig, opt block.CachingOptions, uniqueID []byte) error {
+	if opt.MaxCacheSizeBytes == 0 {
+		lc.Caching = block.CachingOptions{}
+		return nil
+	}
+
+	if opt.CacheDirectory == "" {
+		cacheDir, err := os.UserCacheDir()
+		if err != nil {
+			return fmt.Errorf("unable to determine cache directory: %v", err)
+		}
+
+		h := sha256.New()
+		h.Write(uniqueID)
+		h.Write([]byte(configPath))
+		lc.Caching.CacheDirectory = filepath.Join(cacheDir, hex.EncodeToString(h.Sum(nil))[0:16])
+	} else {
+		absCacheDir, err := filepath.Abs(opt.CacheDirectory)
+		if err != nil {
+			return err
+		}
+
+		lc.Caching.CacheDirectory = absCacheDir
+	}
+	lc.Caching.MaxCacheSizeBytes = opt.MaxCacheSizeBytes
+	lc.Caching.MaxListCacheDurationSec = opt.MaxListCacheDurationSec
+
+	log.Debugf("Creating cache directory '%v' with max size %v", lc.Caching.CacheDirectory, lc.Caching.MaxCacheSizeBytes)
+	if err := os.MkdirAll(lc.Caching.CacheDirectory, 0700); err != nil {
+		log.Warningf("unable to create cache directory: %v", err)
+	}
+	return nil
+}
+
+// Disconnect removes the specified configuration file and any local cache directories.
+func Disconnect(configFile string) error {
+	cfg, err := loadConfigFromFile(configFile)
+	if err != nil {
+		return err
+	}
+
+	if cfg.Caching.CacheDirectory != "" {
+		if err = os.RemoveAll(cfg.Caching.CacheDirectory); err != nil {
+			log.Warningf("unable to remove cache directory: %v", err)
+		}
+	}
+
+	return os.Remove(configFile)
+}
diff --git a/crypto_key_derivation.go b/crypto_key_derivation.go
new file mode 100644
index 000000000..593a82858
--- /dev/null
+++ b/crypto_key_derivation.go
@@ -0,0 +1,37 @@
+package repo
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"io"
+
+	"golang.org/x/crypto/hkdf"
+	"golang.org/x/crypto/pbkdf2"
+	"golang.org/x/crypto/scrypt"
+)
+
+// DefaultKeyDerivationAlgorithm is the key derivation algorithm for new configurations.
+const DefaultKeyDerivationAlgorithm = "scrypt-65536-8-1"
+
+func (f formatBlock) deriveMasterKeyFromPassword(password string) ([]byte, error) {
+	const masterKeySize = 32
+
+	switch f.KeyDerivationAlgorithm {
+	case "pbkdf2-sha256-100000":
+		return pbkdf2.Key([]byte(password), f.UniqueID, 100000, masterKeySize, sha256.New), nil
+
+	case "scrypt-65536-8-1":
+		return scrypt.Key([]byte(password), f.UniqueID, 65536, 8, 1, masterKeySize)
+
+	default:
+		return nil, fmt.Errorf("unsupported key algorithm: %v", f.KeyDerivationAlgorithm)
+	}
+}
+
+// deriveKeyFromMasterKey computes a key for a specific purpose and length using HKDF based on the master key.
+func deriveKeyFromMasterKey(masterKey, uniqueID, purpose []byte, length int) []byte {
+	key := make([]byte, length)
+	k := hkdf.New(sha256.New, masterKey, uniqueID, purpose)
+	io.ReadFull(k, key) //nolint:errcheck
+	return key
+}
diff --git a/doc.go b/doc.go
new file mode 100644
index 000000000..f5f82361d
--- /dev/null
+++ b/doc.go
@@ -0,0 +1,2 @@
+// Package repo implements content-addressable Repository on top of BLOB storage.
+package repo
diff --git a/format_block.go b/format_block.go
new file mode 100644
index 000000000..555e53e41
--- /dev/null
+++ b/format_block.go
@@ -0,0 +1,152 @@
+package repo
+
+import (
+	"bytes"
+	"context"
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/rand"
+	"encoding/json"
+	"fmt"
+	"io"
+
+	"github.com/kopia/repo/storage"
+)
+
+// FormatBlockID is the identifier of a storage block that describes repository format.
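An aside on crypto_key_derivation.go above: the scheme layers a password KDF (scrypt or PBKDF2) under HKDF expansion, and the same two-step flow is easy to exercise in isolation. A minimal sketch, assuming placeholder inputs rather than a real repository's unique ID:

package main

import (
	"crypto/sha256"
	"fmt"
	"io"

	"golang.org/x/crypto/hkdf"
	"golang.org/x/crypto/scrypt"
)

func main() {
	// Step 1: stretch the password into a 32-byte master key (scrypt-65536-8-1).
	masterKey, err := scrypt.Key([]byte("my-password"), []byte("placeholder-unique-id"), 65536, 8, 1, 32)
	if err != nil {
		panic(err)
	}

	// Step 2: expand a purpose-specific key from the master key via HKDF.
	aesKey := make([]byte, 32)
	io.ReadFull(hkdf.New(sha256.New, masterKey, []byte("placeholder-unique-id"), []byte("AES")), aesKey) //nolint:errcheck
	fmt.Printf("derived AES key: %x\n", aesKey)
}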
+const FormatBlockID = "kopia.repository" + +var ( + purposeAESKey = []byte("AES") + purposeAuthData = []byte("CHECKSUM") +) + +type formatBlock struct { + Tool string `json:"tool"` + BuildVersion string `json:"buildVersion"` + BuildInfo string `json:"buildInfo"` + + UniqueID []byte `json:"uniqueID"` + KeyDerivationAlgorithm string `json:"keyAlgo"` + + Version string `json:"version"` + EncryptionAlgorithm string `json:"encryption"` + EncryptedFormatBytes []byte `json:"encryptedBlockFormat,omitempty"` + UnencryptedFormat *repositoryObjectFormat `json:"blockFormat,omitempty"` +} + +// encryptedRepositoryConfig contains the configuration of repository that's persisted in encrypted format. +type encryptedRepositoryConfig struct { + Format repositoryObjectFormat `json:"format"` +} + +func parseFormatBlock(b []byte) (*formatBlock, error) { + f := &formatBlock{} + + if err := json.Unmarshal(b, &f); err != nil { + return nil, fmt.Errorf("invalid format block: %v", err) + } + + return f, nil +} + +func writeFormatBlock(ctx context.Context, st storage.Storage, f *formatBlock) error { + var buf bytes.Buffer + e := json.NewEncoder(&buf) + e.SetIndent("", " ") + if err := e.Encode(f); err != nil { + return fmt.Errorf("unable to marshal format block: %v", err) + } + + if err := st.PutBlock(ctx, FormatBlockID, buf.Bytes()); err != nil { + return fmt.Errorf("unable to write format block: %v", err) + } + + return nil +} + +func (f *formatBlock) decryptFormatBytes(masterKey []byte) (*repositoryObjectFormat, error) { + switch f.EncryptionAlgorithm { + case "NONE": // do nothing + return f.UnencryptedFormat, nil + + case "AES256_GCM": + aead, authData, err := initCrypto(masterKey, f.UniqueID) + if err != nil { + return nil, fmt.Errorf("cannot initialize cipher: %v", err) + } + + content := append([]byte(nil), f.EncryptedFormatBytes...) 
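+		// Layout note: the first NonceSize() bytes of EncryptedFormatBytes are
+		// the GCM nonce and the remainder is the sealed payload;
+		// encryptFormatBytes below writes this same nonce-prefix framing.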
+ if len(content) < aead.NonceSize() { + return nil, fmt.Errorf("invalid encrypted payload, too short") + } + nonce := content[0:aead.NonceSize()] + payload := content[aead.NonceSize():] + + plainText, err := aead.Open(payload[:0], nonce, payload, authData) + if err != nil { + return nil, fmt.Errorf("unable to decrypt repository format, invalid credentials?") + } + + var erc encryptedRepositoryConfig + if err := json.Unmarshal(plainText, &erc); err != nil { + return nil, fmt.Errorf("invalid repository format: %v", err) + } + + return &erc.Format, nil + + default: + return nil, fmt.Errorf("unknown encryption algorithm: '%v'", f.EncryptionAlgorithm) + } +} + +func initCrypto(masterKey, repositoryID []byte) (cipher.AEAD, []byte, error) { + aesKey := deriveKeyFromMasterKey(masterKey, repositoryID, purposeAESKey, 32) + authData := deriveKeyFromMasterKey(masterKey, repositoryID, purposeAuthData, 32) + + blk, err := aes.NewCipher(aesKey) + if err != nil { + return nil, nil, fmt.Errorf("cannot create cipher: %v", err) + } + aead, err := cipher.NewGCM(blk) + if err != nil { + return nil, nil, fmt.Errorf("cannot create cipher: %v", err) + } + + return aead, authData, nil +} + +func encryptFormatBytes(f *formatBlock, format *repositoryObjectFormat, masterKey, repositoryID []byte) error { + switch f.EncryptionAlgorithm { + case "NONE": + f.UnencryptedFormat = format + return nil + + case "AES256_GCM": + content, err := json.Marshal(&encryptedRepositoryConfig{Format: *format}) + if err != nil { + return fmt.Errorf("can't marshal format to JSON: %v", err) + } + aead, authData, err := initCrypto(masterKey, repositoryID) + if err != nil { + return fmt.Errorf("unable to initialize crypto: %v", err) + } + nonceLength := aead.NonceSize() + noncePlusContentLength := nonceLength + len(content) + cipherText := make([]byte, noncePlusContentLength+aead.Overhead()) + + // Store nonce at the beginning of ciphertext. + nonce := cipherText[0:nonceLength] + if _, err := io.ReadFull(rand.Reader, nonce); err != nil { + return err + } + + b := aead.Seal(cipherText[nonceLength:nonceLength], nonce, content, authData) + content = nonce[0 : nonceLength+len(b)] + f.EncryptedFormatBytes = content + return nil + + default: + return fmt.Errorf("unknown encryption algorithm: '%v'", f.EncryptionAlgorithm) + } +} diff --git a/initialize.go b/initialize.go new file mode 100644 index 000000000..ec7e87b9f --- /dev/null +++ b/initialize.go @@ -0,0 +1,134 @@ +package repo + +import ( + "context" + "crypto/rand" + "fmt" + "io" + + "github.com/kopia/repo/block" + "github.com/kopia/repo/object" + "github.com/kopia/repo/storage" +) + +// BuildInfo is the build information of Kopia. +var ( + BuildInfo = "unknown" + BuildVersion = "v0-unofficial" +) + +// DefaultEncryptionAlgorithm is the default algorithm for encrypting format block. +var DefaultEncryptionAlgorithm = "AES256_GCM" + +// SupportedEncryptionAlgorithms lists all supported algorithms for encrypting format block. +var SupportedEncryptionAlgorithms = []string{DefaultEncryptionAlgorithm, "NONE"} + +// NewRepositoryOptions specifies options that apply to newly created repositories. +// All fields are optional, when not provided, reasonable defaults will be used. 
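To make the nonce-prefix framing above concrete, here is a self-contained round trip in the same style; the key, payload, and auth data are placeholders, not what the repository actually uses:

package main

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"fmt"
	"io"
)

func main() {
	key := make([]byte, 32) // placeholder; the repository derives this via HKDF
	blk, err := aes.NewCipher(key)
	if err != nil {
		panic(err)
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		panic(err)
	}

	// Seal: ciphertext = nonce || AEAD(payload, authData).
	nonce := make([]byte, aead.NonceSize())
	io.ReadFull(rand.Reader, nonce) //nolint:errcheck
	sealed := aead.Seal(nonce, nonce, []byte("payload"), []byte("auth-data"))

	// Open: split the nonce back off the front before decrypting.
	plain, err := aead.Open(nil, sealed[:aead.NonceSize()], sealed[aead.NonceSize():], []byte("auth-data"))
	fmt.Println(string(plain), err) // payload <nil>
}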
+type NewRepositoryOptions struct { + UniqueID []byte // force the use of particular unique ID for metadata manager + MetadataEncryptionAlgorithm string // identifier of encryption algorithm + KeyDerivationAlgorithm string // identifier of key derivation algorithm + + BlockFormat block.FormattingOptions + DisableHMAC bool + + ObjectFormat object.Format // object format +} + +// Initialize creates initial repository data structures in the specified storage with given credentials. +func Initialize(ctx context.Context, st storage.Storage, opt *NewRepositoryOptions, password string) error { + if opt == nil { + opt = &NewRepositoryOptions{} + } + + // get the block - expect ErrBlockNotFound + _, err := st.GetBlock(ctx, FormatBlockID, 0, -1) + if err == nil { + return fmt.Errorf("repository already initialized") + } + if err != storage.ErrBlockNotFound { + return err + } + + format := formatBlockFromOptions(opt) + masterKey, err := format.deriveMasterKeyFromPassword(password) + if err != nil { + return err + } + + if err := encryptFormatBytes(format, repositoryObjectFormatFromOptions(opt), masterKey, format.UniqueID); err != nil { + return err + } + + if err := writeFormatBlock(ctx, st, format); err != nil { + return err + } + + return nil +} + +func formatBlockFromOptions(opt *NewRepositoryOptions) *formatBlock { + return &formatBlock{ + Tool: "https://github.com/kopia/kopia", + BuildInfo: BuildInfo, + KeyDerivationAlgorithm: applyDefaultString(opt.KeyDerivationAlgorithm, DefaultKeyDerivationAlgorithm), + UniqueID: applyDefaultRandomBytes(opt.UniqueID, 32), + Version: "1", + EncryptionAlgorithm: applyDefaultString(opt.MetadataEncryptionAlgorithm, DefaultEncryptionAlgorithm), + } +} + +func repositoryObjectFormatFromOptions(opt *NewRepositoryOptions) *repositoryObjectFormat { + f := &repositoryObjectFormat{ + FormattingOptions: block.FormattingOptions{ + Version: 1, + BlockFormat: applyDefaultString(opt.BlockFormat.BlockFormat, block.DefaultFormat), + HMACSecret: applyDefaultRandomBytes(opt.BlockFormat.HMACSecret, 32), + MasterKey: applyDefaultRandomBytes(opt.BlockFormat.MasterKey, 32), + MaxPackSize: applyDefaultInt(opt.BlockFormat.MaxPackSize, applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20)), // 20 MB + }, + Format: object.Format{ + Splitter: applyDefaultString(opt.ObjectFormat.Splitter, object.DefaultSplitter), + MaxBlockSize: applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20), // 20MiB + MinBlockSize: applyDefaultInt(opt.ObjectFormat.MinBlockSize, 10<<20), // 10MiB + AvgBlockSize: applyDefaultInt(opt.ObjectFormat.AvgBlockSize, 16<<20), // 16MiB + }, + } + + if opt.DisableHMAC { + f.HMACSecret = nil + } + + return f +} + +func randomBytes(n int) []byte { + b := make([]byte, n) + io.ReadFull(rand.Reader, b) //nolint:errcheck + return b +} + +func applyDefaultInt(v, def int) int { + if v == 0 { + return def + } + + return v +} + +func applyDefaultString(v, def string) string { + if v == "" { + return def + } + + return v +} + +func applyDefaultRandomBytes(b []byte, n int) []byte { + if b == nil { + return randomBytes(n) + } + + return b +} diff --git a/internal/jsonstream/doc.go b/internal/jsonstream/doc.go new file mode 100644 index 000000000..157858468 --- /dev/null +++ b/internal/jsonstream/doc.go @@ -0,0 +1,2 @@ +// Package jsonstream implements streaming reader and writer for JSON objects. 
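The stream this package reads and writes is a single JSON object with a fixed shape. Hand-indented for readability (the writer emits it compactly), a stream with two entries and a summary would look roughly like the sketch below; the field values are taken from the tests later in this patch:

// Illustrative shape only.
const exampleStream = `{
  "stream": "01234567",
  "entries": [
    {"name": "foo"},
    {"name": "bar"}
  ],
  "summary": {"val": 123}
}`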
+package jsonstream
diff --git a/internal/jsonstream/reader.go b/internal/jsonstream/reader.go
new file mode 100644
index 000000000..c1ddb2fbc
--- /dev/null
+++ b/internal/jsonstream/reader.go
@@ -0,0 +1,112 @@
+package jsonstream
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+)
+
+// Reader reads a stream of JSON objects.
+type Reader struct {
+	decoder *json.Decoder
+	summary interface{}
+}
+
+// Read reads the next JSON object from the stream and returns io.EOF at the end of the stream.
+func (r *Reader) Read(v interface{}) error {
+	if r.decoder.More() {
+		return r.decoder.Decode(v)
+	}
+
+	if err := ensureDelimiter(r.decoder, json.Delim(']')); err != nil {
+		return invalidStreamFormatError(err)
+	}
+
+	tok, err := r.decoder.Token()
+	if err != nil {
+		return invalidStreamFormatError(err)
+	}
+
+	switch tok {
+	case json.Delim('}'):
+		// end of stream, all good
+		return io.EOF
+
+	case "summary":
+		s := r.summary
+		if s == nil {
+			s = map[string]interface{}{}
+		}
+		if err := r.decoder.Decode(s); err != nil {
+			return invalidStreamFormatError(err)
+		}
+	}
+
+	if err := ensureDelimiter(r.decoder, json.Delim('}')); err != nil {
+		return invalidStreamFormatError(err)
+	}
+
+	return io.EOF
+}
+
+func ensureDelimiter(d *json.Decoder, expected json.Delim) error {
+	t, err := d.Token()
+	if err != nil {
+		return err
+	}
+
+	if t != expected {
+		return fmt.Errorf("expected '%v', got %v", expected.String(), t)
+	}
+
+	return nil
+}
+
+func ensureStringToken(d *json.Decoder, expected string) error {
+	t, err := d.Token()
+	if err != nil {
+		return err
+	}
+
+	if s, ok := t.(string); ok {
+		if s == expected {
+			return nil
+		}
+	}
+
+	return fmt.Errorf("expected '%v', got '%v'", expected, t)
+}
+
+func invalidStreamFormatError(cause error) error {
+	return fmt.Errorf("invalid stream format: %v", cause)
+}
+
+// NewReader returns a new Reader on top of a given buffered reader.
+// The provided header must match the beginning of a stream.
+func NewReader(r io.Reader, header string, summary interface{}) (*Reader, error) { + dr := Reader{ + decoder: json.NewDecoder(r), + summary: summary, + } + + if err := ensureDelimiter(dr.decoder, json.Delim('{')); err != nil { + return nil, invalidStreamFormatError(err) + } + + if err := ensureStringToken(dr.decoder, "stream"); err != nil { + return nil, invalidStreamFormatError(err) + } + + if err := ensureStringToken(dr.decoder, header); err != nil { + return nil, invalidStreamFormatError(err) + } + + if err := ensureStringToken(dr.decoder, "entries"); err != nil { + return nil, invalidStreamFormatError(err) + } + + if err := ensureDelimiter(dr.decoder, json.Delim('[')); err != nil { + return nil, invalidStreamFormatError(err) + } + + return &dr, nil +} diff --git a/internal/jsonstream/stream_test.go b/internal/jsonstream/stream_test.go new file mode 100644 index 000000000..c7e24d633 --- /dev/null +++ b/internal/jsonstream/stream_test.go @@ -0,0 +1,117 @@ +package jsonstream + +import ( + "bufio" + "bytes" + "io" + "log" + "strings" + "testing" +) + +type TestObj struct { + Name string `json:"name,omitempty"` +} +type TestSummary struct { + Value int `json:"val"` +} + +var testHeader1 = "01234567" +var testHeader2 = "0123456x" + +func TestStream(t *testing.T) { + var buf bytes.Buffer + + data := []TestObj{ + TestObj{Name: "foo"}, + TestObj{Name: "bar"}, + TestObj{Name: "baz"}, + } + + w := NewWriter(&buf, testHeader1) + for _, d := range data { + if err := w.Write(&d); err != nil { + t.Errorf("write error: %v", err) + } + } + w.Finalize() + log.Printf("wrote: %v", buf.String()) + r, err := NewReader(bufio.NewReader(&buf), testHeader1, nil) + if err != nil { + t.Errorf("err: %v", err) + return + } + for _, d := range data { + v := &TestObj{} + if readerr := r.Read(v); readerr != nil { + t.Errorf("read error: %v", readerr) + } + if v.Name != d.Name { + t.Errorf("invalid value: '%v', expected '%v'", v.Name, d.Name) + } + } + v := &TestObj{} + err = r.Read(v) + if err != io.EOF { + t.Errorf("expected EOF, got %v", err) + } +} + +func TestStreamWithSummary(t *testing.T) { + var buf bytes.Buffer + + data := []TestObj{ + TestObj{Name: "foo"}, + TestObj{Name: "bar"}, + TestObj{Name: "baz"}, + } + + w := NewWriter(&buf, testHeader1) + for _, d := range data { + if err := w.Write(&d); err != nil { + t.Errorf("write error: %v", err) + } + } + w.FinalizeWithSummary(TestSummary{Value: 123}) + log.Printf("wrote: %v", buf.String()) + + var summary TestSummary + r, err := NewReader(bufio.NewReader(&buf), testHeader1, &summary) + if err != nil { + t.Errorf("err: %v", err) + return + } + for _, d := range data { + v := &TestObj{} + if readerr := r.Read(v); readerr != nil { + t.Errorf("read error: %v", readerr) + } + if v.Name != d.Name { + t.Errorf("invalid value: '%v', expected '%v'", v.Name, d.Name) + } + } + v := &TestObj{} + err = r.Read(v) + if err != io.EOF { + t.Errorf("expected EOF, got %v", err) + } + if got, want := summary.Value, 123; got != want { + t.Errorf("unexpected summary value: %v, wanted %v", got, want) + } +} + +func TestInvalidHeader(t *testing.T) { + var buf bytes.Buffer + + w := NewWriter(&buf, testHeader1) + if err := w.Write(&TestObj{Name: "foo"}); err != nil { + t.Errorf("write error: %v", err) + } + + _, err := NewReader(bufio.NewReader(&buf), testHeader2, nil) + if err == nil { + t.Errorf("expected error, got none") + } else if !strings.Contains(err.Error(), "invalid stream format") { + t.Errorf("got incorrect error: %v", err) + } +} diff --git a/internal/jsonstream/writer.go 
b/internal/jsonstream/writer.go new file mode 100644 index 000000000..a79049bca --- /dev/null +++ b/internal/jsonstream/writer.go @@ -0,0 +1,59 @@ +package jsonstream + +import ( + "encoding/json" + "fmt" + "io" +) + +var commaBytes = []byte(",\n") + +// Writer writes a stream of JSON objects. +type Writer struct { + output io.Writer + header string + separator []byte +} + +// Write JSON object to the output. +func (w *Writer) Write(v interface{}) error { + if _, err := w.output.Write(w.separator); err != nil { + return err + } + j, err := json.Marshal(v) + if err != nil { + return err + } + // log.Printf("*** %v: %v", w.header, string(j)) + if _, err := w.output.Write(j); err != nil { + return err + } + w.separator = commaBytes + + return nil +} + +// FinalizeWithSummary writes the postamble to the JSON stream with a given summary object. +func (w *Writer) FinalizeWithSummary(summary interface{}) error { + b, err := json.Marshal(summary) + if err != nil { + return err + } + _, err = fmt.Fprintf(w.output, "\n],\"summary\":%v}", string(b)) + return err +} + +// Finalize writes the postamble to the JSON stream. +func (w *Writer) Finalize() error { + _, err := fmt.Fprintf(w.output, "\n]}") + return err +} + +// NewWriter creates a new Writer on top of a specified writer with a specified optional header. +func NewWriter(w io.Writer, header string) *Writer { + fmt.Fprintf(w, "{\"stream\":\"%v\",\"entries\":[\n", header) //nolint:errcheck + return &Writer{ + header: header, + output: w, + } +} diff --git a/internal/packindex/builder.go b/internal/packindex/builder.go new file mode 100644 index 000000000..bc71c51f2 --- /dev/null +++ b/internal/packindex/builder.go @@ -0,0 +1,152 @@ +package packindex + +import ( + "bufio" + "encoding/binary" + "fmt" + "io" + "sort" +) + +// Builder prepares and writes block index for writing. +type Builder map[string]*Info + +// Add adds a new entry to the builder or conditionally replaces it if the timestamp is greater. +func (b Builder) Add(i Info) { + old, ok := b[i.BlockID] + if !ok || i.TimestampSeconds >= old.TimestampSeconds { + b[i.BlockID] = &i + } +} + +func (b Builder) sortedBlocks() []*Info { + var allBlocks []*Info + + for _, v := range b { + allBlocks = append(allBlocks, v) + } + + sort.Slice(allBlocks, func(i, j int) bool { + return allBlocks[i].BlockID < allBlocks[j].BlockID + }) + + return allBlocks +} + +type indexLayout struct { + packFileOffsets map[string]uint32 + entryCount int + keyLength int + entryLength int + extraDataOffset uint32 +} + +// Build writes the pack index to the provided output. +func (b Builder) Build(output io.Writer) error { + allBlocks := b.sortedBlocks() + layout := &indexLayout{ + packFileOffsets: map[string]uint32{}, + keyLength: -1, + entryLength: 20, + entryCount: len(allBlocks), + } + + w := bufio.NewWriter(output) + + // prepare extra data to be appended at the end of an index. + extraData := prepareExtraData(allBlocks, layout) + + // write header + header := make([]byte, 8) + header[0] = 1 // version + header[1] = byte(layout.keyLength) + binary.BigEndian.PutUint16(header[2:4], uint16(layout.entryLength)) + binary.BigEndian.PutUint32(header[4:8], uint32(layout.entryCount)) + if _, err := w.Write(header); err != nil { + return fmt.Errorf("unable to write header: %v", err) + } + + // write all sorted blocks. 
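+	// Each record that follows is the key (keyLength bytes of encoded block
+	// ID) plus a fixed 20-byte entry: 8 bytes of timestamp/flags, a 4-byte
+	// absolute offset of the pack file name within the index file, a 4-byte
+	// packed offset (high bit marks deletion) and a 4-byte packed length;
+	// see formatEntry below.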
+ entry := make([]byte, layout.entryLength) + for _, it := range allBlocks { + if err := writeEntry(w, it, layout, entry); err != nil { + return fmt.Errorf("unable to write entry: %v", err) + } + } + + if _, err := w.Write(extraData); err != nil { + return fmt.Errorf("error writing extra data: %v", err) + } + + return w.Flush() +} + +func prepareExtraData(allBlocks []*Info, layout *indexLayout) []byte { + var extraData []byte + + for i, it := range allBlocks { + if i == 0 { + layout.keyLength = len(contentIDToBytes(it.BlockID)) + } + if it.PackFile != "" { + if _, ok := layout.packFileOffsets[it.PackFile]; !ok { + layout.packFileOffsets[it.PackFile] = uint32(len(extraData)) + extraData = append(extraData, []byte(it.PackFile)...) + } + } + if len(it.Payload) > 0 { + panic("storing payloads in indexes is not supported") + } + } + layout.extraDataOffset = uint32(8 + layout.entryCount*(layout.keyLength+layout.entryLength)) + return extraData +} + +func writeEntry(w io.Writer, it *Info, layout *indexLayout, entry []byte) error { + k := contentIDToBytes(it.BlockID) + if len(k) != layout.keyLength { + return fmt.Errorf("inconsistent key length: %v vs %v", len(k), layout.keyLength) + } + + if err := formatEntry(entry, it, layout); err != nil { + return fmt.Errorf("unable to format entry: %v", err) + } + + if _, err := w.Write(k); err != nil { + return fmt.Errorf("error writing entry key: %v", err) + } + if _, err := w.Write(entry); err != nil { + return fmt.Errorf("error writing entry: %v", err) + } + + return nil +} + +func formatEntry(entry []byte, it *Info, layout *indexLayout) error { + entryTimestampAndFlags := entry[0:8] + entryPackFileOffset := entry[8:12] + entryPackedOffset := entry[12:16] + entryPackedLength := entry[16:20] + timestampAndFlags := uint64(it.TimestampSeconds) << 16 + + if len(it.PackFile) == 0 { + return fmt.Errorf("empty pack block ID for %v", it.BlockID) + } + + binary.BigEndian.PutUint32(entryPackFileOffset, layout.extraDataOffset+layout.packFileOffsets[it.PackFile]) + if it.Deleted { + binary.BigEndian.PutUint32(entryPackedOffset, it.PackOffset|0x80000000) + } else { + binary.BigEndian.PutUint32(entryPackedOffset, it.PackOffset) + } + binary.BigEndian.PutUint32(entryPackedLength, it.Length) + timestampAndFlags |= uint64(it.FormatVersion) << 8 + timestampAndFlags |= uint64(len(it.PackFile)) + binary.BigEndian.PutUint64(entryTimestampAndFlags, timestampAndFlags) + return nil +} + +// NewBuilder creates a new Builder. +func NewBuilder() Builder { + return make(map[string]*Info) +} diff --git a/internal/packindex/content_id_to_bytes.go b/internal/packindex/content_id_to_bytes.go new file mode 100644 index 000000000..731f3b57d --- /dev/null +++ b/internal/packindex/content_id_to_bytes.go @@ -0,0 +1,38 @@ +package packindex + +import ( + "encoding/hex" +) + +func bytesToContentID(b []byte) string { + if len(b) == 0 { + return "" + } + if b[0] == 0xff { + return string(b[1:]) + } + prefix := "" + if b[0] != 0 { + prefix = string(b[0:1]) + } + + return prefix + hex.EncodeToString(b[1:]) +} + +func contentIDToBytes(c string) []byte { + var prefix []byte + var skip int + if len(c)%2 == 1 { + prefix = []byte(c[0:1]) + skip = 1 + } else { + prefix = []byte{0} + } + + b, err := hex.DecodeString(c[skip:]) + if err != nil { + return append([]byte{0xff}, []byte(c)...) + } + + return append(prefix, b...) 
+} diff --git a/internal/packindex/format.go b/internal/packindex/format.go new file mode 100644 index 000000000..5509556c9 --- /dev/null +++ b/internal/packindex/format.go @@ -0,0 +1,74 @@ +package packindex + +import ( + "encoding/binary" + "fmt" +) + +// Format describes a format of a single pack index. The actual structure is not used, +// it's purely for documentation purposes. +// The struct is byte-aligned. +type Format struct { + Version byte // format version number must be 0x01 + KeySize byte // size of each key in bytes + EntrySize uint16 // size of each entry in bytes, big-endian + EntryCount uint32 // number of sorted (key,value) entries that follow + + Entries []struct { + Key []byte // key bytes (KeySize) + Entry entry + } + + ExtraData []byte // extra data +} + +type entry struct { + // big endian: + // 48 most significant bits - 48-bit timestamp in seconds since 1970/01/01 UTC + // 8 bits - format version (currently == 1) + // 8 least significant bits - length of pack block ID + timestampAndFlags uint64 // + packFileOffset uint32 // 4 bytes, big endian, offset within index file where pack block ID begins + packedOffset uint32 // 4 bytes, big endian, offset within pack file where the contents begin + packedLength uint32 // 4 bytes, big endian, content length +} + +func (e *entry) parse(b []byte) error { + if len(b) < 20 { + return fmt.Errorf("invalid entry length: %v", len(b)) + } + + e.timestampAndFlags = binary.BigEndian.Uint64(b[0:8]) + e.packFileOffset = binary.BigEndian.Uint32(b[8:12]) + e.packedOffset = binary.BigEndian.Uint32(b[12:16]) + e.packedLength = binary.BigEndian.Uint32(b[16:20]) + return nil +} + +func (e *entry) IsDeleted() bool { + return e.packedOffset&0x80000000 != 0 +} + +func (e *entry) TimestampSeconds() int64 { + return int64(e.timestampAndFlags >> 16) +} + +func (e *entry) PackedFormatVersion() byte { + return byte(e.timestampAndFlags >> 8) +} + +func (e *entry) PackFileLength() byte { + return byte(e.timestampAndFlags) +} + +func (e *entry) PackFileOffset() uint32 { + return e.packFileOffset +} + +func (e *entry) PackedOffset() uint32 { + return e.packedOffset & 0x7fffffff +} + +func (e *entry) PackedLength() uint32 { + return e.packedLength +} diff --git a/internal/packindex/index.go b/internal/packindex/index.go new file mode 100644 index 000000000..1ea4c2cb2 --- /dev/null +++ b/internal/packindex/index.go @@ -0,0 +1,196 @@ +package packindex + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "sort" + "strings" +) + +// Index is a read-only index of packed blocks. 
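A sketch of how the interface below is typically exercised end-to-end from a sibling _test package, building a small index in memory and reading it back; the IDs, offsets, and test name are illustrative:

package packindex_test

import (
	"bytes"
	"testing"

	"github.com/kopia/repo/internal/packindex"
)

func TestIndexSketch(t *testing.T) {
	b := packindex.NewBuilder()
	b.Add(packindex.Info{BlockID: "aabbcc", PackFile: "pack1", PackOffset: 0, Length: 10})

	var buf bytes.Buffer
	if err := b.Build(&buf); err != nil {
		t.Fatalf("build error: %v", err)
	}

	ndx, err := packindex.Open(bytes.NewReader(buf.Bytes()))
	if err != nil {
		t.Fatalf("open error: %v", err)
	}
	defer ndx.Close() //nolint:errcheck

	// Point lookup; GetInfo returns nil when the block is absent.
	if info, _ := ndx.GetInfo("aabbcc"); info == nil || info.PackFile != "pack1" {
		t.Errorf("unexpected lookup result: %+v", info)
	}

	// Ordered scan of everything starting with "aa".
	_ = ndx.Iterate("aa", func(i packindex.Info) error {
		t.Logf("%v at %v+%v", i.BlockID, i.PackFile, i.PackOffset)
		return nil
	})
}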
+type Index interface { + io.Closer + + GetInfo(blockID string) (*Info, error) + Iterate(prefix string, cb func(Info) error) error +} + +type index struct { + hdr headerInfo + readerAt io.ReaderAt +} + +type headerInfo struct { + keySize int + valueSize int + entryCount int +} + +func readHeader(readerAt io.ReaderAt) (headerInfo, error) { + var header [8]byte + + if n, err := readerAt.ReadAt(header[:], 0); err != nil || n != 8 { + return headerInfo{}, fmt.Errorf("invalid header: %v", err) + } + + if header[0] != 1 { + return headerInfo{}, fmt.Errorf("invalid header format: %v", header[0]) + } + + hi := headerInfo{ + keySize: int(header[1]), + valueSize: int(binary.BigEndian.Uint16(header[2:4])), + entryCount: int(binary.BigEndian.Uint32(header[4:8])), + } + + if hi.keySize <= 1 || hi.valueSize < 0 || hi.entryCount < 0 { + return headerInfo{}, fmt.Errorf("invalid header") + } + + return hi, nil +} + +// Iterate invokes the provided callback function for all blocks in the index, sorted alphabetically. +// The iteration ends when the callback returns an error, which is propagated to the caller or when +// all blocks have been visited. +func (b *index) Iterate(prefix string, cb func(Info) error) error { + startPos, err := b.findEntryPosition(prefix) + if err != nil { + return fmt.Errorf("could not find starting position: %v", err) + } + stride := b.hdr.keySize + b.hdr.valueSize + entry := make([]byte, stride) + for i := startPos; i < b.hdr.entryCount; i++ { + n, err := b.readerAt.ReadAt(entry, int64(8+stride*i)) + if err != nil || n != len(entry) { + return fmt.Errorf("unable to read from index: %v", err) + } + + key := entry[0:b.hdr.keySize] + value := entry[b.hdr.keySize:] + + i, err := b.entryToInfo(bytesToContentID(key), value) + if err != nil { + return fmt.Errorf("invalid index data: %v", err) + } + if !strings.HasPrefix(i.BlockID, prefix) { + break + } + if err := cb(i); err != nil { + return err + } + } + return nil +} + +func (b *index) findEntryPosition(blockID string) (int, error) { + stride := b.hdr.keySize + b.hdr.valueSize + entryBuf := make([]byte, stride) + var readErr error + pos := sort.Search(b.hdr.entryCount, func(p int) bool { + if readErr != nil { + return false + } + _, err := b.readerAt.ReadAt(entryBuf, int64(8+stride*p)) + if err != nil { + readErr = err + return false + } + + return bytesToContentID(entryBuf[0:b.hdr.keySize]) >= blockID + }) + + return pos, readErr +} + +func (b *index) findEntry(blockID string) ([]byte, error) { + key := contentIDToBytes(blockID) + if len(key) != b.hdr.keySize { + return nil, fmt.Errorf("invalid block ID: %q", blockID) + } + stride := b.hdr.keySize + b.hdr.valueSize + + position, err := b.findEntryPosition(blockID) + if err != nil { + return nil, err + } + if position >= b.hdr.entryCount { + return nil, nil + } + + entryBuf := make([]byte, stride) + if _, err := b.readerAt.ReadAt(entryBuf, int64(8+stride*position)); err != nil { + return nil, err + } + + if bytes.Equal(entryBuf[0:len(key)], key) { + return entryBuf[len(key):], nil + } + + return nil, nil +} + +// GetInfo returns information about a given block. If a block is not found, nil is returned. 
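The lookup above works because records are fixed-width and sorted by key, so finding one is a plain binary search over strides. The same idea in miniature, over an in-memory byte slice (a standalone sketch, not the index format itself):

package main

import (
	"bytes"
	"fmt"
	"sort"
)

func main() {
	// Four-byte records: a 3-byte key followed by a 1-byte value, sorted by key.
	data := []byte("aaa1bbb2ccc3")
	const stride = 4
	n := len(data) / stride

	key := []byte("bbb")
	pos := sort.Search(n, func(i int) bool {
		return bytes.Compare(data[i*stride:i*stride+3], key) >= 0
	})
	if pos < n && bytes.Equal(data[pos*stride:pos*stride+3], key) {
		fmt.Printf("value = %c\n", data[pos*stride+3]) // value = 2
	}
}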
+func (b *index) GetInfo(blockID string) (*Info, error) {
+	e, err := b.findEntry(blockID)
+	if err != nil {
+		return nil, err
+	}
+
+	if e == nil {
+		return nil, nil
+	}
+
+	i, err := b.entryToInfo(blockID, e)
+	if err != nil {
+		return nil, err
+	}
+	return &i, nil
+}
+
+func (b *index) entryToInfo(blockID string, entryData []byte) (Info, error) {
+	if len(entryData) < 20 {
+		return Info{}, fmt.Errorf("invalid entry length: %v", len(entryData))
+	}
+
+	var e entry
+	if err := e.parse(entryData); err != nil {
+		return Info{}, err
+	}
+
+	packFile := make([]byte, e.PackFileLength())
+	n, err := b.readerAt.ReadAt(packFile, int64(e.PackFileOffset()))
+	if err != nil || n != int(e.PackFileLength()) {
+		return Info{}, fmt.Errorf("can't read pack block ID: %v", err)
+	}
+
+	return Info{
+		BlockID:          blockID,
+		Deleted:          e.IsDeleted(),
+		TimestampSeconds: e.TimestampSeconds(),
+		FormatVersion:    e.PackedFormatVersion(),
+		PackOffset:       e.PackedOffset(),
+		Length:           e.PackedLength(),
+		PackFile:         string(packFile),
+	}, nil
+}
+
+// Close closes the index and the underlying reader.
+func (b *index) Close() error {
+	if closer, ok := b.readerAt.(io.Closer); ok {
+		return closer.Close()
+	}
+
+	return nil
+}
+
+// Open reads an Index from a given reader. The caller must call Close() when the index is no longer used.
+func Open(readerAt io.ReaderAt) (Index, error) {
+	h, err := readHeader(readerAt)
+	if err != nil {
+		return nil, fmt.Errorf("invalid header: %v", err)
+	}
+	return &index{hdr: h, readerAt: readerAt}, nil
+}
diff --git a/internal/packindex/info.go b/internal/packindex/info.go
new file mode 100644
index 000000000..a37a2ba92
--- /dev/null
+++ b/internal/packindex/info.go
@@ -0,0 +1,22 @@
+package packindex
+
+import (
+	"time"
+)
+
+// Info is information about a single block managed by Manager.
+type Info struct {
+	BlockID          string `json:"blockID"`
+	Length           uint32 `json:"length"`
+	TimestampSeconds int64  `json:"time"`
+	PackFile         string `json:"packFile,omitempty"`
+	PackOffset       uint32 `json:"packOffset,omitempty"`
+	Deleted          bool   `json:"deleted"`
+	Payload          []byte `json:"payload"` // set for payloads stored inline
+	FormatVersion    byte   `json:"formatVersion"`
+}
+
+// Timestamp returns the time when a block was created or deleted.
+func (i Info) Timestamp() time.Time {
+	return time.Unix(i.TimestampSeconds, 0)
+}
diff --git a/internal/packindex/merged.go b/internal/packindex/merged.go
new file mode 100644
index 000000000..76e637972
--- /dev/null
+++ b/internal/packindex/merged.go
@@ -0,0 +1,132 @@
+package packindex
+
+import (
+	"container/heap"
+	"errors"
+)
+
+// Merged is an implementation of Index that transparently merges results from underlying Indexes.
+type Merged []Index
+
+// Close closes all underlying indexes.
+func (m Merged) Close() error {
+	for _, ndx := range m {
+		if err := ndx.Close(); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// GetInfo returns information about a single block.
If a block is not found, returns (nil,nil) +func (m Merged) GetInfo(contentID string) (*Info, error) { + var best *Info + for _, ndx := range m { + i, err := ndx.GetInfo(contentID) + if err != nil { + return nil, err + } + if i != nil { + if best == nil || i.TimestampSeconds > best.TimestampSeconds || (i.TimestampSeconds == best.TimestampSeconds && !i.Deleted) { + best = i + } + } + } + return best, nil +} + +type nextInfo struct { + it Info + ch <-chan Info +} + +type nextInfoHeap []*nextInfo + +func (h nextInfoHeap) Len() int { return len(h) } +func (h nextInfoHeap) Less(i, j int) bool { + if a, b := h[i].it.BlockID, h[j].it.BlockID; a != b { + return a < b + } + + if a, b := h[i].it.TimestampSeconds, h[j].it.TimestampSeconds; a != b { + return a < b + } + + return !h[i].it.Deleted +} + +func (h nextInfoHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } +func (h *nextInfoHeap) Push(x interface{}) { + *h = append(*h, x.(*nextInfo)) +} +func (h *nextInfoHeap) Pop() interface{} { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} + +func iterateChan(prefix string, ndx Index, done chan bool) <-chan Info { + ch := make(chan Info) + go func() { + defer close(ch) + + _ = ndx.Iterate(prefix, func(i Info) error { + select { + case <-done: + return errors.New("end of iteration") + case ch <- i: + return nil + } + }) + }() + return ch +} + +// Iterate invokes the provided callback for all unique block IDs in the underlying sources until either +// all blocks have been visited or until an error is returned by the callback. +func (m Merged) Iterate(prefix string, cb func(i Info) error) error { + var minHeap nextInfoHeap + done := make(chan bool) + defer close(done) + + for _, ndx := range m { + ch := iterateChan(prefix, ndx, done) + it, ok := <-ch + if ok { + heap.Push(&minHeap, &nextInfo{it, ch}) + } + } + + var pendingItem Info + + for len(minHeap) > 0 { + min := heap.Pop(&minHeap).(*nextInfo) + if pendingItem.BlockID != min.it.BlockID { + if pendingItem.BlockID != "" { + if err := cb(pendingItem); err != nil { + return err + } + } + + pendingItem = min.it + } else if min.it.TimestampSeconds > pendingItem.TimestampSeconds { + pendingItem = min.it + } + + it, ok := <-min.ch + if ok { + heap.Push(&minHeap, &nextInfo{it, min.ch}) + } + } + + if pendingItem.BlockID != "" { + return cb(pendingItem) + } + + return nil +} + +var _ Index = (*Merged)(nil) diff --git a/internal/packindex/merged_test.go b/internal/packindex/merged_test.go new file mode 100644 index 000000000..257efb8f5 --- /dev/null +++ b/internal/packindex/merged_test.go @@ -0,0 +1,94 @@ +package packindex_test + +import ( + "bytes" + "fmt" + "reflect" + "testing" + + "github.com/kopia/repo/internal/packindex" +) + +func TestMerged(t *testing.T) { + i1, err := indexWithItems( + packindex.Info{BlockID: "aabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 11}, + packindex.Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + packindex.Info{BlockID: "z010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + packindex.Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 111}, + ) + if err != nil { + t.Fatalf("can't create index: %v", err) + } + i2, err := indexWithItems( + packindex.Info{BlockID: "aabbcc", TimestampSeconds: 3, PackFile: "yy", PackOffset: 33}, + packindex.Info{BlockID: "xaabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + packindex.Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 222, Deleted: true}, + ) + 
if err != nil { + t.Fatalf("can't create index: %v", err) + } + i3, err := indexWithItems( + packindex.Info{BlockID: "aabbcc", TimestampSeconds: 2, PackFile: "zz", PackOffset: 22}, + packindex.Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "zz", PackOffset: 222}, + packindex.Info{BlockID: "k010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + packindex.Info{BlockID: "k020304", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + ) + if err != nil { + t.Fatalf("can't create index: %v", err) + } + + m := packindex.Merged{i1, i2, i3} + i, err := m.GetInfo("aabbcc") + if err != nil || i == nil { + t.Fatalf("unable to get info: %v", err) + } + if got, want := i.PackOffset, uint32(33); got != want { + t.Errorf("invalid pack offset %v, wanted %v", got, want) + } + + var inOrder []string + m.Iterate("", func(i packindex.Info) error { + inOrder = append(inOrder, i.BlockID) + if i.BlockID == "de1e1e" { + if i.Deleted { + t.Errorf("iteration preferred deleted block over non-deleted") + } + } + return nil + }) + + if i, err := m.GetInfo("de1e1e"); err != nil { + t.Errorf("error getting deleted block info: %v", err) + } else if i.Deleted { + t.Errorf("GetInfo preferred deleted block over non-deleted") + } + + expectedInOrder := []string{ + "aabbcc", + "ddeeff", + "de1e1e", + "k010203", + "k020304", + "xaabbcc", + "z010203", + } + if !reflect.DeepEqual(inOrder, expectedInOrder) { + t.Errorf("unexpected items in order: %v, wanted %v", inOrder, expectedInOrder) + } + + if err := m.Close(); err != nil { + t.Errorf("unexpected error in Close(): %v", err) + } +} + +func indexWithItems(items ...packindex.Info) (packindex.Index, error) { + b := packindex.NewBuilder() + for _, it := range items { + b.Add(it) + } + var buf bytes.Buffer + if err := b.Build(&buf); err != nil { + return nil, fmt.Errorf("build error: %v", err) + } + return packindex.Open(bytes.NewReader(buf.Bytes())) +} diff --git a/internal/packindex/packindex_internal_test.go b/internal/packindex/packindex_internal_test.go new file mode 100644 index 000000000..a80661e82 --- /dev/null +++ b/internal/packindex/packindex_internal_test.go @@ -0,0 +1,26 @@ +package packindex + +import "testing" + +func TestRoundTrip(t *testing.T) { + cases := []string{ + "", + "x", + "aa", + "xaa", + "xaaa", + "a1x", + } + + for _, tc := range cases { + b := contentIDToBytes(tc) + got := bytesToContentID(b) + if got != tc { + t.Errorf("%q did not round trip, got %q, wanted %q", tc, got, tc) + } + } + + if got, want := bytesToContentID(nil), ""; got != want { + t.Errorf("unexpected content id %v, want %v", got, want) + } +} diff --git a/internal/packindex/packindex_test.go b/internal/packindex/packindex_test.go new file mode 100644 index 000000000..50b8644aa --- /dev/null +++ b/internal/packindex/packindex_test.go @@ -0,0 +1,237 @@ +package packindex_test + +import ( + "bytes" + "crypto/sha1" + "encoding/hex" + "fmt" + "math/rand" + "reflect" + "strings" + "testing" + + "github.com/kopia/repo/internal/packindex" +) + +func TestPackIndex(t *testing.T) { + blockNumber := 0 + + deterministicBlockID := func(prefix string, id int) string { + h := sha1.New() + fmt.Fprintf(h, "%v%v", prefix, id) + blockNumber++ + + prefix2 := "" + if id%2 == 0 { + prefix2 = "x" + } + if id%7 == 0 { + prefix2 = "y" + } + if id%5 == 0 { + prefix2 = "m" + } + return string(fmt.Sprintf("%v%x", prefix2, h.Sum(nil))) + } + deterministicPackFile := func(id int) string { + h := sha1.New() + fmt.Fprintf(h, "%v", id) + blockNumber++ + return string(fmt.Sprintf("%x", h.Sum(nil))) 
+ } + + deterministicPackedOffset := func(id int) uint32 { + s := rand.NewSource(int64(id + 1)) + rnd := rand.New(s) + return uint32(rnd.Int31()) + } + deterministicPackedLength := func(id int) uint32 { + s := rand.NewSource(int64(id + 2)) + rnd := rand.New(s) + return uint32(rnd.Int31()) + } + deterministicFormatVersion := func(id int) byte { + return byte(id % 100) + } + + randomUnixTime := func() int64 { + return int64(rand.Int31()) + } + + var infos []packindex.Info + + // deleted blocks with all information + for i := 0; i < 100; i++ { + infos = append(infos, packindex.Info{ + TimestampSeconds: randomUnixTime(), + Deleted: true, + BlockID: deterministicBlockID("deleted-packed", i), + PackFile: deterministicPackFile(i), + PackOffset: deterministicPackedOffset(i), + Length: deterministicPackedLength(i), + FormatVersion: deterministicFormatVersion(i), + }) + } + // non-deleted block + for i := 0; i < 100; i++ { + infos = append(infos, packindex.Info{ + TimestampSeconds: randomUnixTime(), + BlockID: deterministicBlockID("packed", i), + PackFile: deterministicPackFile(i), + PackOffset: deterministicPackedOffset(i), + Length: deterministicPackedLength(i), + FormatVersion: deterministicFormatVersion(i), + }) + } + + infoMap := map[string]packindex.Info{} + b1 := packindex.NewBuilder() + b2 := packindex.NewBuilder() + b3 := packindex.NewBuilder() + + for _, info := range infos { + infoMap[info.BlockID] = info + b1.Add(info) + b2.Add(info) + b3.Add(info) + } + + var buf1 bytes.Buffer + var buf2 bytes.Buffer + var buf3 bytes.Buffer + if err := b1.Build(&buf1); err != nil { + t.Errorf("unable to build: %v", err) + } + if err := b1.Build(&buf2); err != nil { + t.Errorf("unable to build: %v", err) + } + if err := b1.Build(&buf3); err != nil { + t.Errorf("unable to build: %v", err) + } + data1 := buf1.Bytes() + data2 := buf2.Bytes() + data3 := buf3.Bytes() + + if !reflect.DeepEqual(data1, data2) { + t.Errorf("builder output not stable: %x vs %x", hex.Dump(data1), hex.Dump(data2)) + } + if !reflect.DeepEqual(data2, data3) { + t.Errorf("builder output not stable: %x vs %x", hex.Dump(data2), hex.Dump(data3)) + } + + t.Run("FuzzTest", func(t *testing.T) { + fuzzTestIndexOpen(t, data1) + }) + + ndx, err := packindex.Open(bytes.NewReader(data1)) + if err != nil { + t.Fatalf("can't open index: %v", err) + } + defer ndx.Close() + + for _, info := range infos { + info2, err := ndx.GetInfo(info.BlockID) + if err != nil { + t.Errorf("unable to find %v", info.BlockID) + continue + } + if !reflect.DeepEqual(info, *info2) { + t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info) + } + } + + cnt := 0 + ndx.Iterate("", func(info2 packindex.Info) error { + info := infoMap[info2.BlockID] + if !reflect.DeepEqual(info, info2) { + t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info) + } + cnt++ + return nil + }) + if cnt != len(infoMap) { + t.Errorf("invalid number of iterations: %v, wanted %v", cnt, len(infoMap)) + } + + prefixes := []string{"a", "b", "f", "0", "3", "aa", "aaa", "aab", "fff", "m", "x", "y", "m0", "ma"} + + for i := 0; i < 100; i++ { + blockID := deterministicBlockID("no-such-block", i) + v, err := ndx.GetInfo(blockID) + if err != nil { + t.Errorf("unable to get block %v: %v", blockID, err) + } + if v != nil { + t.Errorf("unexpected result when getting block %v: %v", blockID, v) + } + } + + for _, prefix := range prefixes { + cnt2 := 0 + ndx.Iterate(string(prefix), func(info2 packindex.Info) error { + cnt2++ + if !strings.HasPrefix(string(info2.BlockID), string(prefix)) { + 
t.Errorf("unexpected item %v when iterating prefix %v", info2.BlockID, prefix) + } + return nil + }) + t.Logf("found %v elements with prefix %q", cnt2, prefix) + } +} + +func fuzzTestIndexOpen(t *testing.T, originalData []byte) { + // use consistent random + rnd := rand.New(rand.NewSource(12345)) + + fuzzTest(rnd, originalData, 50000, func(d []byte) { + ndx, err := packindex.Open(bytes.NewReader(d)) + if err != nil { + return + } + defer ndx.Close() + cnt := 0 + ndx.Iterate("", func(cb packindex.Info) error { + if cnt < 10 { + ndx.GetInfo(cb.BlockID) + } + cnt++ + return nil + }) + }) +} + +func fuzzTest(rnd *rand.Rand, originalData []byte, rounds int, callback func(d []byte)) { + for round := 0; round < rounds; round++ { + data := append([]byte(nil), originalData...) + + // mutate small number of bytes + bytesToMutate := rnd.Intn(3) + for i := 0; i < bytesToMutate; i++ { + pos := rnd.Intn(len(data)) + data[pos] = byte(rnd.Int()) + } + + sectionsToInsert := rnd.Intn(3) + for i := 0; i < sectionsToInsert; i++ { + pos := rnd.Intn(len(data)) + insertedLength := rnd.Intn(20) + insertedData := make([]byte, insertedLength) + rnd.Read(insertedData) + + data = append(append(append([]byte(nil), data[0:pos]...), insertedData...), data[pos:]...) + } + + sectionsToDelete := rnd.Intn(3) + for i := 0; i < sectionsToDelete; i++ { + pos := rnd.Intn(len(data)) + deletedLength := rnd.Intn(10) + if pos+deletedLength > len(data) { + continue + } + + data = append(append([]byte(nil), data[0:pos]...), data[pos+deletedLength:]...) + } + + callback(data) + } +} diff --git a/internal/packindex/subset.go b/internal/packindex/subset.go new file mode 100644 index 000000000..5c6e55299 --- /dev/null +++ b/internal/packindex/subset.go @@ -0,0 +1,28 @@ +package packindex + +// IsSubset returns true if all entries in index 'a' are contained in index 'b'. 
+func IsSubset(a, b Index) bool { + done := make(chan bool) + defer close(done) + + ach := iterateChan("", a, done) + bch := iterateChan("", b, done) + + for ait := range ach { + bit, ok := <-bch + if !ok { + return false + } + for bit.BlockID < ait.BlockID { + bit, ok = <-bch + if !ok { + return false + } + } + + if bit.BlockID != ait.BlockID { + return false + } + } + return true +} diff --git a/internal/packindex/subset_test.go b/internal/packindex/subset_test.go new file mode 100644 index 000000000..aaad99295 --- /dev/null +++ b/internal/packindex/subset_test.go @@ -0,0 +1,60 @@ +package packindex_test + +import ( + "bytes" + "fmt" + "testing" + + "github.com/kopia/repo/internal/packindex" +) + +func TestSubset(t *testing.T) { + cases := []struct { + aBlocks, bBlocks []string + want bool + }{ + {[]string{}, []string{"aa"}, true}, + {[]string{}, []string{"aa", "bb"}, true}, + {[]string{"aa"}, []string{"aa"}, true}, + {[]string{"aa"}, []string{"bb"}, false}, + {[]string{"aa"}, []string{"aa", "bb"}, true}, + {[]string{"aa"}, []string{"aa", "bb", "cc"}, true}, + {[]string{"aa", "bb"}, []string{"bb", "cc"}, false}, + {[]string{"aa", "bb"}, []string{"aa"}, false}, + {[]string{"aa", "bb"}, []string{}, false}, + {[]string{"aa", "bb", "cc", "dd", "ee", "ff"}, []string{"aa", "bb", "cc", "dd", "ee", "ff"}, true}, + {[]string{"aa", "bb", "cc", "dd", "ee", "ff"}, []string{"aa", "bb", "cc", "dd", "ef", "ff"}, false}, + {[]string{"aa", "bb", "cc", "dd", "ee", "ff"}, []string{"aa", "bb", "cc", "dd", "ee", "ef", "ff"}, true}, + } + + for _, tc := range cases { + a, err := indexWithBlockIDs(tc.aBlocks) + if err != nil { + t.Fatalf("error building index: %v", err) + } + b, err := indexWithBlockIDs(tc.bBlocks) + if err != nil { + t.Fatalf("error building index: %v", err) + } + + if got, want := packindex.IsSubset(a, b), tc.want; got != want { + t.Errorf("invalid value of IsSubset(%v,%v): %v, wanted %v", tc.aBlocks, tc.bBlocks, got, want) + } + } +} +func indexWithBlockIDs(items []string) (packindex.Index, error) { + b := packindex.NewBuilder() + for _, it := range items { + b.Add(packindex.Info{ + BlockID: it, + PackFile: "x", + PackOffset: 1, + Length: 1, + }) + } + var buf bytes.Buffer + if err := b.Build(&buf); err != nil { + return nil, fmt.Errorf("build error: %v", err) + } + return packindex.Open(bytes.NewReader(buf.Bytes())) +} diff --git a/internal/repologging/logging.go b/internal/repologging/logging.go new file mode 100644 index 000000000..d72eb0110 --- /dev/null +++ b/internal/repologging/logging.go @@ -0,0 +1,9 @@ +// Package repologging provides loggers. +package repologging + +import "github.com/op/go-logging" + +// Logger returns an instance of a logger used throughout repository codebase. +func Logger(module string) *logging.Logger { + return logging.MustGetLogger(module) +} diff --git a/internal/repotesting/repotesting.go b/internal/repotesting/repotesting.go new file mode 100644 index 000000000..cfb1d879b --- /dev/null +++ b/internal/repotesting/repotesting.go @@ -0,0 +1,131 @@ +// Package repotesting contains test utilities for working with repositories. +package repotesting + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/kopia/repo/object" + + "github.com/kopia/repo/block" + + "github.com/kopia/repo" + "github.com/kopia/repo/storage" + "github.com/kopia/repo/storage/filesystem" +) + +const masterPassword = "foobarbazfoobarbaz" + +// Environment encapsulates details of a test environment. 
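A sketch of a typical caller of the environment declared below, from a test elsewhere in this module; the package and test names are illustrative:

package myapp_test

import (
	"testing"

	"github.com/kopia/repo/internal/repotesting"
)

func TestUsingEnvironment(t *testing.T) {
	var env repotesting.Environment
	defer env.Setup(t).Close(t) // Setup runs immediately; Close is deferred

	// env.Repository is now an initialized, connected and opened repository
	// backed by a throwaway filesystem store.
	_ = env.Repository
}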
+type Environment struct { + Repository *repo.Repository + + configDir string + storageDir string +} + +// Setup sets up a test environment. +func (e *Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOptions)) *Environment { + var err error + + ctx := context.Background() + + e.configDir, err = ioutil.TempDir("", "") + if err != nil { + t.Fatalf("err: %v", err) + } + e.storageDir, err = ioutil.TempDir("", "") + if err != nil { + t.Fatalf("err: %v", err) + } + + opt := &repo.NewRepositoryOptions{ + BlockFormat: block.FormattingOptions{ + HMACSecret: []byte{}, + BlockFormat: "UNENCRYPTED_HMAC_SHA256", + }, + ObjectFormat: object.Format{ + Splitter: "FIXED", + MaxBlockSize: 400, + }, + MetadataEncryptionAlgorithm: "NONE", + } + + for _, mod := range opts { + mod(opt) + } + + st, err := filesystem.New(ctx, &filesystem.Options{ + Path: e.storageDir, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + + if err = repo.Initialize(ctx, st, opt, masterPassword); err != nil { + t.Fatalf("err: %v", err) + } + + connOpts := repo.ConnectOptions{ + //TraceStorage: log.Printf, + } + + if err = repo.Connect(ctx, e.configFile(), st, masterPassword, connOpts); err != nil { + t.Fatalf("can't connect: %v", err) + } + + e.Repository, err = repo.Open(ctx, e.configFile(), masterPassword, &repo.Options{}) + if err != nil { + t.Fatalf("can't open: %v", err) + } + + return e +} + +// Close closes testing environment +func (e *Environment) Close(t *testing.T) { + if err := e.Repository.Close(context.Background()); err != nil { + t.Fatalf("unable to close: %v", err) + } + + if err := os.RemoveAll(e.configDir); err != nil { + t.Errorf("error removing config directory: %v", err) + } + if err := os.RemoveAll(e.storageDir); err != nil { + t.Errorf("error removing storage directory: %v", err) + } +} + +func (e *Environment) configFile() string { + return filepath.Join(e.configDir, "kopia.config") +} + +// MustReopen closes and reopens the repository. +func (e *Environment) MustReopen(t *testing.T) { + err := e.Repository.Close(context.Background()) + if err != nil { + t.Fatalf("close error: %v", err) + } + + e.Repository, err = repo.Open(context.Background(), e.configFile(), masterPassword, &repo.Options{}) + if err != nil { + t.Fatalf("err: %v", err) + } +} + +// VerifyStorageBlockCount verifies that the underlying storage contains the specified number of blocks. +func (e *Environment) VerifyStorageBlockCount(t *testing.T, want int) { + var got int + + _ = e.Repository.Storage.ListBlocks(context.Background(), "", func(_ storage.BlockMetadata) error { + got++ + return nil + }) + + if got != want { + t.Errorf("got unexpected number of storage blocks: %v, wanted %v", got, want) + } +} diff --git a/internal/retry/retry.go b/internal/retry/retry.go new file mode 100644 index 000000000..3bcb53b5c --- /dev/null +++ b/internal/retry/retry.go @@ -0,0 +1,44 @@ +// Package retry implements exponential retry policy. +package retry + +import ( + "fmt" + "time" + + "github.com/kopia/repo/internal/repologging" +) + +var log = repologging.Logger("repo/retry") + +const ( + maxAttempts = 10 + retryInitialSleepAmount = 1 * time.Second + retryMaxSleepAmount = 32 * time.Second +) + +// AttemptFunc performs an attempt and returns a value (optional, may be nil) and an error. +type AttemptFunc func() (interface{}, error) + +// IsRetriableFunc is a function that determines whether an error is retriable. 
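A self-contained sketch of driving the retry helper defined below with a deliberately flaky attempt; the operation, error, and call counts are illustrative (and, being internal, the package is only importable from within this module):

package main

import (
	"errors"
	"fmt"

	"github.com/kopia/repo/internal/retry"
)

var errTransient = errors.New("simulated transient error")

func main() {
	calls := 0
	v, err := retry.WithExponentialBackoff("flaky operation", func() (interface{}, error) {
		calls++
		if calls < 3 {
			return nil, errTransient // fail the first two attempts
		}
		return "ok", nil
	}, func(err error) bool {
		// A nil error is never retriable, which is what ends the loop on success.
		return err == errTransient
	})
	fmt.Println(v, err, calls) // ok <nil> 3
}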
+type IsRetriableFunc func(err error) bool + +// WithExponentialBackoff runs the provided attempt until it succeeds, retrying on all errors that are +// deemed retriable by the provided function. The delay between retries grows exponentially up to +// a certain limit. +func WithExponentialBackoff(desc string, attempt AttemptFunc, isRetriableError IsRetriableFunc) (interface{}, error) { + sleepAmount := retryInitialSleepAmount + for i := 0; i < maxAttempts; i++ { + v, err := attempt() + if !isRetriableError(err) { + return v, err + } + log.Debugf("got error %v when %v (#%v), sleeping for %v before retrying", err, desc, i, sleepAmount) + time.Sleep(sleepAmount) + sleepAmount *= 2 + if sleepAmount > retryMaxSleepAmount { + sleepAmount = retryMaxSleepAmount + } + } + + return nil, fmt.Errorf("unable to complete %v despite %v retries", desc, maxAttempts) +} diff --git a/internal/storagetesting/asserts.go b/internal/storagetesting/asserts.go new file mode 100644 index 000000000..03e797c01 --- /dev/null +++ b/internal/storagetesting/asserts.go @@ -0,0 +1,84 @@ +package storagetesting + +import ( + "bytes" + "context" + "fmt" + "path/filepath" + "reflect" + "runtime" + "testing" + + "github.com/kopia/repo/storage" +) + +// AssertGetBlock asserts that the specified storage block has correct content. +func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block string, expected []byte) { + b, err := s.GetBlock(ctx, block, 0, -1) + if err != nil { + t.Errorf(errorPrefix()+"GetBlock(%v) returned error %v, expected data: %v", block, err, expected) + return + } + + if !bytes.Equal(b, expected) { + t.Errorf(errorPrefix()+"GetBlock(%v) returned %x, but expected %x", block, b, expected) + } + + half := int64(len(expected) / 2) + if half == 0 { + return + } + + b, err = s.GetBlock(ctx, block, 0, half) + if err != nil { + t.Errorf(errorPrefix()+"GetBlock(%v) returned error %v, expected data: %v", block, err, expected) + return + } + + if !bytes.Equal(b, expected[0:half]) { + t.Errorf(errorPrefix()+"GetBlock(%v) returned %x, but expected %x", block, b, expected[0:half]) + } + + b, err = s.GetBlock(ctx, block, half, int64(len(expected))-half) + if err != nil { + t.Errorf(errorPrefix()+"GetBlock(%v) returned error %v, expected data: %v", block, err, expected) + return + } + + if !bytes.Equal(b, expected[len(expected)-int(half):]) { + t.Errorf(errorPrefix()+"GetBlock(%v) returned %x, but expected %x", block, b, expected[len(expected)-int(half):]) + } + +} + +// AssertGetBlockNotFound asserts that GetBlock() for specified storage block returns ErrBlockNotFound. +func AssertGetBlockNotFound(ctx context.Context, t *testing.T, s storage.Storage, block string) { + b, err := s.GetBlock(ctx, block, 0, -1) + if err != storage.ErrBlockNotFound || b != nil { + t.Errorf(errorPrefix()+"GetBlock(%v) returned %v, %v but expected ErrBlockNotFound", block, b, err) + } +} + +// AssertListResults asserts that the list results with given prefix return the specified list of names in order. 
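For instance, paired with the map storage defined later in this package (block IDs illustrative, alongside the other tests in this package with "context" and "testing" imported):

func TestListResultsSketch(t *testing.T) {
	ctx := context.Background()
	st := NewMapStorage(map[string][]byte{"aa": {1}, "ab": {2}, "zz": {3}}, nil, nil)

	AssertListResults(ctx, t, st, "a", "aa", "ab") // prefix scan, sorted order
	AssertListResults(ctx, t, st, "", "aa", "ab", "zz")
}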
+func AssertListResults(ctx context.Context, t *testing.T, s storage.Storage, prefix string, expected ...string) { + var names []string + + if err := s.ListBlocks(ctx, prefix, func(e storage.BlockMetadata) error { + names = append(names, e.BlockID) + return nil + }); err != nil { + t.Fatalf("err: %v", err) + } + + if !reflect.DeepEqual(names, expected) { + t.Errorf(errorPrefix()+"ListBlocks(%v) returned %v, but expected %v", prefix, names, expected) + } +} + +func errorPrefix() string { + if _, fn, line, ok := runtime.Caller(2); ok { + return fmt.Sprintf("called from %v:%v: ", filepath.Base(fn), line) + } + + return "" +} diff --git a/internal/storagetesting/doc.go b/internal/storagetesting/doc.go new file mode 100644 index 000000000..0c2582e88 --- /dev/null +++ b/internal/storagetesting/doc.go @@ -0,0 +1,2 @@ +// Package storagetesting is used for testing Storage implementations. +package storagetesting diff --git a/internal/storagetesting/faulty.go b/internal/storagetesting/faulty.go new file mode 100644 index 000000000..aa5970bc7 --- /dev/null +++ b/internal/storagetesting/faulty.go @@ -0,0 +1,115 @@ +package storagetesting + +import ( + "context" + "sync" + "time" + + "github.com/kopia/repo/internal/repologging" + "github.com/kopia/repo/storage" +) + +var log = repologging.Logger("faulty-storage") + +// Fault describes the behavior of a single fault. +type Fault struct { + Repeat int // how many times to repeat this fault + Sleep time.Duration // sleep before returning + ErrCallback func() error + WaitFor chan struct{} // waits until the given channel is closed before returning + Err error // error to return (can be nil in combination with Sleep and WaitFor) +} + +// FaultyStorage implements fault injection for Storage. +type FaultyStorage struct { + Base storage.Storage + Faults map[string][]*Fault + + mu sync.Mutex +} + +// GetBlock implements storage.Storage +func (s *FaultyStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) { + if err := s.getNextFault("GetBlock", id, offset, length); err != nil { + return nil, err + } + return s.Base.GetBlock(ctx, id, offset, length) +} + +// PutBlock implements storage.Storage +func (s *FaultyStorage) PutBlock(ctx context.Context, id string, data []byte) error { + if err := s.getNextFault("PutBlock", id, len(data)); err != nil { + return err + } + return s.Base.PutBlock(ctx, id, data) +} + +// DeleteBlock implements storage.Storage +func (s *FaultyStorage) DeleteBlock(ctx context.Context, id string) error { + if err := s.getNextFault("DeleteBlock", id); err != nil { + return err + } + return s.Base.DeleteBlock(ctx, id) +} + +// ListBlocks implements storage.Storage +func (s *FaultyStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error { + if err := s.getNextFault("ListBlocks", prefix); err != nil { + return err + } + + return s.Base.ListBlocks(ctx, prefix, func(bm storage.BlockMetadata) error { + if err := s.getNextFault("ListBlocksItem", prefix); err != nil { + return err + } + return callback(bm) + }) +} + +// Close implements storage.Storage +func (s *FaultyStorage) Close(ctx context.Context) error { + if err := s.getNextFault("Close"); err != nil { + return err + } + return s.Base.Close(ctx) +} + +// ConnectionInfo implements storage.Storage +func (s *FaultyStorage) ConnectionInfo() storage.ConnectionInfo { + return s.Base.ConnectionInfo() +} + +func (s *FaultyStorage) getNextFault(method string, args ...interface{}) error { + s.mu.Lock() + faults := 
s.Faults[method]
+	if len(faults) == 0 {
+		s.mu.Unlock()
+		log.Debugf("no faults for %v %v", method, args)
+		return nil
+	}
+
+	f := faults[0]
+	if f.Repeat > 0 {
+		f.Repeat--
+		log.Debugf("will repeat %v more times the fault for %v %v", f.Repeat, method, args)
+	} else {
+		s.Faults[method] = faults[1:]
+	}
+	s.mu.Unlock()
+	if f.WaitFor != nil {
+		log.Debugf("waiting for channel to be closed in %v %v", method, args)
+		<-f.WaitFor
+	}
+	if f.Sleep > 0 {
+		log.Debugf("sleeping for %v in %v %v", f.Sleep, method, args)
+		time.Sleep(f.Sleep)
+	}
+	if f.ErrCallback != nil {
+		err := f.ErrCallback()
+		log.Debugf("returning %v for %v %v", err, method, args)
+		return err
+	}
+	log.Debugf("returning %v for %v %v", f.Err, method, args)
+	return f.Err
+}
+
+var _ storage.Storage = (*FaultyStorage)(nil)
diff --git a/internal/storagetesting/map.go b/internal/storagetesting/map.go
new file mode 100644
index 000000000..438e1dc77
--- /dev/null
+++ b/internal/storagetesting/map.go
@@ -0,0 +1,118 @@
+package storagetesting
+
+import (
+	"context"
+	"errors"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/kopia/repo/storage"
+)
+
+type mapStorage struct {
+	data    map[string][]byte
+	keyTime map[string]time.Time
+	timeNow func() time.Time
+	mutex   sync.RWMutex
+}
+
+func (s *mapStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) {
+	s.mutex.RLock()
+	defer s.mutex.RUnlock()
+
+	data, ok := s.data[id]
+	if ok {
+		data = append([]byte(nil), data...)
+		if length < 0 {
+			return data, nil
+		}
+
+		if int(offset) > len(data) || offset < 0 {
+			return nil, errors.New("invalid offset")
+		}
+
+		data = data[offset:]
+		if int(length) > len(data) {
+			return data, nil
+		}
+		return data[0:length], nil
+	}
+
+	return nil, storage.ErrBlockNotFound
+}
+
+func (s *mapStorage) PutBlock(ctx context.Context, id string, data []byte) error {
+	s.mutex.Lock()
+	defer s.mutex.Unlock()
+
+	if _, ok := s.data[id]; ok {
+		return nil
+	}
+
+	s.keyTime[id] = s.timeNow()
+	s.data[id] = append([]byte{}, data...)
+	return nil
+}
+
+func (s *mapStorage) DeleteBlock(ctx context.Context, id string) error {
+	s.mutex.Lock()
+	defer s.mutex.Unlock()
+
+	delete(s.data, id)
+	return nil
+}
+
+func (s *mapStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
+	s.mutex.RLock()
+
+	keys := []string{}
+	for k := range s.data {
+		if strings.HasPrefix(k, prefix) {
+			keys = append(keys, k)
+		}
+	}
+	s.mutex.RUnlock()
+
+	sort.Strings(keys)
+
+	for _, k := range keys {
+		s.mutex.RLock()
+		v, ok := s.data[k]
+		ts := s.keyTime[k]
+		s.mutex.RUnlock()
+		if !ok {
+			continue
+		}
+		if err := callback(storage.BlockMetadata{
+			BlockID:   k,
+			Length:    int64(len(v)),
+			Timestamp: ts,
+		}); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (s *mapStorage) Close(ctx context.Context) error {
+	return nil
+}
+
+func (s *mapStorage) ConnectionInfo() storage.ConnectionInfo {
+	// unsupported
+	return storage.ConnectionInfo{}
+}
+
+// NewMapStorage returns an implementation of Storage backed by the contents of given map.
+// Used primarily for testing.
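+//
+// A usage sketch (note that the provided map is used directly and therefore shared with the caller):
+//
+//   data := map[string][]byte{}
+//   st := NewMapStorage(data, nil, nil)
+//   _ = st.PutBlock(ctx, "ab01", []byte{1, 2, 3})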
+func NewMapStorage(data map[string][]byte, keyTime map[string]time.Time, timeNow func() time.Time) storage.Storage { + if keyTime == nil { + keyTime = make(map[string]time.Time) + } + if timeNow == nil { + timeNow = time.Now + } + return &mapStorage{data: data, keyTime: keyTime, timeNow: timeNow} +} diff --git a/internal/storagetesting/map_test.go b/internal/storagetesting/map_test.go new file mode 100644 index 000000000..238276cbd --- /dev/null +++ b/internal/storagetesting/map_test.go @@ -0,0 +1,15 @@ +package storagetesting + +import ( + "context" + "testing" +) + +func TestMapStorage(t *testing.T) { + data := map[string][]byte{} + r := NewMapStorage(data, nil, nil) + if r == nil { + t.Errorf("unexpected result: %v", r) + } + VerifyStorage(context.Background(), t, r) +} diff --git a/internal/storagetesting/verify.go b/internal/storagetesting/verify.go new file mode 100644 index 000000000..7640c18c0 --- /dev/null +++ b/internal/storagetesting/verify.go @@ -0,0 +1,58 @@ +package storagetesting + +import ( + "bytes" + "context" + "reflect" + "testing" + + "github.com/kopia/repo/storage" +) + +// VerifyStorage verifies the behavior of the specified storage. +func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) { + blocks := []struct { + blk string + contents []byte + }{ + {blk: string("abcdbbf4f0507d054ed5a80a5b65086f602b"), contents: []byte{}}, + {blk: string("zxce0e35630770c54668a8cfb4e414c6bf8f"), contents: []byte{1}}, + {blk: string("abff4585856ebf0748fd989e1dd623a8963d"), contents: bytes.Repeat([]byte{1}, 1000)}, + {blk: string("abgc3dca496d510f492c858a2df1eb824e62"), contents: bytes.Repeat([]byte{1}, 10000)}, + } + + // First verify that blocks don't exist. + for _, b := range blocks { + AssertGetBlockNotFound(ctx, t, r, b.blk) + } + + // Now add blocks. 
+ for _, b := range blocks { + if err := r.PutBlock(ctx, b.blk, b.contents); err != nil { + t.Errorf("can't put block: %v", err) + } + + AssertGetBlock(ctx, t, r, b.blk, b.contents) + } + + AssertListResults(ctx, t, r, "ab", blocks[0].blk, blocks[2].blk, blocks[3].blk) +} + +// AssertConnectionInfoRoundTrips verifies that the ConnectionInfo returned by a given storage can be used to create +// equivalent storage +func AssertConnectionInfoRoundTrips(ctx context.Context, t *testing.T, s storage.Storage) storage.Storage { + t.Helper() + + ci := s.ConnectionInfo() + s2, err := storage.NewStorage(ctx, ci) + if err != nil { + t.Fatalf("err: %v", err) + } + + ci2 := s2.ConnectionInfo() + if !reflect.DeepEqual(ci, ci2) { + t.Errorf("connection info does not round-trip: %v vs %v", ci, ci2) + } + + return s2 +} diff --git a/internal/throttle/round_tripper.go b/internal/throttle/round_tripper.go new file mode 100644 index 000000000..c59a2deb3 --- /dev/null +++ b/internal/throttle/round_tripper.go @@ -0,0 +1,44 @@ +package throttle + +import ( + "io" + "net/http" +) + +type throttlerPool interface { + AddReader(io.ReadCloser) (io.ReadCloser, error) +} + +type throttlingRoundTripper struct { + base http.RoundTripper + downloadPool throttlerPool + uploadPool throttlerPool +} + +func (rt *throttlingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + if req.Body != nil && rt.uploadPool != nil { + var err error + req.Body, err = rt.uploadPool.AddReader(req.Body) + if err != nil { + return nil, err + } + } + resp, err := rt.base.RoundTrip(req) + if resp != nil && resp.Body != nil && rt.downloadPool != nil { + resp.Body, err = rt.downloadPool.AddReader(resp.Body) + } + return resp, err +} + +// NewRoundTripper returns http.RoundTripper that throttles upload and downloads. 
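+//
+// A usage sketch (constructing the throttler pools is up to the caller and not shown here):
+//
+//   client := &http.Client{
+//       Transport: NewRoundTripper(nil, downloadPool, uploadPool),
+//   }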
+func NewRoundTripper(base http.RoundTripper, downloadPool throttlerPool, uploadPool throttlerPool) http.RoundTripper { + if base == nil { + base = http.DefaultTransport + } + + return &throttlingRoundTripper{ + base: base, + downloadPool: downloadPool, + uploadPool: uploadPool, + } +} diff --git a/internal/throttle/round_tripper_test.go b/internal/throttle/round_tripper_test.go new file mode 100644 index 000000000..50812f3b4 --- /dev/null +++ b/internal/throttle/round_tripper_test.go @@ -0,0 +1,103 @@ +package throttle + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "net/http" + "testing" +) + +type baseRoundTripper struct { + responses map[*http.Request]*http.Response +} + +func (rt *baseRoundTripper) add(req *http.Request, resp *http.Response) (*http.Request, *http.Response) { + rt.responses[req] = resp + return req, resp +} + +func (rt *baseRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + resp := rt.responses[req] + if resp != nil { + return resp, nil + } + + return nil, fmt.Errorf("error occurred") +} + +type fakePool struct { + readers []io.ReadCloser +} + +func (fp *fakePool) reset() { + fp.readers = nil +} + +func (fp *fakePool) AddReader(r io.ReadCloser) (io.ReadCloser, error) { + fp.readers = append(fp.readers, r) + return r, nil +} + +func TestRoundTripper(t *testing.T) { + downloadBody := ioutil.NopCloser(bytes.NewReader([]byte("data1"))) + uploadBody := ioutil.NopCloser(bytes.NewReader([]byte("data1"))) + + base := &baseRoundTripper{ + responses: make(map[*http.Request]*http.Response), + } + downloadPool := &fakePool{} + uploadPool := &fakePool{} + rt := NewRoundTripper(base, downloadPool, uploadPool) + + // Empty request (no request, no response) + uploadPool.reset() + downloadPool.reset() + req1, resp1 := base.add(&http.Request{}, &http.Response{}) + resp, err := rt.RoundTrip(req1) + if resp != resp1 || err != nil { + t.Errorf("invalid response or error: %v", err) + } + if len(downloadPool.readers) != 0 || len(uploadPool.readers) != 0 { + t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers) + } + + // Upload request + uploadPool.reset() + downloadPool.reset() + req2, resp2 := base.add(&http.Request{ + Body: uploadBody, + }, &http.Response{}) + resp, err = rt.RoundTrip(req2) + if resp != resp2 || err != nil { + t.Errorf("invalid response or error: %v", err) + } + if len(downloadPool.readers) != 0 || len(uploadPool.readers) != 1 { + t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers) + } + + // Download request + uploadPool.reset() + downloadPool.reset() + req3, resp3 := base.add(&http.Request{}, &http.Response{Body: downloadBody}) + resp, err = rt.RoundTrip(req3) + if resp != resp3 || err != nil { + t.Errorf("invalid response or error: %v", err) + } + if len(downloadPool.readers) != 1 || len(uploadPool.readers) != 0 { + t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers) + } + + // Upload/Download request + uploadPool.reset() + downloadPool.reset() + req4, resp4 := base.add(&http.Request{Body: uploadBody}, &http.Response{Body: downloadBody}) + resp, err = rt.RoundTrip(req4) + if resp != resp4 || err != nil { + t.Errorf("invalid response or error: %v", err) + } + if len(downloadPool.readers) != 1 || len(uploadPool.readers) != 1 { + t.Errorf("invalid pool contents: %v %v", downloadPool.readers, uploadPool.readers) + } +} diff --git a/local_config.go b/local_config.go new file mode 100644 index 000000000..ba11a6096 --- /dev/null +++ b/local_config.go @@ -0,0 
+1,56 @@
+package repo
+
+import (
+	"encoding/json"
+	"io"
+	"os"
+
+	"github.com/kopia/repo/block"
+	"github.com/kopia/repo/object"
+	"github.com/kopia/repo/storage"
+)
+
+// LocalConfig is a configuration of Kopia stored in a configuration file.
+type LocalConfig struct {
+	Storage storage.ConnectionInfo `json:"storage"`
+	Caching block.CachingOptions   `json:"caching"`
+}
+
+// repositoryObjectFormat describes the format of objects in a repository.
+type repositoryObjectFormat struct {
+	block.FormattingOptions
+	object.Format
+}
+
+// Load reads local configuration from the specified reader.
+func (lc *LocalConfig) Load(r io.Reader) error {
+	*lc = LocalConfig{}
+	return json.NewDecoder(r).Decode(lc)
+}
+
+// Save writes the configuration to the specified writer.
+func (lc *LocalConfig) Save(w io.Writer) error {
+	b, err := json.MarshalIndent(lc, "", "  ")
+	if err != nil {
+		return err
+	}
+	_, err = w.Write(b)
+	return err
+}
+
+// loadConfigFromFile reads the local configuration from the specified file.
+func loadConfigFromFile(fileName string) (*LocalConfig, error) {
+	f, err := os.Open(fileName)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close() //nolint:errcheck
+
+	var lc LocalConfig
+
+	if err := lc.Load(f); err != nil {
+		return nil, err
+	}
+
+	return &lc, nil
+}
diff --git a/manifest/manifest_entry.go b/manifest/manifest_entry.go
new file mode 100644
index 000000000..09778b4e3
--- /dev/null
+++ b/manifest/manifest_entry.go
@@ -0,0 +1,21 @@
+package manifest
+
+import "time"
+
+// EntryMetadata contains metadata about a manifest item. Each manifest item has one or more labels,
+// including the required "type" label.
+type EntryMetadata struct {
+	ID      string
+	Length  int
+	Labels  map[string]string
+	ModTime time.Time
+}
+
+// EntryIDs returns the list of IDs for the provided list of EntryMetadata.
+func EntryIDs(entries []*EntryMetadata) []string {
+	var ids []string
+	for _, e := range entries {
+		ids = append(ids, e.ID)
+	}
+	return ids
+}
diff --git a/manifest/manifest_manager.go b/manifest/manifest_manager.go
new file mode 100644
index 000000000..9dcd0f11e
--- /dev/null
+++ b/manifest/manifest_manager.go
@@ -0,0 +1,514 @@
+// Package manifest implements support for managing JSON-based manifests in repository.
+package manifest
+
+import (
+	"bytes"
+	"compress/gzip"
+	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"sort"
+	"sync"
+	"time"
+
+	"github.com/kopia/repo/block"
+	"github.com/kopia/repo/internal/repologging"
+	"github.com/kopia/repo/storage"
+)
+
+var log = repologging.Logger("kopia/manifest")
+
+// ErrNotFound is returned when the metadata item is not found.
+var ErrNotFound = errors.New("not found")
+
+const manifestBlockPrefix = "m"
+const autoCompactionBlockCount = 16
+
+// Manager organizes JSON manifests of various kinds, including snapshot manifests.
+type Manager struct {
+	mu sync.Mutex
+	b  *block.Manager
+
+	initialized    bool
+	pendingEntries map[string]*manifestEntry
+
+	committedEntries  map[string]*manifestEntry
+	committedBlockIDs map[string]bool
+}
+
+// Put serializes the provided payload to JSON and persists it. Returns unique handle that represents the object.
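+//
+// A usage sketch (the labels and payload here are hypothetical):
+//
+//   id, err := mgr.Put(ctx, map[string]string{"type": "snapshot", "hostname": "h1"}, mySnapshot)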
+func (m *Manager) Put(ctx context.Context, labels map[string]string, payload interface{}) (string, error) {
+	if labels["type"] == "" {
+		return "", fmt.Errorf("'type' label is required")
+	}
+
+	if err := m.ensureInitialized(ctx); err != nil {
+		return "", err
+	}
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	random := make([]byte, 16)
+	if _, err := rand.Read(random); err != nil {
+		return "", fmt.Errorf("can't initialize randomness: %v", err)
+	}
+
+	b, err := json.Marshal(payload)
+	if err != nil {
+		return "", err
+	}
+
+	e := &manifestEntry{
+		ID:      hex.EncodeToString(random),
+		ModTime: time.Now().UTC(),
+		Labels:  copyLabels(labels),
+		Content: b,
+	}
+
+	m.pendingEntries[e.ID] = e
+
+	return e.ID, nil
+}
+
+// GetMetadata returns metadata about provided manifest item or ErrNotFound if the item can't be found.
+func (m *Manager) GetMetadata(ctx context.Context, id string) (*EntryMetadata, error) {
+	if err := m.ensureInitialized(ctx); err != nil {
+		return nil, err
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	e := m.pendingEntries[id]
+	if e == nil {
+		e = m.committedEntries[id]
+	}
+
+	if e == nil || e.Deleted {
+		return nil, ErrNotFound
+	}
+
+	return &EntryMetadata{
+		ID:      id,
+		ModTime: e.ModTime,
+		Length:  len(e.Content),
+		Labels:  copyLabels(e.Labels),
+	}, nil
+}
+
+// Get retrieves the contents of the provided manifest item by deserializing it as JSON to provided object.
+// If the manifest is not found, returns ErrNotFound.
+func (m *Manager) Get(ctx context.Context, id string, data interface{}) error {
+	if err := m.ensureInitialized(ctx); err != nil {
+		return err
+	}
+
+	b, err := m.GetRaw(ctx, id)
+	if err != nil {
+		return err
+	}
+
+	if err := json.Unmarshal(b, data); err != nil {
+		return fmt.Errorf("unable to unmarshal %q: %v", id, err)
+	}
+
+	return nil
+}
+
+// GetRaw returns raw contents of the provided manifest (JSON bytes) or ErrNotFound if not found.
+func (m *Manager) GetRaw(ctx context.Context, id string) ([]byte, error) {
+	if err := m.ensureInitialized(ctx); err != nil {
+		return nil, err
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	e := m.pendingEntries[id]
+	if e == nil {
+		e = m.committedEntries[id]
+	}
+	if e == nil || e.Deleted {
+		return nil, ErrNotFound
+	}
+
+	return e.Content, nil
+}
+
+// Find returns the list of EntryMetadata for manifest entries matching all provided labels.
+func (m *Manager) Find(ctx context.Context, labels map[string]string) ([]*EntryMetadata, error) {
+	if err := m.ensureInitialized(ctx); err != nil {
+		return nil, err
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	var matches []*EntryMetadata
+	for _, e := range m.pendingEntries {
+		if matchesLabels(e.Labels, labels) {
+			matches = append(matches, cloneEntryMetadata(e))
+		}
+	}
+	for _, e := range m.committedEntries {
+		if m.pendingEntries[e.ID] != nil {
+			// ignore committed that are also in pending
+			continue
+		}
+
+		if matchesLabels(e.Labels, labels) {
+			matches = append(matches, cloneEntryMetadata(e))
+		}
+	}
+
+	sort.Slice(matches, func(i, j int) bool {
+		return matches[i].ModTime.Before(matches[j].ModTime)
+	})
+	return matches, nil
+}
+
+func cloneEntryMetadata(e *manifestEntry) *EntryMetadata {
+	return &EntryMetadata{
+		ID:      e.ID,
+		Labels:  copyLabels(e.Labels),
+		Length:  len(e.Content),
+		ModTime: e.ModTime,
+	}
+}
+
+// matchesLabels returns true when all entries in 'b' are found in 'a'.
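+// For example, matchesLabels(map[string]string{"type": "item", "color": "red"},
+// map[string]string{"color": "red"}) is true, because every label in the second
+// map is present with the same value in the first.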
+func matchesLabels(a, b map[string]string) bool {
+	for k, v := range b {
+		if a[k] != v {
+			return false
+		}
+	}
+
+	return true
+}
+
+// Flush persists changes to manifest manager.
+func (m *Manager) Flush(ctx context.Context) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	_, err := m.flushPendingEntriesLocked(ctx)
+	return err
+}
+
+func (m *Manager) flushPendingEntriesLocked(ctx context.Context) (string, error) {
+	if len(m.pendingEntries) == 0 {
+		return "", nil
+	}
+
+	man := manifest{}
+
+	for _, e := range m.pendingEntries {
+		man.Entries = append(man.Entries, e)
+	}
+
+	var buf bytes.Buffer
+
+	gz := gzip.NewWriter(&buf)
+	if err := json.NewEncoder(gz).Encode(man); err != nil {
+		return "", fmt.Errorf("unable to marshal: %v", err)
+	}
+	if err := gz.Flush(); err != nil {
+		return "", fmt.Errorf("unable to flush: %v", err)
+	}
+	if err := gz.Close(); err != nil {
+		return "", fmt.Errorf("unable to close: %v", err)
+	}
+
+	blockID, err := m.b.WriteBlock(ctx, buf.Bytes(), manifestBlockPrefix)
+	if err != nil {
+		return "", err
+	}
+
+	for _, e := range m.pendingEntries {
+		m.committedEntries[e.ID] = e
+		delete(m.pendingEntries, e.ID)
+	}
+
+	m.committedBlockIDs[blockID] = true
+
+	return blockID, nil
+}
+
+// Delete marks the specified manifest ID for deletion.
+func (m *Manager) Delete(ctx context.Context, id string) error {
+	if err := m.ensureInitialized(ctx); err != nil {
+		return err
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	if m.pendingEntries[id] == nil && m.committedEntries[id] == nil {
+		return nil
+	}
+
+	m.pendingEntries[id] = &manifestEntry{
+		ID:      id,
+		ModTime: time.Now().UTC(),
+		Deleted: true,
+	}
+	return nil
+}
+
+// Refresh updates the committed blocks from the underlying storage.
+func (m *Manager) Refresh(ctx context.Context) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	return m.loadCommittedBlocksLocked(ctx)
+}
+
+func (m *Manager) loadCommittedBlocksLocked(ctx context.Context) error {
+	log.Debugf("listing manifest blocks")
+	for {
+		blocks, err := m.b.ListBlocks(manifestBlockPrefix)
+		if err != nil {
+			return fmt.Errorf("unable to list manifest blocks: %v", err)
+		}
+
+		m.committedEntries = map[string]*manifestEntry{}
+		m.committedBlockIDs = map[string]bool{}
+
+		log.Debugf("found %v manifest blocks", len(blocks))
+		err = m.loadManifestBlocks(ctx, blocks)
+		if err == nil {
+			// success
+			break
+		}
+		if err == storage.ErrBlockNotFound {
+			// try again, lost a race with another manifest manager which just did compaction
+			continue
+		}
+		return fmt.Errorf("unable to load manifest blocks: %v", err)
+	}
+
+	if err := m.maybeCompactLocked(ctx); err != nil {
+		return fmt.Errorf("error auto-compacting blocks: %v", err)
+	}
+
+	return nil
+}
+
+func (m *Manager) loadManifestBlocks(ctx context.Context, blockIDs []string) error {
+	t0 := time.Now()
+
+	for _, b := range blockIDs {
+		m.committedBlockIDs[b] = true
+	}
+
+	manifests, err := m.loadBlocksInParallel(ctx, blockIDs)
+	if err != nil {
+		return err
+	}
+
+	for _, man := range manifests {
+		for _, e := range man.Entries {
+			m.mergeEntry(e)
+		}
+	}
+
+	// after merging, remove entries marked as deleted.
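+	// (a deletion is represented by an entry with Deleted=true; once it has shadowed
+	// the original entry during the merge, neither needs to stay in memory)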
+	for k, e := range m.committedEntries {
+		if e.Deleted {
+			delete(m.committedEntries, k)
+		}
+	}
+
+	log.Debugf("finished loading manifest blocks in %v.", time.Since(t0))
+
+	return nil
+}
+
+func (m *Manager) loadBlocksInParallel(ctx context.Context, blockIDs []string) ([]manifest, error) {
+	errCh := make(chan error, len(blockIDs))
+	manifests := make(chan manifest, len(blockIDs))
+	ch := make(chan string, len(blockIDs))
+	var wg sync.WaitGroup
+
+	for i := 0; i < 8; i++ {
+		wg.Add(1)
+		go func(workerID int) {
+			defer wg.Done()
+
+			for blk := range ch {
+				t1 := time.Now()
+				man, err := m.loadManifestBlock(ctx, blk)
+
+				if err != nil {
+					errCh <- err
+					log.Debugf("block %v failed to be loaded by worker %v in %v: %v.", blk, workerID, time.Since(t1), err)
+				} else {
+					log.Debugf("block %v loaded by worker %v in %v.", blk, workerID, time.Since(t1))
+					manifests <- man
+				}
+			}
+		}(i)
+	}
+
+	// feed block IDs for goroutines
+	for _, b := range blockIDs {
+		ch <- b
+	}
+	close(ch)
+
+	// wait for workers to complete
+	wg.Wait()
+	close(errCh)
+	close(manifests)
+
+	// if there was any error, forward the first one
+	if err := <-errCh; err != nil {
+		return nil, err
+	}
+
+	var man []manifest
+	for m := range manifests {
+		man = append(man, m)
+	}
+
+	return man, nil
+}
+
+func (m *Manager) loadManifestBlock(ctx context.Context, blockID string) (manifest, error) {
+	man := manifest{}
+	blk, err := m.b.GetBlock(ctx, blockID)
+	if err != nil {
+		// do not wrap the error here, we want to propagate original ErrBlockNotFound
+		// which causes a retry if we lose list/delete race.
+		return man, err
+	}
+
+	if len(blk) > 2 && blk[0] == '{' {
+		if err := json.Unmarshal(blk, &man); err != nil {
+			return man, fmt.Errorf("unable to parse block %q: %v", blockID, err)
+		}
+	} else {
+		gz, err := gzip.NewReader(bytes.NewReader(blk))
+		if err != nil {
+			return man, fmt.Errorf("unable to unpack block %q: %v", blockID, err)
+		}
+
+		if err := json.NewDecoder(gz).Decode(&man); err != nil {
+			return man, fmt.Errorf("unable to parse block %q: %v", blockID, err)
+		}
+	}
+
+	return man, nil
+}
+
+// Compact performs compaction of manifest blocks.
+func (m *Manager) Compact(ctx context.Context) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	return m.compactLocked(ctx)
+}
+
+func (m *Manager) maybeCompactLocked(ctx context.Context) error {
+	if len(m.committedBlockIDs) < autoCompactionBlockCount {
+		return nil
+	}
+
+	log.Debugf("performing automatic compaction of %v blocks", len(m.committedBlockIDs))
+	if err := m.compactLocked(ctx); err != nil {
+		return fmt.Errorf("unable to compact manifest blocks: %v", err)
+	}
+
+	if err := m.b.Flush(ctx); err != nil {
+		return fmt.Errorf("unable to flush blocks after auto-compaction: %v", err)
+	}
+
+	return nil
+}
+
+func (m *Manager) compactLocked(ctx context.Context) error {
+	log.Debugf("compactLocked: pendingEntries=%v blockIDs=%v", len(m.pendingEntries), len(m.committedBlockIDs))
+
+	if len(m.committedBlockIDs) == 1 && len(m.pendingEntries) == 0 {
+		return nil
+	}
+
+	// compaction needs to be atomic (deletes and rewrite should show up in one index block or not show up at all)
+	// that's why we want to prevent index flushes while we're doing it.
+ m.b.DisableIndexFlush() + defer m.b.EnableIndexFlush() + + for _, e := range m.committedEntries { + m.pendingEntries[e.ID] = e + } + + blockID, err := m.flushPendingEntriesLocked(ctx) + if err != nil { + return err + } + + // add the newly-created block to the list, could be duplicate + for b := range m.committedBlockIDs { + if b == blockID { + // do not delete block that was just written. + continue + } + + if err := m.b.DeleteBlock(b); err != nil { + return fmt.Errorf("unable to delete block %q: %v", b, err) + } + + delete(m.committedBlockIDs, b) + } + + return nil +} + +func (m *Manager) mergeEntry(e *manifestEntry) { + prev := m.committedEntries[e.ID] + if prev == nil { + m.committedEntries[e.ID] = e + return + } + + if e.ModTime.After(prev.ModTime) { + m.committedEntries[e.ID] = e + } +} + +func (m *Manager) ensureInitialized(ctx context.Context) error { + m.mu.Lock() + defer m.mu.Unlock() + + if m.initialized { + return nil + } + + if err := m.loadCommittedBlocksLocked(ctx); err != nil { + return err + } + + m.initialized = true + return nil +} + +func copyLabels(m map[string]string) map[string]string { + r := map[string]string{} + for k, v := range m { + r[k] = v + } + return r +} + +// NewManager returns new manifest manager for the provided block manager. +func NewManager(ctx context.Context, b *block.Manager) (*Manager, error) { + m := &Manager{ + b: b, + pendingEntries: map[string]*manifestEntry{}, + committedEntries: map[string]*manifestEntry{}, + committedBlockIDs: map[string]bool{}, + } + + return m, nil +} diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go new file mode 100644 index 000000000..2e6b3bd70 --- /dev/null +++ b/manifest/manifest_manager_test.go @@ -0,0 +1,193 @@ +package manifest + +import ( + "context" + "fmt" + "reflect" + "sort" + "testing" + "time" + + "github.com/kopia/repo/block" + "github.com/kopia/repo/internal/storagetesting" +) + +func TestManifest(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + mgr, setupErr := newManagerForTesting(ctx, t, data) + if setupErr != nil { + t.Fatalf("unable to open block manager: %v", setupErr) + } + + item1 := map[string]int{"foo": 1, "bar": 2} + item2 := map[string]int{"foo": 2, "bar": 3} + item3 := map[string]int{"foo": 3, "bar": 4} + + labels1 := map[string]string{"type": "item", "color": "red"} + labels2 := map[string]string{"type": "item", "color": "blue", "shape": "square"} + labels3 := map[string]string{"type": "item", "shape": "square", "color": "red"} + + id1 := addAndVerify(ctx, t, mgr, labels1, item1) + id2 := addAndVerify(ctx, t, mgr, labels2, item2) + id3 := addAndVerify(ctx, t, mgr, labels3, item3) + + cases := []struct { + criteria map[string]string + expected []string + }{ + {map[string]string{"color": "red"}, []string{id1, id3}}, + {map[string]string{"color": "blue"}, []string{id2}}, + {map[string]string{"color": "green"}, nil}, + {map[string]string{"color": "red", "shape": "square"}, []string{id3}}, + {map[string]string{"color": "blue", "shape": "square"}, []string{id2}}, + {map[string]string{"color": "red", "shape": "circle"}, nil}, + } + + // verify before flush + for _, tc := range cases { + verifyMatches(ctx, t, mgr, tc.criteria, tc.expected) + } + verifyItem(ctx, t, mgr, id1, labels1, item1) + verifyItem(ctx, t, mgr, id2, labels2, item2) + verifyItem(ctx, t, mgr, id3, labels3, item3) + + if err := mgr.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + if err := mgr.Flush(ctx); err != nil { + t.Errorf("flush error: %v", 
err) + } + + // verify after flush + for _, tc := range cases { + verifyMatches(ctx, t, mgr, tc.criteria, tc.expected) + } + verifyItem(ctx, t, mgr, id1, labels1, item1) + verifyItem(ctx, t, mgr, id2, labels2, item2) + verifyItem(ctx, t, mgr, id3, labels3, item3) + + // flush underlying block manager and verify in new manifest manager. + mgr.b.Flush(ctx) + mgr2, setupErr := newManagerForTesting(ctx, t, data) + if setupErr != nil { + t.Fatalf("can't open block manager: %v", setupErr) + } + for _, tc := range cases { + verifyMatches(ctx, t, mgr2, tc.criteria, tc.expected) + } + verifyItem(ctx, t, mgr2, id1, labels1, item1) + verifyItem(ctx, t, mgr2, id2, labels2, item2) + verifyItem(ctx, t, mgr2, id3, labels3, item3) + if err := mgr2.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + + // delete from one + time.Sleep(1 * time.Second) + if err := mgr.Delete(ctx, id3); err != nil { + t.Errorf("delete error: %v", err) + } + verifyItemNotFound(ctx, t, mgr, id3) + mgr.Flush(ctx) + verifyItemNotFound(ctx, t, mgr, id3) + + // still found in another + verifyItem(ctx, t, mgr2, id3, labels3, item3) + if err := mgr2.loadCommittedBlocksLocked(ctx); err != nil { + t.Errorf("unable to load: %v", err) + } + + if err := mgr.Compact(ctx); err != nil { + t.Errorf("can't compact: %v", err) + } + + blks, err := mgr.b.ListBlocks(manifestBlockPrefix) + if err != nil { + t.Errorf("unable to list manifest blocks: %v", err) + } + if got, want := len(blks), 1; got != want { + t.Errorf("unexpected number of blocks: %v, want %v", got, want) + } + + mgr.b.Flush(ctx) + + mgr3, err := newManagerForTesting(ctx, t, data) + if err != nil { + t.Fatalf("can't open manager: %v", err) + } + + verifyItem(ctx, t, mgr3, id1, labels1, item1) + verifyItem(ctx, t, mgr3, id2, labels2, item2) + verifyItemNotFound(ctx, t, mgr3, id3) +} + +func addAndVerify(ctx context.Context, t *testing.T, mgr *Manager, labels map[string]string, data map[string]int) string { + t.Helper() + id, err := mgr.Put(ctx, labels, data) + if err != nil { + t.Errorf("unable to add %v (%v): %v", labels, data, err) + return "" + } + + verifyItem(ctx, t, mgr, id, labels, data) + return id +} + +func verifyItem(ctx context.Context, t *testing.T, mgr *Manager, id string, labels map[string]string, data map[string]int) { + t.Helper() + + l, err := mgr.GetMetadata(ctx, id) + if err != nil { + t.Errorf("unable to retrieve %q: %v", id, err) + return + } + + if !reflect.DeepEqual(l.Labels, labels) { + t.Errorf("invalid labels retrieved %v, wanted %v", l.Labels, labels) + } +} + +func verifyItemNotFound(ctx context.Context, t *testing.T, mgr *Manager, id string) { + t.Helper() + + _, err := mgr.GetMetadata(ctx, id) + if got, want := err, ErrNotFound; got != want { + t.Errorf("invalid error when getting %q %v, expected %v", id, err, ErrNotFound) + return + } +} + +func verifyMatches(ctx context.Context, t *testing.T, mgr *Manager, labels map[string]string, expected []string) { + t.Helper() + + var matches []string + items, err := mgr.Find(ctx, labels) + if err != nil { + t.Errorf("error in Find(): %v", err) + return + } + for _, m := range items { + matches = append(matches, m.ID) + } + sort.Strings(matches) + sort.Strings(expected) + + if !reflect.DeepEqual(matches, expected) { + t.Errorf("invalid matches for %v: %v, expected %v", labels, matches, expected) + } +} + +func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]byte) (*Manager, error) { + st := storagetesting.NewMapStorage(data, nil, nil) + + bm, err := block.NewManager(ctx, 
st, block.FormattingOptions{ + BlockFormat: "UNENCRYPTED_HMAC_SHA256_128", + MaxPackSize: 100000, + }, block.CachingOptions{}) + if err != nil { + return nil, fmt.Errorf("can't create block manager: %v", err) + } + + return NewManager(ctx, bm) +} diff --git a/manifest/serialized.go b/manifest/serialized.go new file mode 100644 index 000000000..34be024c9 --- /dev/null +++ b/manifest/serialized.go @@ -0,0 +1,18 @@ +package manifest + +import ( + "encoding/json" + "time" +) + +type manifest struct { + Entries []*manifestEntry `json:"entries"` +} + +type manifestEntry struct { + ID string `json:"id"` + Labels map[string]string `json:"labels"` + ModTime time.Time `json:"modified"` + Deleted bool `json:"deleted,omitempty"` + Content json.RawMessage `json:"data"` +} diff --git a/object/indirect.go b/object/indirect.go new file mode 100644 index 000000000..2f217e536 --- /dev/null +++ b/object/indirect.go @@ -0,0 +1,10 @@ +package object + +var indirectStreamType = "kopia:indirect" + +// indirectObjectEntry represents an entry in indirect object stream. +type indirectObjectEntry struct { + Start int64 `json:"s,omitempty"` + Length int64 `json:"l,omitempty"` + Object ID `json:"o,omitempty"` +} diff --git a/object/object_manager.go b/object/object_manager.go new file mode 100644 index 000000000..f56d1c1ee --- /dev/null +++ b/object/object_manager.go @@ -0,0 +1,280 @@ +// Package object implements repository support for content-addressable objects of arbitrary size. +package object + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "sync" + + "github.com/kopia/repo/block" + "github.com/kopia/repo/internal/jsonstream" +) + +// Reader allows reading, seeking, getting the length of and closing of a repository object. +type Reader interface { + io.Reader + io.Seeker + io.Closer + Length() int64 +} + +type blockManager interface { + BlockInfo(ctx context.Context, blockID string) (block.Info, error) + GetBlock(ctx context.Context, blockID string) ([]byte, error) + WriteBlock(ctx context.Context, data []byte, prefix string) (string, error) +} + +// Format describes the format of objects in a repository. +type Format struct { + Splitter string `json:"splitter,omitempty"` // splitter used to break objects into storage blocks + MinBlockSize int `json:"minBlockSize,omitempty"` // minimum block size used with dynamic splitter + AvgBlockSize int `json:"avgBlockSize,omitempty"` // approximate size of storage block (used with dynamic splitter) + MaxBlockSize int `json:"maxBlockSize,omitempty"` // maximum size of storage block +} + +// Manager implements a content-addressable storage on top of blob storage. +type Manager struct { + Format Format + + blockMgr blockManager + + async bool + writeBackWG sync.WaitGroup + writeBackSemaphore semaphore + + trace func(message string, args ...interface{}) + + newSplitter func() objectSplitter +} + +// Close closes the connection to the underlying blob storage and releases any resources. +func (om *Manager) Close(ctx context.Context) error { + om.writeBackWG.Wait() + return om.Flush(ctx) +} + +// NewWriter creates an ObjectWriter for writing to the repository. +func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer { + w := &objectWriter{ + ctx: ctx, + repo: om, + splitter: om.newSplitter(), + description: opt.Description, + prefix: opt.Prefix, + } + + if opt.splitter != nil { + w.splitter = opt.splitter + } + + return w +} + +// Open creates new ObjectReader for reading given object from a repository. 
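+//
+// A usage sketch (error handling abbreviated):
+//
+//   r, err := om.Open(ctx, oid)
+//   if err != nil {
+//       return err
+//   }
+//   defer r.Close()
+//   data, err := ioutil.ReadAll(r)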
+func (om *Manager) Open(ctx context.Context, objectID ID) (Reader, error) { + // log.Printf("Repository::Open %v", objectID.String()) + // defer log.Printf("finished Repository::Open() %v", objectID.String()) + + // Flush any pending writes. + om.writeBackWG.Wait() + + if indexObjectID, ok := objectID.IndexObjectID(); ok { + rd, err := om.Open(ctx, indexObjectID) + if err != nil { + return nil, err + } + defer rd.Close() //nolint:errcheck + + seekTable, err := om.flattenListChunk(rd) + if err != nil { + return nil, err + } + + totalLength := seekTable[len(seekTable)-1].endOffset() + + return &objectReader{ + ctx: ctx, + repo: om, + seekTable: seekTable, + totalLength: totalLength, + }, nil + } + + return om.newRawReader(ctx, objectID) +} + +// VerifyObject ensures that all objects backing ObjectID are present in the repository +// and returns the total length of the object and storage blocks of which it is composed. +func (om *Manager) VerifyObject(ctx context.Context, oid ID) (int64, []string, error) { + // Flush any pending writes. + om.writeBackWG.Wait() + + blocks := &blockTracker{} + l, err := om.verifyObjectInternal(ctx, oid, blocks) + if err != nil { + return 0, nil, err + } + + return l, blocks.blockIDs(), nil +} + +func (om *Manager) verifyIndirectObjectInternal(ctx context.Context, indexObjectID ID, blocks *blockTracker) (int64, error) { + if _, err := om.verifyObjectInternal(ctx, indexObjectID, blocks); err != nil { + return 0, fmt.Errorf("unable to read index: %v", err) + } + rd, err := om.Open(ctx, indexObjectID) + if err != nil { + return 0, err + } + defer rd.Close() //nolint:errcheck + + seekTable, err := om.flattenListChunk(rd) + if err != nil { + return 0, err + } + + for i, m := range seekTable { + l, err := om.verifyObjectInternal(ctx, m.Object, blocks) + if err != nil { + return 0, err + } + + if l != m.Length { + return 0, fmt.Errorf("unexpected length of part %#v of indirect object %q: %v %v, expected %v", i, indexObjectID, m.Object, l, m.Length) + } + } + + totalLength := seekTable[len(seekTable)-1].endOffset() + return totalLength, nil +} + +func (om *Manager) verifyObjectInternal(ctx context.Context, oid ID, blocks *blockTracker) (int64, error) { + if indexObjectID, ok := oid.IndexObjectID(); ok { + return om.verifyIndirectObjectInternal(ctx, indexObjectID, blocks) + } + + if blockID, ok := oid.BlockID(); ok { + p, err := om.blockMgr.BlockInfo(ctx, blockID) + if err != nil { + return 0, err + } + blocks.addBlock(blockID) + return int64(p.Length), nil + } + + return 0, fmt.Errorf("unrecognized object type: %v", oid) + +} + +// Flush closes any pending pack files. Once this method returns, ObjectIDs returned by ObjectManager are +// ok to be used. +func (om *Manager) Flush(ctx context.Context) error { + om.writeBackWG.Wait() + return nil +} + +func nullTrace(message string, args ...interface{}) { +} + +// ManagerOptions specifies object manager options. +type ManagerOptions struct { + WriteBack int + Trace func(message string, args ...interface{}) +} + +// NewObjectManager creates an ObjectManager with the specified block manager and format. 
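+//
+// A construction sketch (the format values are arbitrary, mirroring the tests in this package):
+//
+//   om, err := NewObjectManager(ctx, bm, Format{
+//       Splitter:     "FIXED",
+//       MaxBlockSize: 400,
+//   }, ManagerOptions{})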
+func NewObjectManager(ctx context.Context, bm blockManager, f Format, opts ManagerOptions) (*Manager, error) { + om := &Manager{ + blockMgr: bm, + Format: f, + trace: nullTrace, + } + + splitterID := f.Splitter + if splitterID == "" { + splitterID = "FIXED" + } + + os := splitterFactories[splitterID] + if os == nil { + return nil, fmt.Errorf("unsupported splitter %q", f.Splitter) + } + + om.newSplitter = func() objectSplitter { + return os(&f) + } + + if opts.Trace != nil { + om.trace = opts.Trace + } else { + om.trace = nullTrace + } + + if opts.WriteBack > 0 { + om.async = true + om.writeBackSemaphore = make(semaphore, opts.WriteBack) + } + + return om, nil +} + +func (om *Manager) flattenListChunk(rawReader io.Reader) ([]indirectObjectEntry, error) { + pr, err := jsonstream.NewReader(bufio.NewReader(rawReader), indirectStreamType, nil) + if err != nil { + return nil, err + } + var seekTable []indirectObjectEntry + + for { + var oe indirectObjectEntry + + err := pr.Read(&oe) + if err == io.EOF { + break + } + + if err != nil { + return nil, fmt.Errorf("failed to read indirect object: %v", err) + } + + seekTable = append(seekTable, oe) + } + + return seekTable, nil +} + +func (om *Manager) newRawReader(ctx context.Context, objectID ID) (Reader, error) { + if blockID, ok := objectID.BlockID(); ok { + payload, err := om.blockMgr.GetBlock(ctx, blockID) + if err != nil { + return nil, err + } + + return newObjectReaderWithData(payload), nil + } + + return nil, fmt.Errorf("unsupported object ID: %v", objectID) +} + +type readerWithData struct { + io.ReadSeeker + length int64 +} + +func (rwd *readerWithData) Close() error { + return nil +} + +func (rwd *readerWithData) Length() int64 { + return rwd.length +} + +func newObjectReaderWithData(data []byte) Reader { + return &readerWithData{ + ReadSeeker: bytes.NewReader(data), + length: int64(len(data)), + } +} diff --git a/object/object_manager_test.go b/object/object_manager_test.go new file mode 100644 index 000000000..5b14345ea --- /dev/null +++ b/object/object_manager_test.go @@ -0,0 +1,354 @@ +package object + +import ( + "bytes" + "context" + cryptorand "crypto/rand" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "io/ioutil" + "math/rand" + "reflect" + "runtime/debug" + "sync" + "testing" + + "github.com/kopia/repo/block" + "github.com/kopia/repo/internal/jsonstream" + "github.com/kopia/repo/storage" +) + +type fakeBlockManager struct { + mu sync.Mutex + data map[string][]byte +} + +func (f *fakeBlockManager) GetBlock(ctx context.Context, blockID string) ([]byte, error) { + f.mu.Lock() + defer f.mu.Unlock() + + if d, ok := f.data[blockID]; ok { + return append([]byte(nil), d...), nil + } + + return nil, storage.ErrBlockNotFound +} + +func (f *fakeBlockManager) WriteBlock(ctx context.Context, data []byte, prefix string) (string, error) { + h := sha256.New() + h.Write(data) + blockID := prefix + string(hex.EncodeToString(h.Sum(nil))) + + f.mu.Lock() + defer f.mu.Unlock() + + f.data[blockID] = append([]byte(nil), data...) 
+ return blockID, nil +} + +func (f *fakeBlockManager) BlockInfo(ctx context.Context, blockID string) (block.Info, error) { + f.mu.Lock() + defer f.mu.Unlock() + + if d, ok := f.data[blockID]; ok { + return block.Info{BlockID: blockID, Length: uint32(len(d))}, nil + } + + return block.Info{}, storage.ErrBlockNotFound +} + +func (f *fakeBlockManager) Flush(ctx context.Context) error { + return nil +} + +func setupTest(t *testing.T) (map[string][]byte, *Manager) { + return setupTestWithData(t, map[string][]byte{}, ManagerOptions{}) +} + +func setupTestWithData(t *testing.T, data map[string][]byte, opts ManagerOptions) (map[string][]byte, *Manager) { + r, err := NewObjectManager(context.Background(), &fakeBlockManager{data: data}, Format{ + MaxBlockSize: 400, + Splitter: "FIXED", + }, opts) + if err != nil { + t.Fatalf("can't create object manager: %v", err) + } + + return data, r +} + +func TestWriters(t *testing.T) { + ctx := context.Background() + cases := []struct { + data []byte + objectID ID + }{ + { + []byte("the quick brown fox jumps over the lazy dog"), + "05c6e08f1d9fdafa03147fcb8f82f124c76d2f70e3d989dc8aadb5e7d7450bec", + }, + {make([]byte, 100), "cd00e292c5970d3c5e2f0ffa5171e555bc46bfc4faddfb4a418b6840b86e79a3"}, // 100 zero bytes + } + + for _, c := range cases { + data, om := setupTest(t) + + writer := om.NewWriter(ctx, WriterOptions{}) + + writer.Write(c.data) + + result, err := writer.Result() + if err != nil { + t.Errorf("error getting writer results for %v, expected: %v", c.data, c.objectID.String()) + continue + } + + om.writeBackWG.Wait() + + if !objectIDsEqual(result, c.objectID) { + t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String()) + } + + if _, ok := c.objectID.BlockID(); !ok { + if len(data) != 0 { + t.Errorf("unexpected data written to the storage: %v", data) + } + } else { + if len(data) != 1 { + // 1 data block + t.Errorf("unexpected data written to the storage: %v", data) + } + } + } +} + +func objectIDsEqual(o1 ID, o2 ID) bool { + return reflect.DeepEqual(o1, o2) +} + +func TestWriterCompleteChunkInTwoWrites(t *testing.T) { + ctx := context.Background() + _, om := setupTest(t) + + bytes := make([]byte, 100) + writer := om.NewWriter(ctx, WriterOptions{}) + writer.Write(bytes[0:50]) + writer.Write(bytes[0:50]) + result, err := writer.Result() + if !objectIDsEqual(result, "cd00e292c5970d3c5e2f0ffa5171e555bc46bfc4faddfb4a418b6840b86e79a3") { + t.Errorf("unexpected result: %v err: %v", result, err) + } +} + +func verifyIndirectBlock(ctx context.Context, t *testing.T, r *Manager, oid ID) { + for indexBlockID, isIndirect := oid.IndexObjectID(); isIndirect; indexBlockID, isIndirect = indexBlockID.IndexObjectID() { + rd, err := r.Open(ctx, indexBlockID) + if err != nil { + t.Errorf("unable to open %v: %v", oid.String(), err) + return + } + defer rd.Close() + + pr, err := jsonstream.NewReader(rd, indirectStreamType, nil) + if err != nil { + t.Errorf("cannot open indirect stream: %v", err) + return + } + for { + v := indirectObjectEntry{} + if err := pr.Read(&v); err != nil { + if err == io.EOF { + break + } + t.Errorf("err: %v", err) + break + } + } + } +} + +func TestIndirection(t *testing.T) { + ctx := context.Background() + cases := []struct { + dataLength int + expectedBlockCount int + expectedIndirection int + }{ + {dataLength: 200, expectedBlockCount: 1, expectedIndirection: 0}, + {dataLength: 1400, expectedBlockCount: 3, expectedIndirection: 1}, + {dataLength: 2000, expectedBlockCount: 4, expectedIndirection: 2}, 
+ {dataLength: 3000, expectedBlockCount: 5, expectedIndirection: 2}, + {dataLength: 4000, expectedBlockCount: 5, expectedIndirection: 2}, + {dataLength: 10000, expectedBlockCount: 10, expectedIndirection: 3}, + } + + for _, c := range cases { + data, om := setupTest(t) + + contentBytes := make([]byte, c.dataLength) + + writer := om.NewWriter(ctx, WriterOptions{}) + writer.Write(contentBytes) + result, err := writer.Result() + if err != nil { + t.Errorf("error getting writer results: %v", err) + } + + if indirectionLevel(result) != c.expectedIndirection { + t.Errorf("incorrect indirection level for size: %v: %v, expected %v", c.dataLength, indirectionLevel(result), c.expectedIndirection) + } + + if got, want := len(data), c.expectedBlockCount; got != want { + t.Errorf("unexpected block count for %v: %v, expected %v", c.dataLength, got, want) + } + + om.Flush(ctx) + + l, b, err := om.VerifyObject(ctx, result) + if err != nil { + t.Errorf("error verifying %q: %v", result, err) + } + + if got, want := int(l), len(contentBytes); got != want { + t.Errorf("got invalid byte count for %q: %v, wanted %v", result, got, want) + } + + if got, want := len(b), c.expectedBlockCount; got != want { + t.Errorf("invalid block count for %v, got %v, wanted %v", result, got, want) + } + + verifyIndirectBlock(ctx, t, om, result) + } +} + +func indirectionLevel(oid ID) int { + indexObjectID, ok := oid.IndexObjectID() + if !ok { + return 0 + } + + return 1 + indirectionLevel(indexObjectID) +} + +func TestHMAC(t *testing.T) { + ctx := context.Background() + content := bytes.Repeat([]byte{0xcd}, 50) + + _, om := setupTest(t) + + w := om.NewWriter(ctx, WriterOptions{}) + w.Write(content) + result, err := w.Result() + if result.String() != "cad29ff89951a3c085c86cb7ed22b82b51f7bdfda24f932c7f9601f51d5975ba" { + t.Errorf("unexpected result: %v err: %v", result.String(), err) + } +} + +func TestReader(t *testing.T) { + ctx := context.Background() + data, om := setupTest(t) + + storedPayload := []byte("foo\nbar") + data["a76999788386641a3ec798554f1fe7e6"] = storedPayload + + cases := []struct { + text string + payload []byte + }{ + {"a76999788386641a3ec798554f1fe7e6", storedPayload}, + } + + for _, c := range cases { + objectID, err := ParseID(c.text) + if err != nil { + t.Errorf("cannot parse object ID: %v", err) + continue + } + + reader, err := om.Open(ctx, objectID) + if err != nil { + t.Errorf("cannot create reader for %v: %v", objectID, err) + continue + } + + d, err := ioutil.ReadAll(reader) + if err != nil { + t.Errorf("cannot read all data for %v: %v", objectID, err) + continue + } + if !bytes.Equal(d, c.payload) { + t.Errorf("incorrect payload for %v: expected: %v got: %v", objectID, c.payload, d) + continue + } + } +} + +func TestReaderStoredBlockNotFound(t *testing.T) { + ctx := context.Background() + _, om := setupTest(t) + + objectID, err := ParseID("deadbeef") + if err != nil { + t.Errorf("cannot parse object ID: %v", err) + } + reader, err := om.Open(ctx, objectID) + if err != storage.ErrBlockNotFound || reader != nil { + t.Errorf("unexpected result: reader: %v err: %v", reader, err) + } +} + +func TestEndToEndReadAndSeek(t *testing.T) { + ctx := context.Background() + _, om := setupTest(t) + + for _, size := range []int{1, 199, 200, 201, 9999, 512434} { + // Create some random data sample of the specified size. 
+		randomData := make([]byte, size)
+		cryptorand.Read(randomData)
+
+		writer := om.NewWriter(ctx, WriterOptions{})
+		writer.Write(randomData)
+		objectID, err := writer.Result()
+		writer.Close()
+		if err != nil {
+			t.Errorf("cannot get writer result for %v: %v", size, err)
+			continue
+		}
+
+		verify(ctx, t, om, objectID, randomData, fmt.Sprintf("%v %v", objectID, size))
+	}
+}
+
+func verify(ctx context.Context, t *testing.T, om *Manager, objectID ID, expectedData []byte, testCaseID string) {
+	t.Helper()
+	reader, err := om.Open(ctx, objectID)
+	if err != nil {
+		t.Errorf("cannot get reader for %v (%v): %v %v", testCaseID, objectID, err, string(debug.Stack()))
+		return
+	}
+
+	for i := 0; i < 20; i++ {
+		sampleSize := int(rand.Int31n(300))
+		seekOffset := int(rand.Int31n(int32(len(expectedData))))
+		if seekOffset+sampleSize > len(expectedData) {
+			sampleSize = len(expectedData) - seekOffset
+		}
+		if sampleSize > 0 {
+			got := make([]byte, sampleSize)
+			if offset, err := reader.Seek(int64(seekOffset), 0); err != nil || offset != int64(seekOffset) {
+				t.Errorf("seek error: %v offset=%v expected:%v", err, offset, seekOffset)
+			}
+			if n, err := reader.Read(got); err != nil || n != sampleSize {
+				t.Errorf("invalid data: n=%v, expected=%v, err:%v", n, sampleSize, err)
+			}
+
+			expected := expectedData[seekOffset : seekOffset+sampleSize]
+
+			if !bytes.Equal(expected, got) {
+				t.Errorf("incorrect data read for %v: expected: %x, got: %x", testCaseID, expected, got)
+			}
+		}
+	}
+}
diff --git a/object/object_reader.go b/object/object_reader.go
new file mode 100644
index 000000000..5097d5fff
--- /dev/null
+++ b/object/object_reader.go
@@ -0,0 +1,159 @@
+package object
+
+import (
+	"context"
+	"fmt"
+	"io"
+)
+
+func (i *indirectObjectEntry) endOffset() int64 {
+	return i.Start + i.Length
+}
+
+type objectReader struct {
+	ctx  context.Context
+	repo *Manager
+
+	seekTable []indirectObjectEntry
+
+	currentPosition int64 // Overall position in the objectReader
+	totalLength     int64 // Overall length
+
+	currentChunkIndex    int    // Index of current chunk in the seek table
+	currentChunkData     []byte // Current chunk data
+	currentChunkPosition int    // Read position in the current chunk
+}
+
+func (r *objectReader) Read(buffer []byte) (int, error) {
+	readBytes := 0
+	remaining := len(buffer)
+
+	for remaining > 0 {
+		if r.currentChunkData != nil {
+			toCopy := len(r.currentChunkData) - r.currentChunkPosition
+			if toCopy == 0 {
+				// EOF on current chunk
+				r.closeCurrentChunk()
+				r.currentChunkIndex++
+				continue
+			}
+
+			if toCopy > remaining {
+				toCopy = remaining
+			}
+
+			copy(buffer[readBytes:],
+				r.currentChunkData[r.currentChunkPosition:r.currentChunkPosition+toCopy])
+			r.currentChunkPosition += toCopy
+			r.currentPosition += int64(toCopy)
+			readBytes += toCopy
+			remaining -= toCopy
+		} else if r.currentChunkIndex < len(r.seekTable) {
+			err := r.openCurrentChunk()
+			if err != nil {
+				return 0, err
+			}
+		} else {
+			break
+		}
+	}
+
+	if readBytes == 0 {
+		return readBytes, io.EOF
+	}
+
+	return readBytes, nil
+}
+
+func (r *objectReader) openCurrentChunk() error {
+	st := r.seekTable[r.currentChunkIndex]
+	blockData, err := r.repo.Open(r.ctx, st.Object)
+	if err != nil {
+		return err
+	}
+	defer blockData.Close() //nolint:errcheck
+
+	b := make([]byte, st.Length)
+	if _, err := io.ReadFull(blockData, b); err != nil {
+		return err
+	}
+
+	r.currentChunkData = b
+	r.currentChunkPosition = 0
+	return nil
+}
+
+func (r *objectReader) closeCurrentChunk() {
+	r.currentChunkData = nil
+}
+
+func (r *objectReader) 
findChunkIndexForOffset(offset int64) (int, error) { + left := 0 + right := len(r.seekTable) - 1 + for left <= right { + middle := (left + right) / 2 + + if offset < r.seekTable[middle].Start { + right = middle - 1 + continue + } + + if offset >= r.seekTable[middle].endOffset() { + left = middle + 1 + continue + } + + return middle, nil + } + + return 0, fmt.Errorf("can't find chunk for offset %v", offset) +} + +func (r *objectReader) Seek(offset int64, whence int) (int64, error) { + if whence == 1 { + return r.Seek(r.currentPosition+offset, 0) + } + + if whence == 2 { + return r.Seek(r.totalLength+offset, 0) + } + + if offset < 0 { + return -1, fmt.Errorf("invalid seek %v %v", offset, whence) + } + + if offset > r.totalLength { + offset = r.totalLength + } + + index, err := r.findChunkIndexForOffset(offset) + if err != nil { + return -1, fmt.Errorf("invalid seek %v %v: %v", offset, whence, err) + } + + chunkStartOffset := r.seekTable[index].Start + + if index != r.currentChunkIndex { + r.closeCurrentChunk() + r.currentChunkIndex = index + } + + if r.currentChunkData == nil { + if err := r.openCurrentChunk(); err != nil { + return 0, err + } + } + + r.currentChunkPosition = int(offset - chunkStartOffset) + r.currentPosition = offset + + return r.currentPosition, nil +} + +func (r *objectReader) Close() error { + return nil +} + +func (r *objectReader) Length() int64 { + return r.totalLength +} diff --git a/object/object_splitter.go b/object/object_splitter.go new file mode 100644 index 000000000..085274687 --- /dev/null +++ b/object/object_splitter.go @@ -0,0 +1,110 @@ +package object + +import ( + "math" + "sort" + + "github.com/silvasur/buzhash" +) + +type objectSplitter interface { + add(b byte) bool +} + +// SupportedSplitters is a list of supported object splitters including: +// +// NEVER - prevents objects from ever splitting +// FIXED - always splits large objects exactly at the maximum block size boundary +// DYNAMIC - dynamically splits large objects based on rolling hash of contents. +var SupportedSplitters []string + +var splitterFactories = map[string]func(*Format) objectSplitter{ + "NEVER": func(f *Format) objectSplitter { + return newNeverSplitter() + }, + "FIXED": func(f *Format) objectSplitter { + return newFixedSplitter(f.MaxBlockSize) + }, + "DYNAMIC": func(f *Format) objectSplitter { + return newRollingHashSplitter(buzhash.NewBuzHash(32), f.MinBlockSize, f.AvgBlockSize, f.MaxBlockSize) + }, +} + +func init() { + for k := range splitterFactories { + SupportedSplitters = append(SupportedSplitters, k) + } + sort.Strings(SupportedSplitters) +} + +// DefaultSplitter is the name of the splitter used by default for new repositories. 
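+// Content-defined (rolling hash) splitting keeps most chunk boundaries stable when
+// bytes are inserted into or removed from the middle of an object, which helps deduplication.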
+const DefaultSplitter = "DYNAMIC" + +type neverSplitter struct{} + +func (s *neverSplitter) add(b byte) bool { + return false +} + +func newNeverSplitter() objectSplitter { + return &neverSplitter{} +} + +type fixedSplitter struct { + cur int + chunkLength int +} + +func (s *fixedSplitter) add(b byte) bool { + s.cur++ + if s.cur >= s.chunkLength { + s.cur = 0 + return true + } + + return false +} + +func newFixedSplitter(chunkLength int) objectSplitter { + return &fixedSplitter{chunkLength: chunkLength} +} + +type rollingHash interface { + HashByte(b byte) uint32 +} + +type rollingHashSplitter struct { + rh rollingHash + mask uint32 + + currentBlockSize int + minBlockSize int + maxBlockSize int +} + +func (rs *rollingHashSplitter) add(b byte) bool { + sum := rs.rh.HashByte(b) + rs.currentBlockSize++ + if rs.currentBlockSize >= rs.maxBlockSize { + rs.currentBlockSize = 0 + return true + } + if sum&rs.mask == 0 && rs.currentBlockSize > rs.minBlockSize && sum != 0 { + //log.Printf("splitting %v on sum %x mask %x", rs.currentBlockSize, sum, rs.mask) + rs.currentBlockSize = 0 + return true + } + return false +} + +func newRollingHashSplitter(rh rollingHash, minBlockSize int, approxBlockSize int, maxBlockSize int) objectSplitter { + bits := rollingHashBits(approxBlockSize) + mask := ^(^uint32(0) << bits) + return &rollingHashSplitter{rh, mask, 0, minBlockSize, maxBlockSize} +} + +func rollingHashBits(n int) uint { + e := math.Log2(float64(n)) + exp := math.Floor(e + 0.5) + return uint(exp) +} diff --git a/object/object_splitter_test.go b/object/object_splitter_test.go new file mode 100644 index 000000000..1b0e3592b --- /dev/null +++ b/object/object_splitter_test.go @@ -0,0 +1,134 @@ +package object + +import ( + "math" + "math/rand" + "testing" + + "github.com/silvasur/buzhash" +) + +func TestSplitters(t *testing.T) { + cases := []struct { + desc string + newSplitter func() objectSplitter + }{ + {"rolling buzhash with 3 bits", func() objectSplitter { return newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 8, 20) }}, + {"rolling buzhash with 5 bits", func() objectSplitter { return newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, 20) }}, + } + + for _, tc := range cases { + s1 := tc.newSplitter() + s2 := tc.newSplitter() + + rnd := make([]byte, 50000000) + rand.Read(rnd) + + for i, p := range rnd { + if got, want := s1.add(p), s2.add(p); got != want { + t.Errorf("incorrect add() result for %v at offset %v", tc.desc, i) + } + } + } +} + +func TestSplitterStability(t *testing.T) { + r := rand.New(rand.NewSource(5)) + rnd := make([]byte, 5000000) + if n, err := r.Read(rnd); n != len(rnd) || err != nil { + t.Fatalf("can't initialize random data: %v", err) + } + + cases := []struct { + splitter objectSplitter + count int + avg int + minSplit int + maxSplit int + }{ + {newFixedSplitter(1000), 5000, 1000, 1000, 1000}, + {newFixedSplitter(10000), 500, 10000, 10000, 10000}, + + {newNeverSplitter(), 0, 0, math.MaxInt32, 0}, + + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, math.MaxInt32), 156262, 31, 1, 404}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 1024, math.MaxInt32), 4933, 1013, 1, 8372}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 2048, math.MaxInt32), 2476, 2019, 1, 19454}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32768, math.MaxInt32), 185, 27027, 1, 177510}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 65536, math.MaxInt32), 99, 50505, 418, 230449}, + + // min and max + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 32, 64), 179921, 27, 1, 
64}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 1024, 10000), 4933, 1013, 1, 8372}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 0, 2048, 10000), 2490, 2008, 1, 10000}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 500, 32768, 100000), 183, 27322, 522, 100000}, + {newRollingHashSplitter(buzhash.NewBuzHash(32), 500, 65536, 100000), 113, 44247, 522, 100000}, + } + + for _, tc := range cases { + s := tc.splitter + + lastSplit := -1 + maxSplit := 0 + minSplit := int(math.MaxInt32) + count := 0 + for i, p := range rnd { + if s.add(p) { + l := i - lastSplit + if l >= maxSplit { + maxSplit = l + } + if l < minSplit { + minSplit = l + } + count++ + lastSplit = i + } + } + + var avg int + if count > 0 { + avg = len(rnd) / count + } + + if got, want := avg, tc.avg; got != want { + t.Errorf("invalid split average size %v, wanted %v", got, want) + } + + if got, want := count, tc.count; got != want { + t.Errorf("invalid split count %v, wanted %v", got, want) + } + if got, want := minSplit, tc.minSplit; got != want { + t.Errorf("min split %v, wanted %v", got, want) + } + if got, want := maxSplit, tc.maxSplit; got != want { + t.Errorf("max split %v, wanted %v", got, want) + } + } +} + +func TestRollingHashBits(t *testing.T) { + cases := []struct { + blockSize int + bits uint + }{ + {256, 8}, + {128, 7}, + {100, 7}, + {500, 9}, + {700, 9}, + {724, 9}, + {725, 10}, + {768, 10}, + {1000, 10}, + {1000000, 20}, + {10000000, 23}, + {20000000, 24}, + } + + for _, tc := range cases { + if got, want := rollingHashBits(tc.blockSize), tc.bits; got != want { + t.Errorf("rollingHashBits(%v) = %v, wanted %v", tc.blockSize, got, want) + } + } +} diff --git a/object/object_writer.go b/object/object_writer.go new file mode 100644 index 000000000..2f11bd966 --- /dev/null +++ b/object/object_writer.go @@ -0,0 +1,176 @@ +package object + +import ( + "bytes" + "context" + "fmt" + "io" + "sync" + + "github.com/kopia/repo/internal/jsonstream" +) + +// Writer allows writing content to the storage and supports automatic deduplication and encryption +// of written data. 
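+//
+// Typical use (a sketch, mirroring the tests in this package):
+//
+//   w := om.NewWriter(ctx, WriterOptions{Description: "example"})
+//   w.Write(data)
+//   oid, err := w.Result()
+//   w.Close()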
+type Writer interface { + io.WriteCloser + + Result() (ID, error) +} + +type blockTracker struct { + mu sync.Mutex + blocks map[string]bool +} + +func (t *blockTracker) addBlock(blockID string) { + t.mu.Lock() + defer t.mu.Unlock() + + if t.blocks == nil { + t.blocks = make(map[string]bool) + } + t.blocks[blockID] = true +} + +func (t *blockTracker) blockIDs() []string { + t.mu.Lock() + defer t.mu.Unlock() + + result := make([]string, 0, len(t.blocks)) + for k := range t.blocks { + result = append(result, k) + } + return result +} + +type objectWriter struct { + ctx context.Context + repo *Manager + + prefix string + buffer bytes.Buffer + totalLength int64 + + currentPosition int64 + blockIndex []indirectObjectEntry + + description string + + splitter objectSplitter + pendingBlocksWG sync.WaitGroup + + err asyncErrors +} + +func (w *objectWriter) Close() error { + w.pendingBlocksWG.Wait() + return w.err.check() +} + +func (w *objectWriter) Write(data []byte) (n int, err error) { + dataLen := len(data) + w.totalLength += int64(dataLen) + + for _, d := range data { + w.buffer.WriteByte(d) + + if w.splitter.add(d) { + if err := w.flushBuffer(); err != nil { + return 0, err + } + } + } + + return dataLen, nil +} + +func (w *objectWriter) flushBuffer() error { + length := w.buffer.Len() + chunkID := len(w.blockIndex) + w.blockIndex = append(w.blockIndex, indirectObjectEntry{}) + w.blockIndex[chunkID].Start = w.currentPosition + w.blockIndex[chunkID].Length = int64(length) + w.currentPosition += int64(length) + + var b2 bytes.Buffer + w.buffer.WriteTo(&b2) //nolint:errcheck + w.buffer.Reset() + + do := func() { + blockID, err := w.repo.blockMgr.WriteBlock(w.ctx, b2.Bytes(), w.prefix) + w.repo.trace("OBJECT_WRITER(%q) stored %v (%v bytes)", w.description, blockID, length) + if err != nil { + w.err.add(fmt.Errorf("error when flushing chunk %d of %s: %v", chunkID, w.description, err)) + return + } + + w.blockIndex[chunkID].Object = DirectObjectID(blockID) + } + + if w.repo.async { + w.repo.writeBackSemaphore.Lock() + w.pendingBlocksWG.Add(1) + w.repo.writeBackWG.Add(1) + + go func() { + defer w.pendingBlocksWG.Done() + defer w.repo.writeBackWG.Done() + defer w.repo.writeBackSemaphore.Unlock() + do() + }() + + return nil + } + + do() + return w.err.check() +} + +func (w *objectWriter) Result() (ID, error) { + if w.buffer.Len() > 0 || len(w.blockIndex) == 0 { + if err := w.flushBuffer(); err != nil { + return "", err + } + } + w.pendingBlocksWG.Wait() + + if err := w.err.check(); err != nil { + return "", err + } + + if len(w.blockIndex) == 1 { + return w.blockIndex[0].Object, nil + } + + iw := &objectWriter{ + ctx: w.ctx, + repo: w.repo, + description: "LIST(" + w.description + ")", + splitter: w.repo.newSplitter(), + prefix: w.prefix, + } + + jw := jsonstream.NewWriter(iw, indirectStreamType) + for _, e := range w.blockIndex { + if err := jw.Write(&e); err != nil { + return "", fmt.Errorf("unable to write indirect block index: %v", err) + } + } + if err := jw.Finalize(); err != nil { + return "", fmt.Errorf("unable to finalize indirect block index: %v", err) + } + oid, err := iw.Result() + if err != nil { + return "", err + } + return IndirectObjectID(oid), nil +} + +// WriterOptions can be passed to Repository.NewWriter() +type WriterOptions struct { + Description string + Prefix string // empty string or a single-character ('g'..'z') + + splitter objectSplitter +} diff --git a/object/objectid.go b/object/objectid.go new file mode 100644 index 000000000..1f9b7b150 --- /dev/null +++ 
b/object/objectid.go
@@ -0,0 +1,94 @@
+package object
+
+import (
+	"encoding/hex"
+	"fmt"
+	"strings"
+)
+
+// ID is an identifier of a repository object. Repository objects can be stored:
+//
+// 1. In a single content block; this is the most common case for small objects.
+// 2. In a series of content blocks with an indirect block pointing at them (multiple indirections are allowed).
+// This is used for larger files. Object IDs using indirect blocks start with "I".
+type ID string
+
+// HasObjectID exposes the identifier of an object.
+type HasObjectID interface {
+	ObjectID() ID
+}
+
+// String returns a string representation of ObjectID that is suitable for displaying in the UI.
+func (i ID) String() string {
+	return strings.Replace(string(i), "D", "", -1)
+}
+
+// IndexObjectID returns the object ID of the underlying index object.
+func (i ID) IndexObjectID() (ID, bool) {
+	if strings.HasPrefix(string(i), "I") {
+		return i[1:], true
+	}
+
+	return "", false
+}
+
+// BlockID returns the block ID of the underlying content storage block.
+func (i ID) BlockID() (string, bool) {
+	if strings.HasPrefix(string(i), "D") {
+		return string(i[1:]), true
+	}
+	if strings.HasPrefix(string(i), "I") {
+		return "", false
+	}
+
+	return string(i), true
+}
+
+// Validate checks the ID format for validity and reports any errors.
+func (i ID) Validate() error {
+	if indexObjectID, ok := i.IndexObjectID(); ok {
+		if err := indexObjectID.Validate(); err != nil {
+			return fmt.Errorf("invalid indirect object ID %v: %v", i, err)
+		}
+
+		return nil
+	}
+
+	if blockID, ok := i.BlockID(); ok {
+		if len(blockID) < 2 {
+			return fmt.Errorf("missing block ID")
+		}
+
+		// odd length - first character must be a single character between 'g' and 'z'
+		if len(blockID)%2 == 1 {
+			if blockID[0] < 'g' || blockID[0] > 'z' {
+				return fmt.Errorf("invalid block ID prefix: %v", blockID)
+			}
+			blockID = blockID[1:]
+		}
+
+		if _, err := hex.DecodeString(blockID); err != nil {
+			return fmt.Errorf("invalid blockID suffix, must be base-16 encoded: %v", blockID)
+		}
+
+		return nil
+	}
+
+	return fmt.Errorf("invalid object ID: %v", i)
+}
+
+// DirectObjectID returns direct object ID based on the provided block ID.
+func DirectObjectID(blockID string) ID {
+	return ID(blockID)
+}
+
+// IndirectObjectID returns indirect object ID based on the underlying index object ID.
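+//
+// Round-trip sketch (editorial addition) using only the helpers in this file:
+//
+//	oid := IndirectObjectID(DirectObjectID("f0f0")) // "If0f0"
+//	inner, ok := oid.IndexObjectID()                // "f0f0", true
+//	blockID, ok2 := inner.BlockID()                 // "f0f0", true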
+func IndirectObjectID(indexObjectID ID) ID { + return "I" + indexObjectID +} + +// ParseID converts the specified string into object ID +func ParseID(s string) (ID, error) { + i := ID(s) + return i, i.Validate() +} diff --git a/object/objectid_test.go b/object/objectid_test.go new file mode 100644 index 000000000..1aa29484b --- /dev/null +++ b/object/objectid_test.go @@ -0,0 +1,48 @@ +package object + +import ( + "testing" +) + +type rawObjectID ID + +func TestParseObjectID(t *testing.T) { + cases := []struct { + text string + isValid bool + }{ + {"Df0f0", true}, + {"IDf0f0", true}, + {"IDf0f0", true}, + {"IIDf0f0", true}, + {"Dxf0f0", true}, + {"IDxf0f0", true}, + {"IDxf0f0", true}, + {"IIDxf0f0", true}, + {"Dxf0f", false}, + {"IDxf0f", false}, + {"Da", false}, + {"Daf0f0", false}, + {"", false}, + {"B!$@#$!@#$", false}, + {"X", false}, + {"I.", false}, + {"I.x", false}, + {"I.af", false}, + {"Ix.ag", false}, + {"Iab.", false}, + {"I1", false}, + {"I1,", false}, + {"I-1,X", false}, + {"Xsomething", false}, + } + + for _, tc := range cases { + _, err := ParseID(tc.text) + if err != nil && tc.isValid { + t.Errorf("error parsing %q: %v", tc.text, err) + } else if err == nil && !tc.isValid { + t.Errorf("unexpected success parsing %v", tc.text) + } + } +} diff --git a/object/semaphore.go b/object/semaphore.go new file mode 100644 index 000000000..4afa7ce24 --- /dev/null +++ b/object/semaphore.go @@ -0,0 +1,12 @@ +package object + +type empty struct{} +type semaphore chan empty + +func (s semaphore) Lock() { + s <- empty{} +} + +func (s semaphore) Unlock() { + <-s +} diff --git a/object/writeback.go b/object/writeback.go new file mode 100644 index 000000000..d320f2c58 --- /dev/null +++ b/object/writeback.go @@ -0,0 +1,37 @@ +package object + +import ( + "fmt" + "strings" + "sync" +) + +type asyncErrors struct { + sync.RWMutex + errors []error +} + +func (e *asyncErrors) add(err error) { + e.Lock() + e.errors = append(e.errors, err) + e.Unlock() +} + +func (e *asyncErrors) check() error { + e.RLock() + defer e.RUnlock() + + switch len(e.errors) { + case 0: + return nil + case 1: + return e.errors[0] + default: + msg := make([]string, len(e.errors)) + for i, err := range e.errors { + msg[i] = err.Error() + } + + return fmt.Errorf("%v errors: %v", len(e.errors), strings.Join(msg, ";")) + } +} diff --git a/open.go b/open.go new file mode 100644 index 000000000..eced33ac1 --- /dev/null +++ b/open.go @@ -0,0 +1,194 @@ +package repo + +import ( + "context" + "encoding/json" + "fmt" + "io/ioutil" + "path/filepath" + + "github.com/kopia/repo/block" + "github.com/kopia/repo/internal/repologging" + "github.com/kopia/repo/manifest" + "github.com/kopia/repo/object" + "github.com/kopia/repo/storage" + "github.com/kopia/repo/storage/logging" +) + +var log = repologging.Logger("kopia/repo") + +// Options provides configuration parameters for connection to a repository. +type Options struct { + TraceStorage func(f string, args ...interface{}) // Logs all storage access using provided Printf-style function + ObjectManagerOptions object.ManagerOptions +} + +// Open opens a Repository specified in the configuration file. 
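+//
+// Typical call sequence (editorial sketch; the config path and password are
+// placeholders):
+//
+//	rep, err := repo.Open(ctx, "/home/user/.kopia/repository.config", password, nil)
+//	if err != nil { /* handle error */ }
+//	defer rep.Close(ctx) //nolint:errcheck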
+func Open(ctx context.Context, configFile string, password string, options *Options) (rep *Repository, err error) {
+	log.Debugf("opening repository from %v", configFile)
+	defer func() {
+		if err == nil {
+			log.Debugf("opened repository")
+		} else {
+			log.Errorf("failed to open repository: %v", err)
+		}
+	}()
+
+	if options == nil {
+		options = &Options{}
+	}
+
+	configFile, err = filepath.Abs(configFile)
+	if err != nil {
+		return nil, err
+	}
+
+	log.Debugf("loading config from file: %v", configFile)
+	lc, err := loadConfigFromFile(configFile)
+	if err != nil {
+		return nil, err
+	}
+
+	log.Debugf("opening storage: %v", lc.Storage.Type)
+
+	st, err := storage.NewStorage(ctx, lc.Storage)
+	if err != nil {
+		return nil, fmt.Errorf("cannot open storage: %v", err)
+	}
+
+	if options.TraceStorage != nil {
+		st = logging.NewWrapper(st, logging.Prefix("[STORAGE] "), logging.Output(options.TraceStorage))
+	}
+
+	r, err := OpenWithConfig(ctx, st, lc, password, options, lc.Caching)
+	if err != nil {
+		st.Close(ctx) //nolint:errcheck
+		return nil, err
+	}
+
+	r.ConfigFile = configFile
+
+	return r, nil
+}
+
+// OpenWithConfig opens the repository with a given configuration, avoiding the need for a config file.
+func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, password string, options *Options, caching block.CachingOptions) (*Repository, error) {
+	log.Debugf("reading encrypted format block")
+	// Read the format block, potentially from cache.
+	f, err := readAndCacheFormatBlock(ctx, st, caching.CacheDirectory)
+	if err != nil {
+		return nil, fmt.Errorf("unable to read format block: %v", err)
+	}
+
+	masterKey, err := f.deriveMasterKeyFromPassword(password)
+	if err != nil {
+		return nil, err
+	}
+
+	repoConfig, err := f.decryptFormatBytes(masterKey)
+	if err != nil {
+		return nil, fmt.Errorf("unable to decrypt repository config: %v", err)
+	}
+
+	caching.HMACSecret = deriveKeyFromMasterKey(masterKey, f.UniqueID, []byte("local-cache-integrity"), 16)
+
+	fo := repoConfig.FormattingOptions
+	if fo.MaxPackSize == 0 {
+		fo.MaxPackSize = repoConfig.MaxBlockSize
+	}
+
+	log.Debugf("initializing block manager")
+	bm, err := block.NewManager(ctx, st, fo, caching)
+	if err != nil {
+		return nil, fmt.Errorf("unable to open block manager: %v", err)
+	}
+
+	log.Debugf("initializing object manager")
+	om, err := object.NewObjectManager(ctx, bm, repoConfig.Format, options.ObjectManagerOptions)
+	if err != nil {
+		return nil, fmt.Errorf("unable to open object manager: %v", err)
+	}
+
+	log.Debugf("initializing manifest manager")
+	manifests, err := manifest.NewManager(ctx, bm)
+	if err != nil {
+		return nil, fmt.Errorf("unable to open manifests: %v", err)
+	}
+
+	return &Repository{
+		Blocks:         bm,
+		Objects:        om,
+		Storage:        st,
+		Manifests:      manifests,
+		CacheDirectory: caching.CacheDirectory,
+		UniqueID:       f.UniqueID,
+	}, nil
+}
+
+// SetCachingConfig changes caching configuration for a given repository config file.
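+//
+// For example (editorial sketch; the cache directory is a placeholder, and
+// CachingOptions has more fields than shown):
+//
+//	err := repo.SetCachingConfig(ctx, configFile, block.CachingOptions{
+//		CacheDirectory: "/var/cache/kopia",
+//	})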
+func SetCachingConfig(ctx context.Context, configFile string, opt block.CachingOptions) error {
+	configFile, err := filepath.Abs(configFile)
+	if err != nil {
+		return err
+	}
+
+	lc, err := loadConfigFromFile(configFile)
+	if err != nil {
+		return err
+	}
+
+	st, err := storage.NewStorage(ctx, lc.Storage)
+	if err != nil {
+		return fmt.Errorf("cannot open storage: %v", err)
+	}
+
+	f, err := readAndCacheFormatBlock(ctx, st, "")
+	if err != nil {
+		return fmt.Errorf("can't read format block: %v", err)
+	}
+
+	if err = setupCaching(configFile, lc, opt, f.UniqueID); err != nil {
+		return fmt.Errorf("unable to set up caching: %v", err)
+	}
+
+	d, err := json.MarshalIndent(&lc, "", " ")
+	if err != nil {
+		return err
+	}
+
+	if err := ioutil.WriteFile(configFile, d, 0600); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func readAndCacheFormatBlock(ctx context.Context, st storage.Storage, cacheDirectory string) (*formatBlock, error) {
+	cachedFile := filepath.Join(cacheDirectory, "kopia.repository")
+	if cacheDirectory != "" {
+		b, err := ioutil.ReadFile(cachedFile)
+		if err == nil {
+			// read from cache.
+			return parseFormatBlock(b)
+		}
+	}
+
+	b, err := st.GetBlock(ctx, FormatBlockID, 0, -1)
+	if err != nil {
+		return nil, err
+	}
+
+	// block successfully read from storage.
+	f, err := parseFormatBlock(b)
+	if err != nil {
+		return nil, err
+	}
+
+	if cacheDirectory != "" {
+		if err := ioutil.WriteFile(cachedFile, b, 0600); err != nil {
+			log.Warningf("warning: unable to write cache: %v", err)
+		}
+	}
+
+	return f, nil
+}
diff --git a/repository.go b/repository.go
new file mode 100644
index 000000000..e63c1598e
--- /dev/null
+++ b/repository.go
@@ -0,0 +1,90 @@
+package repo
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/kopia/repo/block"
+	"github.com/kopia/repo/manifest"
+	"github.com/kopia/repo/object"
+	"github.com/kopia/repo/storage"
+)
+
+// Repository represents storage where both content-addressable and user-addressable data is kept.
+type Repository struct {
+	Blocks    *block.Manager
+	Objects   *object.Manager
+	Storage   storage.Storage
+	Manifests *manifest.Manager
+	UniqueID  []byte
+
+	ConfigFile     string
+	CacheDirectory string
+}
+
+// Close closes the repository and releases all resources.
+func (r *Repository) Close(ctx context.Context) error {
+	if err := r.Manifests.Flush(ctx); err != nil {
+		return err
+	}
+	if err := r.Objects.Close(ctx); err != nil {
+		return err
+	}
+	if err := r.Blocks.Flush(ctx); err != nil {
+		return err
+	}
+	if err := r.Storage.Close(ctx); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Flush waits for all in-flight writes to complete.
+func (r *Repository) Flush(ctx context.Context) error {
+	if err := r.Manifests.Flush(ctx); err != nil {
+		return err
+	}
+	if err := r.Objects.Flush(ctx); err != nil {
+		return err
+	}
+
+	return r.Blocks.Flush(ctx)
+}
+
+// Refresh makes external changes visible to the repository.
+func (r *Repository) Refresh(ctx context.Context) error {
+	updated, err := r.Blocks.Refresh(ctx)
+	if err != nil {
+		return fmt.Errorf("error refreshing block index: %v", err)
+	}
+
+	if !updated {
+		return nil
+	}
+
+	log.Debugf("block index refreshed")
+
+	if err := r.Manifests.Refresh(ctx); err != nil {
+		return fmt.Errorf("error reloading manifests: %v", err)
+	}
+
+	log.Debugf("manifests refreshed")
+
+	return nil
+}
+
+// RefreshPeriodically periodically refreshes the repository to reflect the changes made by other hosts.
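+//
+// Meant to be run on its own goroutine and stopped by canceling the context
+// (editorial sketch):
+//
+//	ctx, cancel := context.WithCancel(ctx)
+//	go rep.RefreshPeriodically(ctx, 15*time.Second)
+//	// ... later ...
+//	cancel() // terminates the refresh loop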
+func (r *Repository) RefreshPeriodically(ctx context.Context, interval time.Duration) { + for { + select { + case <-ctx.Done(): + return + + case <-time.After(interval): + if err := r.Refresh(ctx); err != nil { + log.Warningf("error refreshing repository: %v", err) + } + } + } +} diff --git a/storage/config.go b/storage/config.go new file mode 100644 index 000000000..5f8f77bcd --- /dev/null +++ b/storage/config.go @@ -0,0 +1,47 @@ +package storage + +import ( + "encoding/json" + "fmt" +) + +// ConnectionInfo represents JSON-serializable configuration of a blob storage. +type ConnectionInfo struct { + Type string + Config interface{} +} + +// UnmarshalJSON parses the JSON-encoded data into ConnectionInfo. +func (c *ConnectionInfo) UnmarshalJSON(b []byte) error { + raw := struct { + Type string `json:"type"` + Data json.RawMessage `json:"config"` + }{} + + if err := json.Unmarshal(b, &raw); err != nil { + return err + } + + c.Type = raw.Type + f := factories[raw.Type] + if f == nil { + return fmt.Errorf("storage type '%v' not registered", raw.Type) + } + c.Config = f.defaultConfigFunc() + if err := json.Unmarshal(raw.Data, c.Config); err != nil { + return fmt.Errorf("unable to unmarshal config: %v", err) + } + + return nil +} + +// MarshalJSON returns JSON-encoded storage configuration. +func (c ConnectionInfo) MarshalJSON() ([]byte, error) { + return json.Marshal(struct { + Type string `json:"type"` + Data interface{} `json:"config"` + }{ + Type: c.Type, + Data: c.Config, + }) +} diff --git a/storage/doc.go b/storage/doc.go new file mode 100644 index 000000000..a2c193b6f --- /dev/null +++ b/storage/doc.go @@ -0,0 +1,2 @@ +// Package storage implements simple storage of immutable, unstructured binary large objects (BLOBs). +package storage diff --git a/storage/filesystem/filesystem_options.go b/storage/filesystem/filesystem_options.go new file mode 100644 index 000000000..2ca3cf385 --- /dev/null +++ b/storage/filesystem/filesystem_options.go @@ -0,0 +1,40 @@ +package filesystem + +import "os" + +// Options defines options for Filesystem-backed storage. +type Options struct { + Path string `json:"path"` + + DirectoryShards []int `json:"dirShards"` + + FileMode os.FileMode `json:"fileMode,omitempty"` + DirectoryMode os.FileMode `json:"dirMode,omitempty"` + + FileUID *int `json:"uid,omitempty"` + FileGID *int `json:"gid,omitempty"` +} + +func (fso *Options) fileMode() os.FileMode { + if fso.FileMode == 0 { + return fsDefaultFileMode + } + + return fso.FileMode +} + +func (fso *Options) dirMode() os.FileMode { + if fso.DirectoryMode == 0 { + return fsDefaultDirMode + } + + return fso.DirectoryMode +} + +func (fso *Options) shards() []int { + if fso.DirectoryShards == nil { + return fsDefaultShards + } + + return fso.DirectoryShards +} diff --git a/storage/filesystem/filesystem_storage.go b/storage/filesystem/filesystem_storage.go new file mode 100644 index 000000000..a65dd4ca8 --- /dev/null +++ b/storage/filesystem/filesystem_storage.go @@ -0,0 +1,241 @@ +// Package filesystem implements filesystem-based Storage. 
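+//
+// Blocks are kept in a sharded directory tree; with the default shards of
+// {3, 3}, a block named "0123456789abcdefghij" is stored as (editorial note;
+// IDs shorter than 20 characters are kept unsharded):
+//
+//	<root>/012/345/6789abcdefghij.f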
+package filesystem
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/kopia/repo/internal/repologging"
+	"github.com/kopia/repo/storage"
+)
+
+var log = repologging.Logger("repo/filesystem")
+
+const (
+	fsStorageType        = "filesystem"
+	fsStorageChunkSuffix = ".f"
+)
+
+var (
+	fsDefaultShards               = []int{3, 3}
+	fsDefaultFileMode os.FileMode = 0600
+	fsDefaultDirMode  os.FileMode = 0700
+)
+
+type fsStorage struct {
+	Options
+}
+
+func (fs *fsStorage) GetBlock(ctx context.Context, blockID string, offset, length int64) ([]byte, error) {
+	_, path := fs.getShardedPathAndFilePath(blockID)
+
+	f, err := os.Open(path)
+	if os.IsNotExist(err) {
+		return nil, storage.ErrBlockNotFound
+	}
+
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close() //nolint:errcheck
+
+	if length < 0 {
+		return ioutil.ReadAll(f)
+	}
+
+	if _, err := f.Seek(offset, io.SeekStart); err != nil {
+		return nil, err
+	}
+	return ioutil.ReadAll(io.LimitReader(f, length))
+}
+
+func getBlockIDFromFileName(name string) (string, bool) {
+	if strings.HasSuffix(name, fsStorageChunkSuffix) {
+		return name[0 : len(name)-len(fsStorageChunkSuffix)], true
+	}
+
+	return "", false
+}
+
+func makeFileName(blockID string) string {
+	return blockID + fsStorageChunkSuffix
+}
+
+func (fs *fsStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
+	var walkDir func(string, string) error
+
+	walkDir = func(directory string, currentPrefix string) error {
+		entries, err := ioutil.ReadDir(directory)
+		if err != nil {
+			return err
+		}
+
+		for _, e := range entries {
+			if e.IsDir() {
+				newPrefix := currentPrefix + e.Name()
+				var match bool
+
+				if len(prefix) > len(newPrefix) {
+					match = strings.HasPrefix(prefix, newPrefix)
+				} else {
+					match = strings.HasPrefix(newPrefix, prefix)
+				}
+
+				if match {
+					if err := walkDir(directory+"/"+e.Name(), currentPrefix+e.Name()); err != nil {
+						return err
+					}
+				}
+			} else if fullID, ok := getBlockIDFromFileName(currentPrefix + e.Name()); ok {
+				if strings.HasPrefix(fullID, prefix) {
+					if err := callback(storage.BlockMetadata{
+						BlockID:   fullID,
+						Length:    e.Size(),
+						Timestamp: e.ModTime(),
+					}); err != nil {
+						return err
+					}
+				}
+			}
+		}
+
+		return nil
+	}
+
+	return walkDir(fs.Path, "")
+}
+
+// TouchBlock updates file modification time to current time if it's sufficiently old.
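+// The threshold avoids rewriting metadata for recently-touched files
+// (editorial sketch):
+//
+//	fs.TouchBlock(ctx, blockID, 30*time.Minute) // no-op if modified <30min ago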
+func (fs *fsStorage) TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error {
+	_, path := fs.getShardedPathAndFilePath(blockID)
+	st, err := os.Stat(path)
+	if err != nil {
+		return err
+	}
+
+	n := time.Now()
+	age := n.Sub(st.ModTime())
+	if age < threshold {
+		return nil
+	}
+
+	log.Debugf("updating timestamp on %v to %v", path, n)
+	return os.Chtimes(path, n, n)
+}
+
+func (fs *fsStorage) PutBlock(ctx context.Context, blockID string, data []byte) error {
+	_, path := fs.getShardedPathAndFilePath(blockID)
+
+	tempFile := fmt.Sprintf("%s.tmp.%d", path, rand.Int())
+	f, err := fs.createTempFileAndDir(tempFile)
+	if err != nil {
+		return fmt.Errorf("cannot create temporary file: %v", err)
+	}
+
+	if _, err = f.Write(data); err != nil {
+		return fmt.Errorf("can't write temporary file: %v", err)
+	}
+	if err = f.Close(); err != nil {
+		return fmt.Errorf("can't close temporary file: %v", err)
+	}
+
+	err = os.Rename(tempFile, path)
+	if err != nil {
+		if removeErr := os.Remove(tempFile); removeErr != nil {
+			log.Warningf("can't remove temp file: %v", removeErr)
+		}
+		return err
+	}
+
+	if fs.FileUID != nil && fs.FileGID != nil && os.Geteuid() == 0 {
+		if chownErr := os.Chown(path, *fs.FileUID, *fs.FileGID); chownErr != nil {
+			log.Warningf("can't change file ownership: %v", chownErr)
+		}
+	}
+
+	return nil
+}
+
+func (fs *fsStorage) createTempFileAndDir(tempFile string) (*os.File, error) {
+	flags := os.O_CREATE | os.O_WRONLY | os.O_EXCL
+	f, err := os.OpenFile(tempFile, flags, fs.fileMode())
+	if os.IsNotExist(err) {
+		if err = os.MkdirAll(filepath.Dir(tempFile), fs.dirMode()); err != nil {
+			return nil, fmt.Errorf("cannot create directory: %v", err)
+		}
+		return os.OpenFile(tempFile, flags, fs.fileMode())
+	}
+
+	return f, err
+}
+
+func (fs *fsStorage) DeleteBlock(ctx context.Context, blockID string) error {
+	_, path := fs.getShardedPathAndFilePath(blockID)
+	err := os.Remove(path)
+	if err == nil || os.IsNotExist(err) {
+		return nil
+	}
+
+	return err
+}
+
+func (fs *fsStorage) getShardDirectory(blockID string) (string, string) {
+	shardPath := fs.Path
+	if len(blockID) < 20 {
+		return shardPath, blockID
+	}
+	for _, size := range fs.shards() {
+		shardPath = filepath.Join(shardPath, blockID[0:size])
+		blockID = blockID[size:]
+	}
+
+	return shardPath, blockID
+}
+
+func (fs *fsStorage) getShardedPathAndFilePath(blockID string) (string, string) {
+	shardPath, blockID := fs.getShardDirectory(blockID)
+	result := filepath.Join(shardPath, makeFileName(blockID))
+	return shardPath, result
+}
+
+func (fs *fsStorage) ConnectionInfo() storage.ConnectionInfo {
+	return storage.ConnectionInfo{
+		Type:   fsStorageType,
+		Config: &fs.Options,
+	}
+}
+
+func (fs *fsStorage) Close(ctx context.Context) error {
+	return nil
+}
+
+// New creates new filesystem-backed storage in a specified directory.
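+//
+// Example (editorial sketch; the path is a placeholder):
+//
+//	st, err := New(ctx, &Options{
+//		Path:            "/var/lib/kopia-storage",
+//		DirectoryShards: []int{3, 3}, // optional; nil selects the default {3, 3}
+//	})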
+func New(ctx context.Context, opts *Options) (storage.Storage, error) {
+	var err error
+
+	if _, err = os.Stat(opts.Path); err != nil {
+		return nil, fmt.Errorf("cannot access storage path: %v", err)
+	}
+
+	r := &fsStorage{
+		Options: *opts,
+	}
+
+	return r, nil
+}
+
+func init() {
+	storage.AddSupportedStorage(
+		fsStorageType,
+		func() interface{} { return &Options{} },
+		func(ctx context.Context, o interface{}) (storage.Storage, error) {
+			return New(ctx, o.(*Options))
+		})
+}
diff --git a/storage/filesystem/filesystem_storage_test.go b/storage/filesystem/filesystem_storage_test.go
new file mode 100644
index 000000000..bccdc3072
--- /dev/null
+++ b/storage/filesystem/filesystem_storage_test.go
@@ -0,0 +1,110 @@
+package filesystem
+
+import (
+	"context"
+	"io/ioutil"
+	"os"
+	"reflect"
+	"sort"
+	"testing"
+	"time"
+
+	"github.com/kopia/repo/storage"
+
+	"github.com/kopia/repo/internal/storagetesting"
+)
+
+func TestFileStorage(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+
+	// Test various shard configurations.
+	for _, shardSpec := range [][]int{
+		[]int{0},
+		[]int{1},
+		[]int{3, 3},
+		[]int{2},
+		[]int{1, 1},
+		[]int{1, 2},
+		[]int{2, 2, 2},
+	} {
+		path, _ := ioutil.TempDir("", "r-fs")
+		defer os.RemoveAll(path)
+
+		r, err := New(ctx, &Options{
+			Path:            path,
+			DirectoryShards: shardSpec,
+		})
+
+		if r == nil || err != nil {
+			t.Errorf("unexpected result: %v %v", r, err)
+		}
+
+		storagetesting.VerifyStorage(ctx, t, r)
+		storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r)
+	}
+}
+
+func TestFileStorageTouch(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+
+	t1 := "392ee1bc299db9f235e046a62625afb84902"
+	t2 := "2a7ff4f29eddbcd4c18fa9e73fec20bbb71f"
+	t3 := "0dae5918f83e6a24c8b3e274ca1026e43f24"
+
+	path, _ := ioutil.TempDir("", "r-fs")
+	defer os.RemoveAll(path)
+
+	r, err := New(ctx, &Options{
+		Path: path,
+	})
+
+	if r == nil || err != nil {
+		t.Errorf("unexpected result: %v %v", r, err)
+	}
+
+	fs := r.(*fsStorage)
+	fs.PutBlock(ctx, t1, []byte{1})
+	time.Sleep(1 * time.Second) // sleep a bit to accommodate Apple filesystems with low timestamp resolution
+	fs.PutBlock(ctx, t2, []byte{1})
+	time.Sleep(1 * time.Second)
+	fs.PutBlock(ctx, t3, []byte{1})
+
+	verifyBlockTimestampOrder(t, fs, t1, t2, t3)
+
+	fs.TouchBlock(ctx, t2, 1*time.Hour) // has no effect, all timestamps are very new
+	verifyBlockTimestampOrder(t, fs, t1, t2, t3)
+
+	fs.TouchBlock(ctx, t1, 0) // moves t1 to the top of the pile
+	verifyBlockTimestampOrder(t, fs, t2, t3, t1)
+	time.Sleep(1 * time.Second)
+
+	fs.TouchBlock(ctx, t2, 0) // moves t2 to the top of the pile
+	verifyBlockTimestampOrder(t, fs, t3, t1, t2)
+	time.Sleep(1 * time.Second)
+
+	fs.TouchBlock(ctx, t1, 0) // moves t1 to the top of the pile
+	verifyBlockTimestampOrder(t, fs, t3, t2, t1)
+}
+
+func verifyBlockTimestampOrder(t *testing.T, st storage.Storage, want ...string) {
+	blocks, err := storage.ListAllBlocks(context.Background(), st, "")
+	if err != nil {
+		t.Errorf("error listing blocks: %v", err)
+		return
+	}
+
+	sort.Slice(blocks, func(i, j int) bool {
+		return blocks[i].Timestamp.Before(blocks[j].Timestamp)
+	})
+
+	var got []string
+	for _, b := range blocks {
+		got = append(got, b.BlockID)
+	}
+
+	if !reflect.DeepEqual(got, want) {
+		t.Errorf("incorrect block order: %v, wanted %v", blocks, want)
+	}
+}
diff --git a/storage/gcs/gcs_options.go b/storage/gcs/gcs_options.go
new file mode 100644
index 000000000..9935fb8e3
--- /dev/null
+++ b/storage/gcs/gcs_options.go
@@ -0,0 +1,20 @@
+package gcs
+
+// 
Options defines options for Google Cloud Storage-backed storage.
+type Options struct {
+	// BucketName is the name of the GCS bucket where data is stored.
+	BucketName string `json:"bucket"`
+
+	// Prefix specifies additional string to prepend to all objects.
+	Prefix string `json:"prefix,omitempty"`
+
+	// ServiceAccountCredentials specifies the name of the file with GCS credentials.
+	ServiceAccountCredentials string `json:"credentialsFile,omitempty"`
+
+	// ReadOnly causes GCS connection to be opened with read-only scope to prevent accidental mutations.
+	ReadOnly bool `json:"readOnly,omitempty"`
+
+	MaxUploadSpeedBytesPerSecond int `json:"maxUploadSpeedBytesPerSecond,omitempty"`
+
+	MaxDownloadSpeedBytesPerSecond int `json:"maxDownloadSpeedBytesPerSecond,omitempty"`
+}
diff --git a/storage/gcs/gcs_storage.go b/storage/gcs/gcs_storage.go
new file mode 100644
index 000000000..eca7b5ff2
--- /dev/null
+++ b/storage/gcs/gcs_storage.go
@@ -0,0 +1,254 @@
+// Package gcs implements Storage based on a Google Cloud Storage bucket.
+package gcs
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+
+	"github.com/efarrer/iothrottler"
+	"github.com/kopia/repo/internal/retry"
+	"github.com/kopia/repo/internal/throttle"
+	"github.com/kopia/repo/storage"
+	"golang.org/x/oauth2"
+	"golang.org/x/oauth2/google"
+	"google.golang.org/api/iterator"
+	"google.golang.org/api/option"
+
+	gcsclient "cloud.google.com/go/storage"
+)
+
+const (
+	gcsStorageType = "gcs"
+)
+
+type gcsStorage struct {
+	Options
+
+	ctx           context.Context
+	storageClient *gcsclient.Client
+	bucket        *gcsclient.BucketHandle
+
+	downloadThrottler *iothrottler.IOThrottlerPool
+	uploadThrottler   *iothrottler.IOThrottlerPool
+}
+
+func (gcs *gcsStorage) GetBlock(ctx context.Context, b string, offset, length int64) ([]byte, error) {
+	attempt := func() (interface{}, error) {
+		reader, err := gcs.bucket.Object(gcs.getObjectNameString(b)).NewRangeReader(gcs.ctx, offset, length)
+		if err != nil {
+			return nil, err
+		}
+		defer reader.Close() //nolint:errcheck
+
+		return ioutil.ReadAll(reader)
+	}
+
+	v, err := exponentialBackoff(fmt.Sprintf("GetBlock(%q,%v,%v)", b, offset, length), attempt)
+	if err != nil {
+		return nil, translateError(err)
+	}
+
+	return v.([]byte), nil
+}
+
+func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error) {
+	return retry.WithExponentialBackoff(desc, att, isRetriableError)
+}
+
+func isRetriableError(err error) bool {
+	switch err {
+	case nil:
+		return false
+	case gcsclient.ErrObjectNotExist:
+		return false
+	case gcsclient.ErrBucketNotExist:
+		return false
+	default:
+		return true
+	}
+}
+
+func translateError(err error) error {
+	switch err {
+	case nil:
+		return nil
+	case gcsclient.ErrObjectNotExist:
+		return storage.ErrBlockNotFound
+	case gcsclient.ErrBucketNotExist:
+		return storage.ErrBlockNotFound
+	default:
+		return fmt.Errorf("unexpected GCS error: %v", err)
+	}
+}
+
+func (gcs *gcsStorage) PutBlock(ctx context.Context, b string, data []byte) error {
+	ctx, cancel := context.WithCancel(ctx)
+
+	obj := gcs.bucket.Object(gcs.getObjectNameString(b))
+	writer := obj.NewWriter(ctx)
+	writer.ChunkSize = 1 << 20
+	writer.ContentType = "application/x-kopia"
+
+	progressCallback := storage.ProgressCallback(ctx)
+
+	if progressCallback != nil {
+		progressCallback(b, 0, int64(len(data)))
+		defer progressCallback(b, int64(len(data)), int64(len(data)))
+
+		writer.ProgressFunc = func(completed int64) {
+			if completed != int64(len(data)) {
+				progressCallback(b, completed, 
int64(len(data)))
+			}
+		}
+	}
+
+	_, err := io.Copy(writer, bytes.NewReader(data))
+	if err != nil {
+		// canceling the context before closing the writer causes it to abandon the upload.
+		cancel()
+		writer.Close() //nolint:errcheck
+		return translateError(err)
+	}
+	defer cancel()
+
+	// calling Close before cancel() causes the upload to be committed.
+	return translateError(writer.Close())
+}
+
+func (gcs *gcsStorage) DeleteBlock(ctx context.Context, b string) error {
+	attempt := func() (interface{}, error) {
+		return nil, gcs.bucket.Object(gcs.getObjectNameString(b)).Delete(gcs.ctx)
+	}
+
+	_, err := exponentialBackoff(fmt.Sprintf("DeleteBlock(%q)", b), attempt)
+	return translateError(err)
+}
+
+func (gcs *gcsStorage) getObjectNameString(blockID string) string {
+	return gcs.Prefix + blockID
+}
+
+func (gcs *gcsStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
+	lst := gcs.bucket.Objects(gcs.ctx, &gcsclient.Query{
+		Prefix: gcs.getObjectNameString(prefix),
+	})
+
+	oa, err := lst.Next()
+	for err == nil {
+		if err = callback(storage.BlockMetadata{
+			BlockID:   oa.Name[len(gcs.Prefix):],
+			Length:    oa.Size,
+			Timestamp: oa.Created,
+		}); err != nil {
+			return err
+		}
+		oa, err = lst.Next()
+	}
+
+	if err != iterator.Done {
+		return err
+	}
+
+	return nil
+}
+
+func (gcs *gcsStorage) ConnectionInfo() storage.ConnectionInfo {
+	return storage.ConnectionInfo{
+		Type:   gcsStorageType,
+		Config: &gcs.Options,
+	}
+}
+
+func (gcs *gcsStorage) Close(ctx context.Context) error {
+	gcs.storageClient.Close() //nolint:errcheck
+	return nil
+}
+
+func (gcs *gcsStorage) String() string {
+	return fmt.Sprintf("gcs://%v/%v", gcs.BucketName, gcs.Prefix)
+}
+
+func toBandwidth(bytesPerSecond int) iothrottler.Bandwidth {
+	if bytesPerSecond <= 0 {
+		return iothrottler.Unlimited
+	}
+
+	return iothrottler.Bandwidth(bytesPerSecond) * iothrottler.BytesPerSecond
+}
+
+func tokenSourceFromCredentialsFile(ctx context.Context, fn string, scopes ...string) (oauth2.TokenSource, error) {
+	data, err := ioutil.ReadFile(fn)
+	if err != nil {
+		return nil, err
+	}
+
+	cfg, err := google.JWTConfigFromJSON(data, scopes...)
+	if err != nil {
+		return nil, fmt.Errorf("google.JWTConfigFromJSON: %v", err)
+	}
+	return cfg.TokenSource(ctx), nil
+}
+
+// New creates new Google Cloud Storage-backed storage with specified options:
+//
+// - the 'BucketName' field is required and all other parameters are optional.
+//
+// By default the connection reuses credentials managed by the Google Cloud SDK
+// (https://cloud.google.com/sdk/); a dedicated service account can be used
+// instead by setting ServiceAccountCredentials to the path of a credentials file.
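+//
+// Example (editorial sketch; bucket and credentials-file names are placeholders):
+//
+//	st, err := New(ctx, &Options{
+//		BucketName:                "my-kopia-bucket",
+//		ServiceAccountCredentials: "/etc/kopia/gcs-credentials.json",
+//	})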
+func New(ctx context.Context, opt *Options) (storage.Storage, error) { + var ts oauth2.TokenSource + var err error + + scope := gcsclient.ScopeReadWrite + if opt.ReadOnly { + scope = gcsclient.ScopeReadOnly + } + + if sa := opt.ServiceAccountCredentials; sa != "" { + ts, err = tokenSourceFromCredentialsFile(ctx, sa, scope) + } else { + ts, err = google.DefaultTokenSource(ctx, scope) + } + + if err != nil { + return nil, err + } + + downloadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxDownloadSpeedBytesPerSecond)) + uploadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxUploadSpeedBytesPerSecond)) + + hc := oauth2.NewClient(ctx, ts) + hc.Transport = throttle.NewRoundTripper(hc.Transport, downloadThrottler, uploadThrottler) + + cli, err := gcsclient.NewClient(ctx, option.WithHTTPClient(hc)) + if err != nil { + return nil, err + } + + if opt.BucketName == "" { + return nil, errors.New("bucket name must be specified") + } + + return &gcsStorage{ + Options: *opt, + ctx: ctx, + storageClient: cli, + bucket: cli.Bucket(opt.BucketName), + downloadThrottler: downloadThrottler, + uploadThrottler: uploadThrottler, + }, nil +} + +func init() { + storage.AddSupportedStorage( + gcsStorageType, + func() interface{} { + return &Options{} + }, + func(ctx context.Context, o interface{}) (storage.Storage, error) { + return New(ctx, o.(*Options)) + }) +} diff --git a/storage/logging/logging_storage.go b/storage/logging/logging_storage.go new file mode 100644 index 000000000..79cd7818d --- /dev/null +++ b/storage/logging/logging_storage.go @@ -0,0 +1,96 @@ +// Package logging implements wrapper around Storage that logs all activity. +package logging + +import ( + "context" + "time" + + "github.com/kopia/repo/internal/repologging" + "github.com/kopia/repo/storage" +) + +var log = repologging.Logger("repo/storage") + +type loggingStorage struct { + base storage.Storage + printf func(string, ...interface{}) + prefix string +} + +func (s *loggingStorage) GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error) { + t0 := time.Now() + result, err := s.base.GetBlock(ctx, id, offset, length) + dt := time.Since(t0) + if len(result) < 20 { + s.printf(s.prefix+"GetBlock(%q,%v,%v)=(%#v, %#v) took %v", id, offset, length, result, err, dt) + } else { + s.printf(s.prefix+"GetBlock(%q,%v,%v)=({%#v bytes}, %#v) took %v", id, offset, length, len(result), err, dt) + } + return result, err +} + +func (s *loggingStorage) PutBlock(ctx context.Context, id string, data []byte) error { + t0 := time.Now() + err := s.base.PutBlock(ctx, id, data) + dt := time.Since(t0) + s.printf(s.prefix+"PutBlock(%q,len=%v)=%#v took %v", id, len(data), err, dt) + return err +} + +func (s *loggingStorage) DeleteBlock(ctx context.Context, id string) error { + t0 := time.Now() + err := s.base.DeleteBlock(ctx, id) + dt := time.Since(t0) + s.printf(s.prefix+"DeleteBlock(%q)=%#v took %v", id, err, dt) + return err +} + +func (s *loggingStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error { + t0 := time.Now() + cnt := 0 + err := s.base.ListBlocks(ctx, prefix, func(bi storage.BlockMetadata) error { + cnt++ + return callback(bi) + }) + s.printf(s.prefix+"ListBlocks(%q)=%v returned %v items and took %v", prefix, err, cnt, time.Since(t0)) + return err +} + +func (s *loggingStorage) Close(ctx context.Context) error { + t0 := time.Now() + err := s.base.Close(ctx) + dt := time.Since(t0) + s.printf(s.prefix+"Close()=%#v took %v", err, dt) + return err +} + 
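+// Usage sketch (editorial addition): a wrapper is typically installed right
+// after opening the underlying storage, using the options defined below:
+//
+//	st = NewWrapper(st, Prefix("[STORAGE] "), Output(log.Debugf))
+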
+func (s *loggingStorage) ConnectionInfo() storage.ConnectionInfo { + return s.base.ConnectionInfo() +} + +// Option modifies the behavior of logging storage wrapper. +type Option func(s *loggingStorage) + +// NewWrapper returns a Storage wrapper that logs all storage commands. +func NewWrapper(wrapped storage.Storage, options ...Option) storage.Storage { + s := &loggingStorage{base: wrapped, printf: log.Debugf} + for _, o := range options { + o(s) + } + + return s +} + +// Output is a logging storage option that causes all output to be sent to a given function instead of log.Printf() +func Output(outputFunc func(fmt string, args ...interface{})) Option { + return func(s *loggingStorage) { + s.printf = outputFunc + } +} + +// Prefix specifies prefix to be prepended to all log output. +func Prefix(prefix string) Option { + return func(s *loggingStorage) { + s.prefix = prefix + } +} diff --git a/storage/logging/logging_storage_test.go b/storage/logging/logging_storage_test.go new file mode 100644 index 000000000..084b4311a --- /dev/null +++ b/storage/logging/logging_storage_test.go @@ -0,0 +1,17 @@ +package logging + +import ( + "context" + "testing" + + "github.com/kopia/repo/internal/storagetesting" +) + +func TestLoggingStorage(t *testing.T) { + data := map[string][]byte{} + r := NewWrapper(storagetesting.NewMapStorage(data, nil, nil)) + if r == nil { + t.Errorf("unexpected result: %v", r) + } + storagetesting.VerifyStorage(context.Background(), t, r) +} diff --git a/storage/progress.go b/storage/progress.go new file mode 100644 index 000000000..fcf56d6f3 --- /dev/null +++ b/storage/progress.go @@ -0,0 +1,21 @@ +package storage + +import "context" + +type contextKey string + +var progressCallbackContextKey contextKey = "progress-callback" + +// ProgressFunc is used to report progress of a long-running storage operation. +type ProgressFunc func(desc string, completed, total int64) + +// WithUploadProgressCallback returns a context that passes callback function to be used storage upload progress. +func WithUploadProgressCallback(ctx context.Context, callback ProgressFunc) context.Context { + return context.WithValue(ctx, progressCallbackContextKey, callback) +} + +// ProgressCallback gets the progress callback function from the context. +func ProgressCallback(ctx context.Context) ProgressFunc { + pf, _ := ctx.Value(progressCallbackContextKey).(ProgressFunc) + return pf +} diff --git a/storage/providers/providers.go b/storage/providers/providers.go new file mode 100644 index 000000000..c06c01378 --- /dev/null +++ b/storage/providers/providers.go @@ -0,0 +1,8 @@ +// Package providers registers all storage providers that are included as part of Kopia. +package providers + +import ( + // Register well-known blob storage providers + _ "github.com/kopia/repo/storage/filesystem" + _ "github.com/kopia/repo/storage/gcs" +) diff --git a/storage/registry.go b/storage/registry.go new file mode 100644 index 000000000..776e42cc0 --- /dev/null +++ b/storage/registry.go @@ -0,0 +1,39 @@ +package storage + +import ( + "context" + "fmt" +) + +var ( + factories = map[string]*storageFactory{} +) + +// StorageFactory allows creation of repositories in a generic way. +type storageFactory struct { + defaultConfigFunc func() interface{} + createStorageFunc func(context.Context, interface{}) (Storage, error) +} + +// AddSupportedStorage registers factory function to create storage with a given type name. 
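+//
+// A provider typically registers itself from init() (editorial sketch;
+// "mystorage", myOptions and newMyStorage are illustrative only):
+//
+//	storage.AddSupportedStorage("mystorage",
+//		func() interface{} { return &myOptions{} },
+//		func(ctx context.Context, o interface{}) (storage.Storage, error) {
+//			return newMyStorage(ctx, o.(*myOptions))
+//		})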
+func AddSupportedStorage(
+	urlScheme string,
+	defaultConfigFunc func() interface{},
+	createStorageFunc func(context.Context, interface{}) (Storage, error)) {

+	f := &storageFactory{
+		defaultConfigFunc: defaultConfigFunc,
+		createStorageFunc: createStorageFunc,
+	}
+	factories[urlScheme] = f
+}
+
+// NewStorage creates new storage based on ConnectionInfo.
+// The storage type must be previously registered using AddSupportedStorage.
+func NewStorage(ctx context.Context, cfg ConnectionInfo) (Storage, error) {
+	if factory, ok := factories[cfg.Type]; ok {
+		return factory.createStorageFunc(ctx, cfg.Config)
+	}
+
+	return nil, fmt.Errorf("unknown storage type: %s", cfg.Type)
+}
diff --git a/storage/s3/s3_options.go b/storage/s3/s3_options.go
new file mode 100644
index 000000000..c2d675953
--- /dev/null
+++ b/storage/s3/s3_options.go
@@ -0,0 +1,20 @@
+package s3
+
+// Options defines options for S3-based storage.
+type Options struct {
+	// BucketName is the name of the bucket where data is stored.
+	BucketName string `json:"bucket"`
+
+	// Prefix specifies additional string to prepend to all objects.
+	Prefix string `json:"prefix,omitempty"`
+
+	Endpoint    string `json:"endpoint"`
+	DoNotUseTLS bool   `json:"doNotUseTLS,omitempty"`
+
+	AccessKeyID     string `json:"accessKeyID"`
+	SecretAccessKey string `json:"secretAccessKey" kopia:"sensitive"`
+
+	MaxUploadSpeedBytesPerSecond int `json:"maxUploadSpeedBytesPerSecond,omitempty"`
+
+	MaxDownloadSpeedBytesPerSecond int `json:"maxDownloadSpeedBytesPerSecond,omitempty"`
+}
diff --git a/storage/s3/s3_storage.go b/storage/s3/s3_storage.go
new file mode 100644
index 000000000..5f17b71b3
--- /dev/null
+++ b/storage/s3/s3_storage.go
@@ -0,0 +1,241 @@
+// Package s3 implements Storage based on an S3 bucket.
+package s3
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+
+	"github.com/efarrer/iothrottler"
+	"github.com/kopia/repo/internal/retry"
+	"github.com/kopia/repo/storage"
+	"github.com/minio/minio-go"
+)
+
+const (
+	s3storageType = "s3"
+)
+
+type s3Storage struct {
+	Options
+
+	ctx context.Context
+
+	cli *minio.Client
+
+	downloadThrottler *iothrottler.IOThrottlerPool
+	uploadThrottler   *iothrottler.IOThrottlerPool
+}
+
+func (s *s3Storage) GetBlock(ctx context.Context, b string, offset, length int64) ([]byte, error) {
+	attempt := func() (interface{}, error) {
+		var opt minio.GetObjectOptions
+		if length > 0 {
+			if err := opt.SetRange(offset, offset+length-1); err != nil {
+				return nil, fmt.Errorf("unable to set range: %v", err)
+			}
+		}
+
+		o, err := s.cli.GetObject(s.BucketName, s.getObjectNameString(b), opt)
+		if err != nil {
+			return nil, err
+		}
+
+		defer o.Close() //nolint:errcheck
+		throttled, err := s.downloadThrottler.AddReader(o)
+		if err != nil {
+			return nil, err
+		}
+
+		return ioutil.ReadAll(throttled)
+	}
+
+	v, err := exponentialBackoff(fmt.Sprintf("GetBlock(%q,%v,%v)", b, offset, length), attempt)
+	if err != nil {
+		return nil, translateError(err)
+	}
+
+	return v.([]byte), nil
+}
+
+func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error) {
+	return retry.WithExponentialBackoff(desc, att, isRetriableError)
+}
+
+func isRetriableError(err error) bool {
+	if me, ok := err.(minio.ErrorResponse); ok {
+		// retry on server errors, not on client errors
+		return me.StatusCode >= 500
+	}
+
+	switch err {
+	case nil:
+		return false
+	default:
+		return true
+	}
+}
+
+func translateError(err error) error {
+	if me, ok := err.(minio.ErrorResponse); ok {
+		if me.StatusCode == 200 {
+			return nil
+		
} + if me.StatusCode == 404 { + return storage.ErrBlockNotFound + } + } + + switch err { + case nil: + return nil + default: + return fmt.Errorf("unexpected S3 error: %v", err) + } +} + +func (s *s3Storage) PutBlock(ctx context.Context, b string, data []byte) error { + throttled, err := s.uploadThrottler.AddReader(ioutil.NopCloser(bytes.NewReader(data))) + if err != nil { + return err + } + + progressCallback := storage.ProgressCallback(ctx) + if progressCallback != nil { + progressCallback(b, 0, int64(len(data))) + defer progressCallback(b, int64(len(data)), int64(len(data))) + } + n, err := s.cli.PutObject(s.BucketName, s.getObjectNameString(b), throttled, -1, minio.PutObjectOptions{ + ContentType: "application/x-kopia", + Progress: newProgressReader(progressCallback, b, int64(len(data))), + }) + if err == io.EOF && n == 0 { + // special case empty stream + _, err = s.cli.PutObject(s.BucketName, s.getObjectNameString(b), bytes.NewBuffer(nil), 0, minio.PutObjectOptions{ + ContentType: "application/x-kopia", + }) + } + + return translateError(err) +} + +func (s *s3Storage) DeleteBlock(ctx context.Context, b string) error { + attempt := func() (interface{}, error) { + return nil, s.cli.RemoveObject(s.BucketName, s.getObjectNameString(b)) + } + + _, err := exponentialBackoff(fmt.Sprintf("DeleteBlock(%q)", b), attempt) + return translateError(err) +} + +func (s *s3Storage) getObjectNameString(b string) string { + return s.Prefix + b +} + +func (s *s3Storage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error { + oi := s.cli.ListObjects(s.BucketName, s.Prefix+prefix, false, ctx.Done()) + for o := range oi { + if err := o.Err; err != nil { + return err + } + + bm := storage.BlockMetadata{ + BlockID: o.Key[len(s.Prefix):], + Length: o.Size, + Timestamp: o.LastModified, + } + + if err := callback(bm); err != nil { + return err + } + } + + return nil +} + +func (s *s3Storage) ConnectionInfo() storage.ConnectionInfo { + return storage.ConnectionInfo{ + Type: s3storageType, + Config: &s.Options, + } +} + +func (s *s3Storage) Close(ctx context.Context) error { + return nil +} + +func (s *s3Storage) String() string { + return fmt.Sprintf("s3://%v/%v", s.BucketName, s.Prefix) +} + +type progressReader struct { + cb storage.ProgressFunc + blockID string + completed int64 + totalLength int64 + lastReported int64 +} + +func (r *progressReader) Read(b []byte) (int, error) { + r.completed += int64(len(b)) + if r.completed >= r.lastReported+1000000 && r.completed < r.totalLength { + r.cb(r.blockID, r.completed, r.totalLength) + r.lastReported = r.completed + } + return len(b), nil +} + +func newProgressReader(cb storage.ProgressFunc, blockID string, totalLength int64) io.Reader { + if cb == nil { + return nil + } + + return &progressReader{cb: cb, blockID: blockID, totalLength: totalLength} +} + +func toBandwidth(bytesPerSecond int) iothrottler.Bandwidth { + if bytesPerSecond <= 0 { + return iothrottler.Unlimited + } + + return iothrottler.Bandwidth(bytesPerSecond) * iothrottler.BytesPerSecond +} + +// New creates new S3-backed storage with specified options: +// +// - the 'BucketName' field is required and all other parameters are optional. 
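+//
+// Example (editorial sketch; endpoint and credentials are placeholders):
+//
+//	st, err := New(ctx, &Options{
+//		BucketName:      "my-kopia-bucket",
+//		Endpoint:        "s3.example.com:9000",
+//		AccessKeyID:     accessKeyID,
+//		SecretAccessKey: secretAccessKey,
+//	})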
+func New(ctx context.Context, opt *Options) (storage.Storage, error) { + if opt.BucketName == "" { + return nil, errors.New("bucket name must be specified") + } + + cli, err := minio.New(opt.Endpoint, opt.AccessKeyID, opt.SecretAccessKey, !opt.DoNotUseTLS) + if err != nil { + return nil, fmt.Errorf("unable to create client: %v", err) + } + + downloadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxDownloadSpeedBytesPerSecond)) + uploadThrottler := iothrottler.NewIOThrottlerPool(toBandwidth(opt.MaxUploadSpeedBytesPerSecond)) + + return &s3Storage{ + Options: *opt, + ctx: ctx, + cli: cli, + downloadThrottler: downloadThrottler, + uploadThrottler: uploadThrottler, + }, nil +} + +func init() { + storage.AddSupportedStorage( + s3storageType, + func() interface{} { + return &Options{} + }, + func(ctx context.Context, o interface{}) (storage.Storage, error) { + return New(ctx, o.(*Options)) + }) +} diff --git a/storage/s3/s3_storage_test.go b/storage/s3/s3_storage_test.go new file mode 100644 index 000000000..29602947c --- /dev/null +++ b/storage/s3/s3_storage_test.go @@ -0,0 +1,112 @@ +package s3 + +import ( + "context" + "crypto/rand" + "crypto/sha1" + "fmt" + "log" + "net" + "os" + "testing" + "time" + + "github.com/kopia/repo/internal/storagetesting" + "github.com/kopia/repo/storage" + "github.com/minio/minio-go" +) + +// https://github.com/minio/minio-go +const ( + endpoint = "play.minio.io:9000" + accessKeyID = "Q3AM3UQ867SPQQA43P2F" + secretAccessKey = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" + useSSL = true + + // the test takes a few seconds, delete stuff older than 1h to avoid accumulating cruft + cleanupAge = 1 * time.Hour +) + +var bucketName = getBucketName() + +func getBucketName() string { + hn, err := os.Hostname() + if err != nil { + return "kopia-test-1" + } + h := sha1.New() + fmt.Fprintf(h, "%v", hn) + return fmt.Sprintf("kopia-test-%x", h.Sum(nil)[0:8]) +} + +func endpointReachable() bool { + conn, err := net.DialTimeout("tcp4", endpoint, 5*time.Second) + if err == nil { + conn.Close() + return true + } + + return false +} + +func TestS3Storage(t *testing.T) { + if !endpointReachable() { + t.Skip("endpoint not reachable") + } + + ctx := context.Background() + + // recreate per-host bucket, which sometimes get cleaned up by play.minio.io + createBucket(t) + cleanupOldData(ctx, t) + + data := make([]byte, 8) + rand.Read(data) + + st, err := New(context.Background(), &Options{ + AccessKeyID: accessKeyID, + SecretAccessKey: secretAccessKey, + Endpoint: endpoint, + BucketName: bucketName, + Prefix: fmt.Sprintf("test-%v-%x-", time.Now().Unix(), data), + }) + if err != nil { + t.Fatalf("err: %v", err) + } + + storagetesting.VerifyStorage(ctx, t, st) + storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st) +} + +func createBucket(t *testing.T) { + minioClient, err := minio.New(endpoint, accessKeyID, secretAccessKey, useSSL) + if err != nil { + t.Fatalf("can't initialize minio client: %v", err) + } + minioClient.MakeBucket(bucketName, "us-east-1") +} + +func cleanupOldData(ctx context.Context, t *testing.T) { + // cleanup old data from the bucket + st, err := New(context.Background(), &Options{ + AccessKeyID: accessKeyID, + SecretAccessKey: secretAccessKey, + Endpoint: endpoint, + BucketName: bucketName, + }) + if err != nil { + t.Fatalf("err: %v", err) + } + + st.ListBlocks(ctx, "", func(it storage.BlockMetadata) error { + age := time.Since(it.Timestamp) + if age > cleanupAge { + if err := st.DeleteBlock(ctx, it.BlockID); err != nil { + t.Errorf("warning: unable 
to delete %q: %v", it.BlockID, err)
+			}
+		} else {
+			log.Printf("keeping %v", it.BlockID)
+		}
+		return nil
+	})
+}
diff --git a/storage/storage.go b/storage/storage.go
new file mode 100644
index 000000000..9934c6aed
--- /dev/null
+++ b/storage/storage.go
@@ -0,0 +1,108 @@
+package storage
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"time"
+)
+
+// CancelFunc requests cancellation of a storage operation.
+type CancelFunc func()
+
+// Storage encapsulates API for connecting to blob storage.
+//
+// The underlying storage system must provide:
+//
+// * high durability, availability and bit-rot protection
+// * read-after-write - block written using PutBlock() must be immediately readable using GetBlock() and ListBlocks()
+// * atomicity - it mustn't be possible to observe partial results of PutBlock() via either GetBlock() or ListBlocks()
+// * timestamps that don't go back in time (small clock skew up to minutes is allowed)
+// * reasonably low latency for retrievals
+//
+// The required semantics are provided by existing commercial cloud storage products (Google Cloud, AWS, Azure).
+type Storage interface {
+	// PutBlock uploads the block with given data to the repository or replaces existing block with the provided
+	// id with given contents.
+	PutBlock(ctx context.Context, id string, data []byte) error
+
+	// DeleteBlock removes the block from storage. Future GetBlock() operations will fail with ErrBlockNotFound.
+	DeleteBlock(ctx context.Context, id string) error
+
+	// GetBlock returns full or partial contents of a block with given ID.
+	// If length>0, the function retrieves a range of bytes [offset,offset+length).
+	// If length<0, the entire block must be fetched.
+	GetBlock(ctx context.Context, id string, offset, length int64) ([]byte, error)
+
+	// ListBlocks invokes the callback for each storage block whose name has the given prefix.
+	// Iteration continues until all matching blocks have been listed or until the callback returns an error.
+	ListBlocks(ctx context.Context, prefix string, cb func(bm BlockMetadata) error) error
+
+	// ConnectionInfo returns JSON-serializable data structure containing information required to
+	// connect to storage.
+	ConnectionInfo() ConnectionInfo
+
+	// Close releases all resources associated with storage.
+	Close(ctx context.Context) error
+}
+
+// BlockMetadata represents metadata about a single block in a storage.
+type BlockMetadata struct {
+	BlockID   string
+	Length    int64
+	Timestamp time.Time
+}
+
+// ErrBlockNotFound is returned when a block cannot be found in storage.
+var ErrBlockNotFound = errors.New("block not found")
+
+// ListAllBlocks returns BlockMetadata for all blocks in a given storage that have the provided name prefix.
+func ListAllBlocks(ctx context.Context, st Storage, prefix string) ([]BlockMetadata, error) {
+	var result []BlockMetadata
+
+	err := st.ListBlocks(ctx, prefix, func(bm BlockMetadata) error {
+		result = append(result, bm)
+		return nil
+	})
+
+	return result, err
+}
+
+// ListAllBlocksConsistent lists all blocks with given name prefix in the provided storage until the results are
+// consistent. The results are consistent if the list result fetched twice is identical. This guarantees that while
+// the first scan was in progress, no new block was added or removed.
+// maxAttempts specifies the maximum number of list attempts (must be >= 2).
+func ListAllBlocksConsistent(ctx context.Context, st Storage, prefix string, maxAttempts int) ([]BlockMetadata, error) {
+	var previous []BlockMetadata
+
+	for i := 0; i < maxAttempts; i++ {
+		result, err := ListAllBlocks(ctx, st, prefix)
+		if err != nil {
+			return nil, err
+		}
+		if i > 0 && sameBlocks(result, previous) {
+			return result, nil
+		}
+
+		previous = result
+	}
+
+	return nil, fmt.Errorf("unable to achieve consistent snapshot despite %v attempts", maxAttempts)
+}
+
+// sameBlocks returns true if b1 & b2 contain the same blocks (ignoring order).
+func sameBlocks(b1, b2 []BlockMetadata) bool {
+	if len(b1) != len(b2) {
+		return false
+	}
+	m := map[string]BlockMetadata{}
+	for _, b := range b1 {
+		m[b.BlockID] = b
+	}
+	for _, b := range b2 {
+		if m[b.BlockID] != b {
+			return false
+		}
+	}
+	return true
+}
diff --git a/storage/storage_test.go b/storage/storage_test.go
new file mode 100644
index 000000000..a94639694
--- /dev/null
+++ b/storage/storage_test.go
@@ -0,0 +1,57 @@
+package storage_test
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/kopia/repo/internal/storagetesting"
+	"github.com/kopia/repo/storage"
+)
+
+func TestListAllBlocksConsistent(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	st := storagetesting.NewMapStorage(data, nil, time.Now)
+	st.PutBlock(ctx, "foo1", []byte{1, 2, 3})
+	st.PutBlock(ctx, "foo2", []byte{1, 2, 3})
+	st.PutBlock(ctx, "foo3", []byte{1, 2, 3})
+
+	// set up faulty storage that will add a block while a scan is in progress.
+	f := &storagetesting.FaultyStorage{
+		Base: st,
+		Faults: map[string][]*storagetesting.Fault{
+			"ListBlocksItem": {
+				{ErrCallback: func() error {
+					st.PutBlock(ctx, "foo0", []byte{1, 2, 3})
+					return nil
+				}},
+			},
+		},
+	}
+
+	r, err := storage.ListAllBlocksConsistent(ctx, f, "foo", 3)
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	// make sure we get the list with 4 items, not 3.
+	if got, want := len(r), 4; got != want {
+		t.Errorf("unexpected list result count: %v, want %v", got, want)
+	}
+}
+
+func TestListAllBlocksConsistentEmpty(t *testing.T) {
+	ctx := context.Background()
+	data := map[string][]byte{}
+	st := storagetesting.NewMapStorage(data, nil, time.Now)
+
+	r, err := storage.ListAllBlocksConsistent(ctx, st, "foo", 3)
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	if got, want := len(r), 0; got != want {
+		t.Errorf("unexpected list result count: %v, want %v", got, want)
+	}
+}
diff --git a/storage/webdav/webdav_options.go b/storage/webdav/webdav_options.go
new file mode 100644
index 000000000..79e93d544
--- /dev/null
+++ b/storage/webdav/webdav_options.go
@@ -0,0 +1,17 @@
+package webdav
+
+// Options defines options for WebDAV-backed storage.
+type Options struct {
+	URL             string `json:"url"`
+	DirectoryShards []int  `json:"dirShards"`
+	Username        string `json:"username,omitempty"`
+	Password        string `json:"password,omitempty" kopia:"sensitive"`
+}
+
+func (fso *Options) shards() []int {
+	if fso.DirectoryShards == nil {
+		return fsDefaultShards
+	}
+
+	return fso.DirectoryShards
+}
diff --git a/storage/webdav/webdav_storage.go b/storage/webdav/webdav_storage.go
new file mode 100644
index 000000000..0792eaa15
--- /dev/null
+++ b/storage/webdav/webdav_storage.go
@@ -0,0 +1,210 @@
+// Package webdav implements WebDAV-based Storage.
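+//
+// Connecting follows the same pattern as the other backends (editorial
+// sketch; assumes a New constructor analogous to filesystem/gcs/s3, and the
+// URL and credentials are placeholders):
+//
+//	st, err := New(ctx, &Options{
+//		URL:      "https://dav.example.com/kopia",
+//		Username: "user",
+//		Password: password,
+//	})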
diff --git a/storage/webdav/webdav_storage.go b/storage/webdav/webdav_storage.go
new file mode 100644
index 000000000..0792eaa15
--- /dev/null
+++ b/storage/webdav/webdav_storage.go
@@ -0,0 +1,210 @@
+// Package webdav implements WebDAV-based Storage.
+package webdav
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"github.com/kopia/repo/storage"
+	"github.com/studio-b12/gowebdav"
+)
+
+const (
+	davStorageType       = "webdav"
+	fsStorageChunkSuffix = ".f"
+)
+
+var (
+	fsDefaultShards = []int{3, 3}
+)
+
+// davStorage implements blob.Storage on top of a remote WebDAV server.
+// It is very similar to File storage, except it uses HTTP URLs instead of local files.
+// Storage formats are compatible (both use the same sharded directory structure), so a repository
+// may be accessed using WebDAV or File interchangeably.
+type davStorage struct {
+	Options
+
+	cli *gowebdav.Client
+}
+
+func (d *davStorage) GetBlock(ctx context.Context, blockID string, offset, length int64) ([]byte, error) {
+	_, path := d.getDirPathAndFilePath(blockID)
+
+	data, err := d.cli.Read(path)
+	if err != nil {
+		return nil, d.translateError(err)
+	}
+	if length < 0 {
+		return data, nil
+	}
+
+	if int(offset) > len(data) || offset < 0 {
+		return nil, errors.New("invalid offset")
+	}
+
+	data = data[offset:]
+	if int(length) > len(data) {
+		return data, nil
+	}
+
+	return data[0:length], nil
+}
+
+func (d *davStorage) translateError(err error) error {
+	switch err := err.(type) {
+	case *os.PathError:
+		switch err.Err.Error() {
+		case "404":
+			return storage.ErrBlockNotFound
+		}
+		return err
+	default:
+		return err
+	}
+}
+
+func getBlockIDFromFileName(name string) (string, bool) {
+	if strings.HasSuffix(name, fsStorageChunkSuffix) {
+		return name[0 : len(name)-len(fsStorageChunkSuffix)], true
+	}
+
+	return "", false
+}
+
+func makeFileName(blockID string) string {
+	return blockID + fsStorageChunkSuffix
+}
+
+func (d *davStorage) ListBlocks(ctx context.Context, prefix string, callback func(storage.BlockMetadata) error) error {
+	var walkDir func(string, string) error
+
+	walkDir = func(path string, currentPrefix string) error {
+		entries, err := d.cli.ReadDir(gowebdav.FixSlash(path))
+		if err != nil {
+			return fmt.Errorf("read dir error on %v: %v", path, err)
+		}
+
+		sort.Slice(entries, func(i, j int) bool {
+			return entries[i].Name() < entries[j].Name()
+		})
+
+		for _, e := range entries {
+			if e.IsDir() {
+				newPrefix := currentPrefix + e.Name()
+				var match bool
+
+				if len(prefix) > len(newPrefix) {
+					// looking for 'abcd', got 'ab' so far, worth trying
+					match = strings.HasPrefix(prefix, newPrefix)
+				} else {
+					match = strings.HasPrefix(newPrefix, prefix)
+				}
+
+				if match {
+					if err := walkDir(path+"/"+e.Name(), currentPrefix+e.Name()); err != nil {
+						return err
+					}
+				}
+			} else if fullID, ok := getBlockIDFromFileName(currentPrefix + e.Name()); ok {
+				if strings.HasPrefix(fullID, prefix) {
+					if err := callback(storage.BlockMetadata{
+						BlockID:   fullID,
+						Length:    e.Size(),
+						Timestamp: e.ModTime(),
+					}); err != nil {
+						return err
+					}
+				}
+			}
+		}
+
+		return nil
+	}
+
+	return walkDir("", "")
+}
+
+func (d *davStorage) PutBlock(ctx context.Context, blockID string, data []byte) error {
+	dirPath, filePath := d.getDirPathAndFilePath(blockID)
+	tmpPath := fmt.Sprintf("%v-%v", filePath, rand.Int63())
+	if err := d.translateError(d.cli.Write(tmpPath, data, 0600)); err != nil {
+		if err != storage.ErrBlockNotFound {
+			return err
+		}
+
+		d.cli.MkdirAll(dirPath, 0700) //nolint:errcheck
+		if err = d.translateError(d.cli.Write(tmpPath, data, 0600)); err != nil {
+			return err
+		}
+	}
+
+	return d.translateError(d.cli.Rename(tmpPath, filePath, true))
+}
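+
+// PutBlock above stays atomic for concurrent readers by first uploading to a
+// unique temporary name and renaming into place only once the upload has
+// succeeded. A condensed sketch of the pattern (error handling abbreviated):
+//
+//	tmp := fmt.Sprintf("%v-%v", finalPath, rand.Int63())
+//	err := cli.Write(tmp, data, 0600)  // may fail when the shard directory is missing
+//	cli.MkdirAll(dir, 0700)            // create it on demand...
+//	err = cli.Write(tmp, data, 0600)   // ...and retry the upload
+//	cli.Rename(tmp, finalPath, true)   // swap into the final name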
+func (d *davStorage) DeleteBlock(ctx context.Context, blockID string) error {
+	_, filePath := d.getDirPathAndFilePath(blockID)
+	return d.translateError(d.cli.Remove(filePath))
+}
+
+func (d *davStorage) getShardDirectory(blockID string) (string, string) {
+	shardPath := "/"
+	if len(blockID) < 20 {
+		return shardPath, blockID
+	}
+	for _, size := range d.shards() {
+		shardPath = filepath.Join(shardPath, blockID[0:size])
+		blockID = blockID[size:]
+	}
+
+	return shardPath, blockID
+}
+
+func (d *davStorage) getDirPathAndFilePath(blockID string) (string, string) {
+	shardPath, blockID := d.getShardDirectory(blockID)
+	result := filepath.Join(shardPath, makeFileName(blockID))
+	return shardPath, result
+}
+
+func (d *davStorage) ConnectionInfo() storage.ConnectionInfo {
+	return storage.ConnectionInfo{
+		Type:   davStorageType,
+		Config: &d.Options,
+	}
+}
+
+func (d *davStorage) Close(ctx context.Context) error {
+	return nil
+}
+
+// New creates new WebDAV-backed storage at the specified URL.
+func New(ctx context.Context, opts *Options) (storage.Storage, error) {
+	r := &davStorage{
+		Options: *opts,
+		cli:     gowebdav.NewClient(opts.URL, opts.Username, opts.Password),
+	}
+
+	for _, s := range r.shards() {
+		if s == 0 {
+			return nil, fmt.Errorf("invalid shard spec: %v", opts.DirectoryShards)
+		}
+	}
+
+	r.Options.URL = strings.TrimSuffix(r.Options.URL, "/")
+	return r, nil
+}
+
+func init() {
+	storage.AddSupportedStorage(
+		davStorageType,
+		func() interface{} { return &Options{} },
+		func(ctx context.Context, o interface{}) (storage.Storage, error) {
+			return New(ctx, o.(*Options))
+		})
+}
diff --git a/storage/webdav/webdav_storage_test.go b/storage/webdav/webdav_storage_test.go
new file mode 100644
index 000000000..dc243ea47
--- /dev/null
+++ b/storage/webdav/webdav_storage_test.go
@@ -0,0 +1,62 @@
+package webdav
+
+import (
+	"context"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+
+	"golang.org/x/net/webdav"
+
+	"github.com/kopia/repo/internal/storagetesting"
+)
+
+func TestWebDAVStorage(t *testing.T) {
+	tmpDir, _ := ioutil.TempDir("", "webdav")
+	defer os.RemoveAll(tmpDir)
+
+	t.Logf("tmpDir: %v", tmpDir)
+
+	mux := http.NewServeMux()
+	mux.Handle("/", &webdav.Handler{
+		FileSystem: webdav.Dir(tmpDir),
+		LockSystem: webdav.NewMemLS(),
+	})
+
+	server := httptest.NewServer(mux)
+	defer server.Close()
+
+	ctx := context.Background()
+
+	// Test various shard configurations.
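+	// Each spec below produces a different on-disk layout (with [1, 2], for
+	// example, the 20-character block ID "0123456789abcdefghij" is stored as
+	// /0/12/3456789abcdefghij.f), but VerifyStorage must observe identical
+	// storage semantics for every layout.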
+ for _, shardSpec := range [][]int{ + []int{1}, + []int{3, 3}, + []int{2}, + []int{1, 1}, + []int{1, 2}, + []int{2, 2, 2}, + } { + t.Run(fmt.Sprintf("shards-%v", shardSpec), func(t *testing.T) { + if err := os.RemoveAll(tmpDir); err != nil { + t.Errorf("can't remove all: %q", tmpDir) + } + os.MkdirAll(tmpDir, 0700) + + r, err := New(context.Background(), &Options{ + URL: server.URL, + DirectoryShards: shardSpec, + }) + + if r == nil || err != nil { + t.Errorf("unexpected result: %v %v", r, err) + } + + storagetesting.VerifyStorage(ctx, t, r) + storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r) + }) + } +} diff --git a/tests/repository_stress_test/repository_stress.go b/tests/repository_stress_test/repository_stress.go new file mode 100644 index 000000000..ec4817f32 --- /dev/null +++ b/tests/repository_stress_test/repository_stress.go @@ -0,0 +1,3 @@ +package repositorystress + +// dummy package diff --git a/tests/repository_stress_test/repository_stress_test.go b/tests/repository_stress_test/repository_stress_test.go new file mode 100644 index 000000000..91c73d88d --- /dev/null +++ b/tests/repository_stress_test/repository_stress_test.go @@ -0,0 +1,316 @@ +package repositorystress_test + +import ( + "context" + "fmt" + "io/ioutil" + "log" + "math/rand" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "testing" + "time" + + "github.com/kopia/repo" + "github.com/kopia/repo/block" + "github.com/kopia/repo/storage" + "github.com/kopia/repo/storage/filesystem" +) + +const masterPassword = "foo-bar-baz-1234" + +type testContext struct { + r *repo.Repository +} + +var ( + knownBlocks []string + knownBlocksMutex sync.Mutex +) + +func TestStressRepository(t *testing.T) { + if testing.Short() { + t.Skip("skipping stress test during short tests") + } + ctx := block.UsingListCache(context.Background(), false) + + tmpPath, err := ioutil.TempDir("", "kopia") + if err != nil { + t.Fatalf("unable to create temp directory") + } + + defer func() { + if !t.Failed() { + os.RemoveAll(tmpPath) + } + }() + + t.Logf("path: %v", tmpPath) + + storagePath := filepath.Join(tmpPath, "storage") + configFile1 := filepath.Join(tmpPath, "kopia1.config") + configFile2 := filepath.Join(tmpPath, "kopia2.config") + + os.MkdirAll(storagePath, 0700) + st, err := filesystem.New(ctx, &filesystem.Options{ + Path: storagePath, + }) + if err != nil { + t.Fatalf("unable to initialize storage: %v", err) + } + + // create repository + if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{}, masterPassword); err != nil { + t.Fatalf("unable to initialize repository: %v", err) + } + + // set up two parallel kopia connections, each with its own config file and cache. 
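+	// Each Connect call persists a config file pointing at the same underlying
+	// storage; the worker goroutines below reopen the repository via repo.Open
+	// using one of the two configs, so two independent caches are exercised
+	// concurrently.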
+	if err := repo.Connect(ctx, configFile1, st, masterPassword, repo.ConnectOptions{
+		CachingOptions: block.CachingOptions{
+			CacheDirectory:    filepath.Join(tmpPath, "cache1"),
+			MaxCacheSizeBytes: 2000000000,
+		},
+	}); err != nil {
+		t.Fatalf("unable to connect 1: %v", err)
+	}
+
+	if err := repo.Connect(ctx, configFile2, st, masterPassword, repo.ConnectOptions{
+		CachingOptions: block.CachingOptions{
+			CacheDirectory:    filepath.Join(tmpPath, "cache2"),
+			MaxCacheSizeBytes: 2000000000,
+		},
+	}); err != nil {
+		t.Fatalf("unable to connect 2: %v", err)
+	}
+
+	cancel := make(chan struct{})
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile1, &wg)
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
+	wg.Add(1)
+	go longLivedRepositoryTest(ctx, t, cancel, configFile2, &wg)
+
+	time.Sleep(5 * time.Second)
+	close(cancel)
+
+	wg.Wait()
+}
+
+func longLivedRepositoryTest(ctx context.Context, t *testing.T, cancel chan struct{}, configFile string, wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	rep, err := repo.Open(ctx, configFile, masterPassword, &repo.Options{})
+	if err != nil {
+		t.Errorf("error opening repository: %v", err)
+		return
+	}
+	defer rep.Close(ctx)
+
+	var wg2 sync.WaitGroup
+
+	for i := 0; i < 4; i++ {
+		wg2.Add(1)
+		go func() {
+			defer wg2.Done()
+
+			repositoryTest(ctx, t, cancel, rep)
+		}()
+	}
+
+	wg2.Wait()
}
+
+func repositoryTest(ctx context.Context, t *testing.T, cancel chan struct{}, rep *repo.Repository) {
+	// reopen := func(t *testing.T, r *repo.Repository) error {
+	// 	if err := rep.Close(ctx); err != nil {
+	// 		return fmt.Errorf("error closing: %v", err)
+	// 	}
+
+	// 	t0 := time.Now()
+	// 	rep, err = repo.Open(ctx, configFile, &repo.Options{})
+	// 	log.Printf("reopened in %v", time.Since(t0))
+	// 	return err
+	// }
+
+	workTypes := []*struct {
+		name     string
+		fun      func(ctx context.Context, t *testing.T, r *repo.Repository) error
+		weight   int
+		hitCount int
+	}{
+		//{"reopen", reopen, 1, 0},
+		{"writeRandomBlock", writeRandomBlock, 100, 0},
+		{"writeRandomManifest", writeRandomManifest, 100, 0},
+		{"readKnownBlock", readKnownBlock, 500, 0},
+		{"listBlocks", listBlocks, 50, 0},
+		{"listAndReadAllBlocks", listAndReadAllBlocks, 5, 0},
+		{"readRandomManifest", readRandomManifest, 50, 0},
+		{"compact", compact, 1, 0},
+		{"refresh", refresh, 3, 0},
+		{"flush", flush, 1, 0},
+	}
+
+	var totalWeight int
+	for _, w := range workTypes {
+		totalWeight += w.weight
+	}
+
+	iter := 0
+	for {
+		select {
+		case <-cancel:
+			rep.Close(ctx)
+			return
+		default:
+		}
+
+		if iter%1000 == 0 {
+			var bits []string
+			for _, w := range workTypes {
+				bits = append(bits, fmt.Sprintf("%v:%v", w.name, w.hitCount))
+			}
+			log.Printf("#%v %v %v goroutines", iter, strings.Join(bits, " "), runtime.NumGoroutine())
+		}
+		iter++
+
+		roulette := rand.Intn(totalWeight)
+		for _, w := range workTypes {
+			if roulette < w.weight {
+				w.hitCount++
+				//log.Printf("running %v", w.name)
+				if err := w.fun(ctx, t, rep); err != nil {
+					t.Errorf("error running %v: %v", w.name, err)
+					return
+				}
+				break
+			}
+
+			roulette -= w.weight
+		}
+	}
+}
+
+func
writeRandomBlock(ctx context.Context, t *testing.T, r *repo.Repository) error { + data := make([]byte, 1000) + rand.Read(data) + blockID, err := r.Blocks.WriteBlock(ctx, data, "") + if err == nil { + knownBlocksMutex.Lock() + if len(knownBlocks) >= 1000 { + n := rand.Intn(len(knownBlocks)) + knownBlocks[n] = blockID + } else { + knownBlocks = append(knownBlocks, blockID) + } + knownBlocksMutex.Unlock() + } + return err +} + +func readKnownBlock(ctx context.Context, t *testing.T, r *repo.Repository) error { + knownBlocksMutex.Lock() + if len(knownBlocks) == 0 { + knownBlocksMutex.Unlock() + return nil + } + blockID := knownBlocks[rand.Intn(len(knownBlocks))] + knownBlocksMutex.Unlock() + + _, err := r.Blocks.GetBlock(ctx, blockID) + if err == nil || err == storage.ErrBlockNotFound { + return nil + } + + return err +} + +func listBlocks(ctx context.Context, t *testing.T, r *repo.Repository) error { + _, err := r.Blocks.ListBlocks("") + return err +} + +func listAndReadAllBlocks(ctx context.Context, t *testing.T, r *repo.Repository) error { + blocks, err := r.Blocks.ListBlocks("") + if err != nil { + return err + } + + for _, bi := range blocks { + _, err := r.Blocks.GetBlock(ctx, bi) + if err != nil { + if err == storage.ErrBlockNotFound && strings.HasPrefix(bi, "m") { + // this is ok, sometimes manifest manager will perform compaction and 'm' blocks will be marked as deleted + continue + } + return fmt.Errorf("error reading block %v: %v", bi, err) + } + } + + return nil +} + +func compact(ctx context.Context, t *testing.T, r *repo.Repository) error { + return r.Blocks.CompactIndexes(ctx, block.CompactOptions{ + MinSmallBlocks: 1, + MaxSmallBlocks: 1, + }) +} + +func flush(ctx context.Context, t *testing.T, r *repo.Repository) error { + return r.Flush(ctx) +} + +func refresh(ctx context.Context, t *testing.T, r *repo.Repository) error { + return r.Refresh(ctx) +} + +func readRandomManifest(ctx context.Context, t *testing.T, r *repo.Repository) error { + manifests, err := r.Manifests.Find(ctx, nil) + if err != nil { + return err + } + if len(manifests) == 0 { + return nil + } + n := rand.Intn(len(manifests)) + _, err = r.Manifests.GetRaw(ctx, manifests[n].ID) + return err +} + +func writeRandomManifest(ctx context.Context, t *testing.T, r *repo.Repository) error { + key1 := fmt.Sprintf("key-%v", rand.Intn(10)) + key2 := fmt.Sprintf("key-%v", rand.Intn(10)) + val1 := fmt.Sprintf("val1-%v", rand.Intn(10)) + val2 := fmt.Sprintf("val2-%v", rand.Intn(10)) + content1 := fmt.Sprintf("content-%v", rand.Intn(10)) + content2 := fmt.Sprintf("content-%v", rand.Intn(10)) + content1val := fmt.Sprintf("val1-%v", rand.Intn(10)) + content2val := fmt.Sprintf("val2-%v", rand.Intn(10)) + _, err := r.Manifests.Put(ctx, map[string]string{ + "type": key1, + key1: val1, + key2: val2, + }, map[string]string{ + content1: content1val, + content2: content2val, + }) + return err +} diff --git a/tests/repository_test/repository.go b/tests/repository_test/repository.go new file mode 100644 index 000000000..8fb601007 --- /dev/null +++ b/tests/repository_test/repository.go @@ -0,0 +1,3 @@ +package repository + +// dummy package diff --git a/tests/repository_test/repository_test.go b/tests/repository_test/repository_test.go new file mode 100644 index 000000000..514cb6498 --- /dev/null +++ b/tests/repository_test/repository_test.go @@ -0,0 +1,314 @@ +package repository_test + +import ( + "bytes" + "context" + cryptorand "crypto/rand" + "fmt" + "io/ioutil" + "math/rand" + "reflect" + "runtime/debug" + "testing" + + 
"github.com/kopia/repo" + "github.com/kopia/repo/block" + "github.com/kopia/repo/internal/repotesting" + "github.com/kopia/repo/object" + "github.com/kopia/repo/storage" +) + +func TestWriters(t *testing.T) { + cases := []struct { + data []byte + objectID object.ID + }{ + { + []byte("the quick brown fox jumps over the lazy dog"), + "345acef0bcf82f1daf8e49fab7b7fac7ec296c518501eabea3645b99345a4e08", + }, + {make([]byte, 100), "1d804f1f69df08f3f59070bf962de69433e3d61ac18522a805a84d8c92741340"}, // 100 zero bytes + } + + ctx := context.Background() + + for _, c := range cases { + var env repotesting.Environment + defer env.Setup(t).Close(t) + + writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) + writer.Write(c.data) + + result, err := writer.Result() + if err != nil { + t.Errorf("error getting writer results for %v, expected: %v", c.data, c.objectID.String()) + continue + } + + env.Repository.Objects.Flush(ctx) + + if !objectIDsEqual(result, c.objectID) { + t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String()) + } + + env.Repository.Blocks.Flush(ctx) + } +} + +func objectIDsEqual(o1 object.ID, o2 object.ID) bool { + return reflect.DeepEqual(o1, o2) +} + +func TestWriterCompleteChunkInTwoWrites(t *testing.T) { + var env repotesting.Environment + defer env.Setup(t).Close(t) + ctx := context.Background() + + bytes := make([]byte, 100) + writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) + writer.Write(bytes[0:50]) + writer.Write(bytes[0:50]) + result, err := writer.Result() + if result != "1d804f1f69df08f3f59070bf962de69433e3d61ac18522a805a84d8c92741340" { + t.Errorf("unexpected result: %v err: %v", result, err) + } +} + +func TestPackingSimple(t *testing.T) { + var env repotesting.Environment + defer env.Setup(t).Close(t) + + ctx := context.Background() + + content1 := "hello, how do you do?" + content2 := "hi, how are you?" + content3 := "thank you!" 
+
+	oid1a := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1a")
+	oid1b := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1b")
+	oid2a := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2a")
+	oid2b := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2b")
+
+	oid3a := writeObject(ctx, t, env.Repository, []byte(content3), "packed-object-3a")
+	oid3b := writeObject(ctx, t, env.Repository, []byte(content3), "packed-object-3b")
+	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
+	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
+	oid2c := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2c")
+	oid1c := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1c")
+
+	env.Repository.Objects.Flush(ctx)
+	env.Repository.Blocks.Flush(ctx)
+
+	if got, want := oid1a.String(), oid1b.String(); got != want {
+		t.Errorf("oid1a(%q) != oid1b(%q)", got, want)
+	}
+	if got, want := oid1a.String(), oid1c.String(); got != want {
+		t.Errorf("oid1a(%q) != oid1c(%q)", got, want)
+	}
+	if got, want := oid2a.String(), oid2b.String(); got != want {
+		t.Errorf("oid2a(%q) != oid2b(%q)", got, want)
+	}
+	if got, want := oid2a.String(), oid2c.String(); got != want {
+		t.Errorf("oid2a(%q) != oid2c(%q)", got, want)
+	}
+	if got, want := oid3a.String(), oid3b.String(); got != want {
+		t.Errorf("oid3a(%q) != oid3b(%q)", got, want)
+	}
+
+	env.VerifyStorageBlockCount(t, 3)
+
+	env.MustReopen(t)
+
+	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
+	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
+	verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")
+
+	if err := env.Repository.Blocks.CompactIndexes(ctx, block.CompactOptions{MinSmallBlocks: 1, MaxSmallBlocks: 1}); err != nil {
+		t.Errorf("optimize error: %v", err)
+	}
+
+	env.MustReopen(t)
+
+	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
+	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
+	verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")
+
+	if err := env.Repository.Blocks.CompactIndexes(ctx, block.CompactOptions{MinSmallBlocks: 1, MaxSmallBlocks: 1}); err != nil {
+		t.Errorf("optimize error: %v", err)
+	}
+
+	env.MustReopen(t)
+
+	verify(ctx, t, env.Repository, oid1a, []byte(content1), "packed-object-1")
+	verify(ctx, t, env.Repository, oid2a, []byte(content2), "packed-object-2")
+	verify(ctx, t, env.Repository, oid3a, []byte(content3), "packed-object-3")
+}
+
+func TestHMAC(t *testing.T) {
+	var env repotesting.Environment
+	defer env.Setup(t).Close(t)
+	ctx := context.Background()
+
+	content := bytes.Repeat([]byte{0xcd}, 50)
+
+	w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
+	w.Write(content)
+	result, err := w.Result()
+	if result.String() != "367352007ee6ca9fa755ce8352347d092c17a24077fd33c62f655574a8cf906d" {
+		t.Errorf("unexpected result: %v err: %v", result.String(), err)
+	}
+}
+
+func TestReaderStoredBlockNotFound(t *testing.T) {
+	var env repotesting.Environment
+	defer env.Setup(t).Close(t)
+	ctx := context.Background()
+
+	objectID, err := object.ParseID("Ddeadbeef")
+	if err != nil {
+		t.Errorf("cannot parse object ID: %v", err)
+	}
+	reader, err := env.Repository.Objects.Open(ctx, objectID)
+	if err != storage.ErrBlockNotFound || reader != nil {
+		t.Errorf("unexpected result: reader: %v err: %v", reader, err)
+	}
+}
+
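+// The tests above and below all follow the same object round-trip pattern; as
+// a sketch (error handling abbreviated):
+//
+//	w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{})
+//	w.Write(payload)                // may be called repeatedly
+//	oid, err := w.Result()          // content-derived object ID
+//	r, err := env.Repository.Objects.Open(ctx, oid)
+//	data, err := ioutil.ReadAll(r)  // or Seek+Read for partial access
+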
+func TestEndToEndReadAndSeek(t *testing.T) { + var env repotesting.Environment + defer env.Setup(t).Close(t) + ctx := context.Background() + + for _, size := range []int{1, 199, 200, 201, 9999, 512434} { + // Create some random data sample of the specified size. + randomData := make([]byte, size) + cryptorand.Read(randomData) + + writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) + writer.Write(randomData) + objectID, err := writer.Result() + writer.Close() + if err != nil { + t.Errorf("cannot get writer result for %v: %v", size, err) + continue + } + + verify(ctx, t, env.Repository, objectID, randomData, fmt.Sprintf("%v %v", objectID, size)) + } +} + +func writeObject(ctx context.Context, t *testing.T, rep *repo.Repository, data []byte, testCaseID string) object.ID { + w := rep.Objects.NewWriter(ctx, object.WriterOptions{}) + if _, err := w.Write(data); err != nil { + t.Fatalf("can't write object %q - write failed: %v", testCaseID, err) + + } + oid, err := w.Result() + if err != nil { + t.Fatalf("can't write object %q - result failed: %v", testCaseID, err) + } + + return oid +} + +func verify(ctx context.Context, t *testing.T, rep *repo.Repository, objectID object.ID, expectedData []byte, testCaseID string) { + t.Helper() + reader, err := rep.Objects.Open(ctx, objectID) + if err != nil { + t.Errorf("cannot get reader for %v (%v): %v %v", testCaseID, objectID, err, string(debug.Stack())) + return + } + + for i := 0; i < 20; i++ { + sampleSize := int(rand.Int31n(300)) + seekOffset := int(rand.Int31n(int32(len(expectedData)))) + if seekOffset+sampleSize > len(expectedData) { + sampleSize = len(expectedData) - seekOffset + } + if sampleSize > 0 { + got := make([]byte, sampleSize) + if offset, err := reader.Seek(int64(seekOffset), 0); err != nil || offset != int64(seekOffset) { + t.Errorf("seek error: %v offset=%v expected:%v", err, offset, seekOffset) + } + if n, err := reader.Read(got); err != nil || n != sampleSize { + t.Errorf("invalid data: n=%v, expected=%v, err:%v", n, sampleSize, err) + } + + expected := expectedData[seekOffset : seekOffset+sampleSize] + + if !bytes.Equal(expected, got) { + t.Errorf("incorrect data read for %v: expected: %x, got: %x", testCaseID, expected, got) + } + } + } +} + +func TestFormats(t *testing.T) { + ctx := context.Background() + makeFormat := func(blockFormat string) func(*repo.NewRepositoryOptions) { + return func(n *repo.NewRepositoryOptions) { + n.BlockFormat.BlockFormat = blockFormat + n.BlockFormat.HMACSecret = []byte("key") + n.ObjectFormat.MaxBlockSize = 10000 + n.ObjectFormat.Splitter = "FIXED" + } + } + + cases := []struct { + format func(*repo.NewRepositoryOptions) + oids map[string]object.ID + }{ + { + format: func(n *repo.NewRepositoryOptions) { + n.ObjectFormat.MaxBlockSize = 10000 + }, + oids: map[string]object.ID{ + "": "b613679a0814d9ec772f95d778c35fc5ff1697c493715653c6c712144292c5ad", + "The quick brown fox jumps over the lazy dog": "fb011e6154a19b9a4c767373c305275a5a69e8b68b0b4c9200c383dced19a416", + }, + }, + { + format: makeFormat("UNENCRYPTED_HMAC_SHA256"), + oids: map[string]object.ID{ + "The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8", + }, + }, + { + format: makeFormat("UNENCRYPTED_HMAC_SHA256_128"), + oids: map[string]object.ID{ + "The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143", + }, + }, + } + + for caseIndex, c := range cases { + var env repotesting.Environment + defer env.Setup(t, c.format).Close(t) + + for 
k, v := range c.oids { + bytesToWrite := []byte(k) + w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) + w.Write(bytesToWrite) + oid, err := w.Result() + if err != nil { + t.Errorf("error: %v", err) + } + if !objectIDsEqual(oid, v) { + t.Errorf("invalid oid for #%v\ngot:\n%#v\nexpected:\n%#v", caseIndex, oid.String(), v.String()) + } + + rc, err := env.Repository.Objects.Open(ctx, oid) + if err != nil { + t.Errorf("open failed: %v", err) + continue + } + bytesRead, err := ioutil.ReadAll(rc) + if err != nil { + t.Errorf("error reading: %v", err) + } + if !bytes.Equal(bytesRead, bytesToWrite) { + t.Errorf("data mismatch, read:%x vs written:%v", bytesRead, bytesToWrite) + } + } + } +} diff --git a/tests/stress_test/stress.go b/tests/stress_test/stress.go new file mode 100644 index 000000000..1d01396c9 --- /dev/null +++ b/tests/stress_test/stress.go @@ -0,0 +1,3 @@ +package stress + +// dummy package diff --git a/tests/stress_test/stress_test.go b/tests/stress_test/stress_test.go new file mode 100644 index 000000000..ecb7e9b3f --- /dev/null +++ b/tests/stress_test/stress_test.go @@ -0,0 +1,131 @@ +package stress_test + +import ( + "context" + "fmt" + "math/rand" + "os" + "reflect" + "testing" + "time" + + "github.com/kopia/repo/block" + "github.com/kopia/repo/internal/storagetesting" + "github.com/kopia/repo/storage" +) + +const goroutineCount = 16 + +func TestStressBlockManager(t *testing.T) { + if testing.Short() { + t.Skip("skipping stress test during short tests") + } + + data := map[string][]byte{} + keyTimes := map[string]time.Time{} + memst := storagetesting.NewMapStorage(data, keyTimes, time.Now) + + var duration = 3 * time.Second + if os.Getenv("KOPIA_LONG_STRESS_TEST") != "" { + duration = 3 * time.Minute + } + + stressTestWithStorage(t, memst, duration) +} + +func stressTestWithStorage(t *testing.T, st storage.Storage, duration time.Duration) { + ctx := context.Background() + + openMgr := func() (*block.Manager, error) { + return block.NewManager(ctx, st, block.FormattingOptions{ + Version: 1, + BlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV", + MaxPackSize: 20000000, + MasterKey: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + }, block.CachingOptions{}) + } + + seed0 := time.Now().Nanosecond() + + t.Logf("running with seed %v", seed0) + + deadline := time.Now().Add(duration) + + t.Run("workers", func(t *testing.T) { + for i := 0; i < goroutineCount; i++ { + i := i + t.Run(fmt.Sprintf("worker-%v", i), func(t *testing.T) { + t.Parallel() + stressWorker(ctx, t, deadline, i, openMgr, int64(seed0+i)) + }) + } + }) +} + +func stressWorker(ctx context.Context, t *testing.T, deadline time.Time, workerID int, openMgr func() (*block.Manager, error), seed int64) { + src := rand.NewSource(seed) + rand := rand.New(src) + + bm, err := openMgr() + if err != nil { + t.Errorf("error opening manager: %v", err) + } + + type writtenBlock struct { + contentID string + data []byte + } + + var workerBlocks []writtenBlock + + for time.Now().Before(deadline) { + l := rand.Intn(30000) + data := make([]byte, l) + if _, err := rand.Read(data); err != nil { + t.Errorf("err: %v", err) + return + } + dataCopy := append([]byte{}, data...) 
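+		// keep an independent copy of the payload for the later GetBlock
+		// comparison, in case the block manager modifies or retains the
+		// slice handed to WriteBlock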
+ contentID, err := bm.WriteBlock(ctx, data, "") + if err != nil { + t.Errorf("err: %v", err) + return + } + + switch rand.Intn(20) { + case 0: + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + return + } + case 1: + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + return + } + bm, err = openMgr() + if err != nil { + t.Errorf("error opening: %v", err) + return + } + } + + //log.Printf("wrote %v", contentID) + workerBlocks = append(workerBlocks, writtenBlock{contentID, dataCopy}) + if len(workerBlocks) > 5 { + pos := rand.Intn(len(workerBlocks)) + previous := workerBlocks[pos] + //log.Printf("reading %v", previous.contentID) + d2, err := bm.GetBlock(ctx, previous.contentID) + if err != nil { + t.Errorf("error verifying block %q: %v", previous.contentID, err) + return + } + if !reflect.DeepEqual(previous.data, d2) { + t.Errorf("invalid previous data for %q %x %x", previous.contentID, d2, previous.data) + return + } + workerBlocks = append(workerBlocks[0:pos], workerBlocks[pos+1:]...) + } + } +} From 834279e49787844054ab999737ca4da5cfab1296 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 26 Oct 2018 17:36:39 -0700 Subject: [PATCH 03/74] Added Go 1.11 module --- go.mod | 22 ++++++++++++++++++ go.sum | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 go.mod create mode 100644 go.sum diff --git a/go.mod b/go.mod new file mode 100644 index 000000000..46141bc3b --- /dev/null +++ b/go.mod @@ -0,0 +1,22 @@ +module github.com/kopia/repo + +require ( + cloud.google.com/go v0.31.0 + github.com/efarrer/iothrottler v0.0.0-20141121142253-60e7e547c7fe + github.com/go-ini/ini v1.39.0 // indirect + github.com/googleapis/gax-go v2.0.0+incompatible // indirect + github.com/minio/minio-go v6.0.9+incompatible + github.com/mitchellh/go-homedir v1.0.0 // indirect + github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 + github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6 + github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc + go.opencensus.io v0.18.0 // indirect + golang.org/x/crypto v0.0.0-20181025213731-e84da0312774 + golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 + golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 // indirect + golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4 + golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 // indirect + google.golang.org/api v0.0.0-20181026000445-511bab8e55de + google.golang.org/genproto v0.0.0-20181026194446-8b5d7a19e2d9 // indirect + google.golang.org/grpc v1.16.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 000000000..69cc636d7 --- /dev/null +++ b/go.sum @@ -0,0 +1,73 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.31.0 h1:o9K5MWWt2wk+d9jkGn2DAZ7Q9nUdnFLOpK9eIkDwONQ= +cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/efarrer/iothrottler v0.0.0-20141121142253-60e7e547c7fe h1:WAx1vRufH0I2pTWldQkXPzpc+jndCOi2FH334LFQ1PI= +github.com/efarrer/iothrottler v0.0.0-20141121142253-60e7e547c7fe/go.mod h1:zjXkUoNEq44qYz/1TlzBhN2W21rDU3HvDBiJWQAZTq8= 
+github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-ini/ini v1.39.0 h1:/CyW/jTlZLjuzy52jc1XnhJm6IUKEuunpJFpecywNeI= +github.com/go-ini/ini v1.39.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/googleapis/gax-go v2.0.0+incompatible h1:j0GKcs05QVmm7yesiZq2+9cxHkNK9YM6zKx4D2qucQU= +github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= +github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/minio/minio-go v6.0.9+incompatible h1:1GBagCy3VtWteFBwjjNyajSf0JJ/iT0hYVlK8xipsds= +github.com/minio/minio-go v6.0.9+incompatible/go.mod h1:7guKYtitv8dktvNUGrhzmNlA5wrAABTQXCoesZdFQO8= +github.com/mitchellh/go-homedir v1.0.0 h1:vKb8ShqSby24Yrqr/yDYkuFz8d0WUjys40rvnGC8aR0= +github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 h1:lDH9UUVJtmYCjyT0CI4q8xvlXPxeZ0gYCVvWbmPlp88= +github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= +github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= +github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6 h1:31fhvQj+O9qDqMxUgQDOCQA5RV1iIFMzYPhBUyzg2p0= +github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6/go.mod h1:jk5gVE20+MCoyJ2TFiiMrbWPyaH4t9T5F3HwVdthB2w= +github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc h1:p1iYuFAxSsQ5JDzBOpBEsqFpjgKRyGrnjQpvvq2AK5A= +github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc/go.mod h1:gCcfDlA1Y7GqOaeEKw5l9dOGx1VLdc/HuQSlQAaZ30s= +go.opencensus.io v0.18.0 h1:Mk5rgZcggtbvtAun5aJzAtjKKN/t0R3jJPlWILlv938= +go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA= +golang.org/x/crypto v0.0.0-20181025213731-e84da0312774 h1:a4tQYYYuK9QdeO/+kEvNYyuR21S+7ve5EANok6hABhI= +golang.org/x/crypto v0.0.0-20181025213731-e84da0312774/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 h1:lpkPb6P4ObnPRN3VbEzv/6CUtwaEDtx0cvCg4eWQuBk= +golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 
+golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 h1:x6rhz8Y9CjbgQkccRGmELH6K+LJj7tOoh3XWeC1yaQM= +golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4 h1:99CA0JJbUX4ozCnLon680Jc9e0T1i8HCaLVJMwtI8Hc= +golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 h1:x6r4Jo0KNzOOzYd8lbcRsqjuqEASK6ob3auvWYM4/8U= +golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= +google.golang.org/api v0.0.0-20181026000445-511bab8e55de h1:jZyuTBGMXzHm+q0+2tRrBCyXKlKrmXeDQcv7s4HeQLY= +google.golang.org/api v0.0.0-20181026000445-511bab8e55de/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20181026194446-8b5d7a19e2d9 h1:26lptpu+T60F849wXfTQMz9ecFf6nTQM0J1JjLSga5U= +google.golang.org/genproto v0.0.0-20181026194446-8b5d7a19e2d9/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.16.0 h1:dz5IJGuC2BB7qXR5AyHNwAUBhZscK2xVez7mznh72sY= +google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= From 43c2ccbc900004c77bb5196f0c52a8377efd0995 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 26 Oct 2018 17:43:33 -0700 Subject: [PATCH 04/74] added travis configuration --- .travis.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..200d02dd7 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,6 @@ +language: go +go: +- "1.11" +os: +- linux 
+script: go test ./...

From 5d098cd23a703334f814dd3ff0f19894889f1884 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Fri, 26 Oct 2018 17:53:21 -0700
Subject: [PATCH 05/74] added readme

---
 README.md | 46 ++++++++++
 kopia.svg | 251 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 297 insertions(+)
 create mode 100644 README.md
 create mode 100644 kopia.svg

diff --git a/README.md b/README.md
new file mode 100644
index 000000000..8cee14342
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+Repository
+=====
+
+![Kopia](kopia.svg)
+
+[![Build Status](https://travis-ci.org/kopia/repo.svg?branch=master)](https://travis-ci.org/kopia/repo)
+[![GoDoc](https://godoc.org/github.com/kopia/repo?status.svg)](https://godoc.org/github.com/kopia/repo)
+
+This library implements the Content-Addressable Storage Repository used by [Kopia](https://github.com/kopia/kopia) to
+store its snapshots.
+
+> **NOTICE**:
+>
+> This library is still in early stages of development and is **not ready for general use**.
+> The repository data format is subject to change, including backwards-incompatible changes. Use at your own risk.
+
+Licensing
+---
+Kopia is licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE) for the full license text.
+
+Disclaimer
+---
+
+Kopia is a personal project and is not affiliated with, supported or endorsed by Google.
+
+Cryptography Notice
+---
+
+  This distribution includes cryptographic software. The country in
+  which you currently reside may have restrictions on the import,
+  possession, use, and/or re-export to another country, of encryption
+  software. BEFORE using any encryption software, please check your
+  country's laws, regulations and policies concerning the import,
+  possession, or use, and re-export of encryption software, to see if
+  this is permitted. See <http://www.wassenaar.org/> for more
+  information.
+
+  The U.S. Government Department of Commerce, Bureau of Industry and
+  Security (BIS), has classified this software as Export Commodity
+  Control Number (ECCN) 5D002.C.1, which includes information security
+  software using or performing cryptographic functions with symmetric
+  algorithms. The form and manner of this distribution makes it
+  eligible for export under the License Exception ENC Technology
+  Software Unrestricted (TSU) exception (see the BIS Export
+  Administration Regulations, Section 740.13) for both object code and
+  source code.
diff --git a/kopia.svg b/kopia.svg
new file mode 100644
index 000000000..3c75710c8
--- /dev/null
+++ b/kopia.svg
@@ -0,0 +1,251 @@
+ [kopia.svg: 251 lines of SVG logo markup; only the media type "image/svg+xml" is recoverable]

From bae3112ce314f9f5699c0943590d0404597d24d9 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Fri, 26 Oct 2018 17:59:42 -0700
Subject: [PATCH 06/74] added coveralls.io integration

---
 .travis.yml | 5 ++++-
 go.mod | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 200d02dd7..2d11bf44a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,4 +3,7 @@ go:
 - "1.11"
 os:
 - linux
-script: go test ./...
+before_install: + - go get github.com/mattn/goveralls +script: + - $GOPATH/bin/goveralls -service=travis-ci diff --git a/go.mod b/go.mod index 46141bc3b..9ef8b73ac 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( go.opencensus.io v0.18.0 // indirect golang.org/x/crypto v0.0.0-20181025213731-e84da0312774 golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 - golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 // indirect + golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4 golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 // indirect google.golang.org/api v0.0.0-20181026000445-511bab8e55de From 8697a08a9d568a60c0c003becde4608db0841bf4 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 26 Oct 2018 18:04:52 -0700 Subject: [PATCH 07/74] build logging --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 2d11bf44a..48e089688 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,4 @@ os: before_install: - go get github.com/mattn/goveralls script: - - $GOPATH/bin/goveralls -service=travis-ci + - $GOPATH/bin/goveralls -v -package ./... -service=travis-ci From 7d8f4a469564179aa88b49084c8cdcb386777191 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 26 Oct 2018 20:35:39 -0700 Subject: [PATCH 08/74] repo: fixed default cache dir --- connect.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/connect.go b/connect.go index 948218975..16859e181 100644 --- a/connect.go +++ b/connect.go @@ -75,7 +75,7 @@ func setupCaching(configPath string, lc *LocalConfig, opt block.CachingOptions, h := sha256.New() h.Write(uniqueID) h.Write([]byte(configPath)) - lc.Caching.CacheDirectory = filepath.Join(cacheDir, hex.EncodeToString(h.Sum(nil))[0:16]) + lc.Caching.CacheDirectory = filepath.Join(cacheDir, "kopia", hex.EncodeToString(h.Sum(nil))[0:16]) } else { absCacheDir, err := filepath.Abs(opt.CacheDirectory) if err != nil { From 4f4554ab1ac5fa76f750e1f64190f39e9d5d30e4 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 26 Oct 2018 21:10:40 -0700 Subject: [PATCH 09/74] coverage badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 8cee14342..61d59c303 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ Repository [![Build Status](https://travis-ci.org/kopia/repo.svg?branch=master)](https://travis-ci.org/kopia/repo) [![GoDoc](https://godoc.org/github.com/kopia/repo?status.svg)](https://godoc.org/github.com/kopia/repo) +[![Coverage Status](https://coveralls.io/repos/github/kopia/repo/badge.svg?branch=master)](https://coveralls.io/github/kopia/repo?branch=master) This library implements Content-Addressable Storage Repository used by [Kopia](https://github.com/kopia/kopia) to store its snapshots. From 9975ae9c26a1c53a99f3c61a7589f2739801f95d Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 07:38:12 -0700 Subject: [PATCH 10/74] travis: coverage over all tests --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 48e089688..ccc3dad07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,4 +6,5 @@ os: before_install: - go get github.com/mattn/goveralls script: - - $GOPATH/bin/goveralls -v -package ./... -service=travis-ci + - go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... 
+ - $GOPATH/bin/goveralls -service=travis-ci -coverprofile=tmp.cov From 7237ded760ae9b6c0d9c55abbc5c94e700aca642 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 08:47:27 -0700 Subject: [PATCH 11/74] added linting to travis + Makefile --- .gometalinter.json | 7 +++++++ .travis.yml | 7 ++----- Makefile | 23 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 .gometalinter.json create mode 100644 Makefile diff --git a/.gometalinter.json b/.gometalinter.json new file mode 100644 index 000000000..5c27fe988 --- /dev/null +++ b/.gometalinter.json @@ -0,0 +1,7 @@ +{ + "Disable": ["maligned","gas","gosec"], + "Exclude": [ + ".+_test\\.go" + ], + "Deadline": "120s" +} \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index ccc3dad07..e5f710075 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,5 @@ go: - "1.11" os: - linux -before_install: - - go get github.com/mattn/goveralls -script: - - go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... - - $GOPATH/bin/goveralls -service=travis-ci -coverprofile=tmp.cov +before_install: make setup +script: make travis diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..881a11866 --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +all: test lint + +travis: test lint upload-coverage + +setup: + go get github.com/mattn/goveralls + go get -u gopkg.in/alecthomas/gometalinter.v2 + gometalinter.v2 --install + +lint: + gometalinter.v2 ./... + +test: + go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... + +upload-coverage: + goveralls -service=travis-ci -coverprofile=tmp.cov + +coverage-html: + go tool cover -html=tmp.cov + +godoc: + godoc -http=:33333 From 0c67743168d365ff8b1760341daabc0471c75092 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 08:47:36 -0700 Subject: [PATCH 12/74] storagetesting: new test cases --- internal/storagetesting/asserts.go | 42 ++++++++++++++++-------------- internal/storagetesting/verify.go | 19 ++++++++++++++ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/internal/storagetesting/asserts.go b/internal/storagetesting/asserts.go index 03e797c01..e7432a8c0 100644 --- a/internal/storagetesting/asserts.go +++ b/internal/storagetesting/asserts.go @@ -3,10 +3,8 @@ import ( "bytes" "context" - "fmt" - "path/filepath" "reflect" - "runtime" + "sort" "testing" "github.com/kopia/repo/storage" @@ -14,14 +12,16 @@ // AssertGetBlock asserts that the specified storage block has correct content. 
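+// The block is fetched whole and then again in two halves, so both full and
+// ranged GetBlock(offset, length) reads are exercised.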
func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block string, expected []byte) { + t.Helper() + b, err := s.GetBlock(ctx, block, 0, -1) if err != nil { - t.Errorf(errorPrefix()+"GetBlock(%v) returned error %v, expected data: %v", block, err, expected) + t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected) return } if !bytes.Equal(b, expected) { - t.Errorf(errorPrefix()+"GetBlock(%v) returned %x, but expected %x", block, b, expected) + t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected) } half := int64(len(expected) / 2) @@ -31,36 +31,39 @@ func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block b, err = s.GetBlock(ctx, block, 0, half) if err != nil { - t.Errorf(errorPrefix()+"GetBlock(%v) returned error %v, expected data: %v", block, err, expected) + t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected) return } if !bytes.Equal(b, expected[0:half]) { - t.Errorf(errorPrefix()+"GetBlock(%v) returned %x, but expected %x", block, b, expected[0:half]) + t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected[0:half]) } b, err = s.GetBlock(ctx, block, half, int64(len(expected))-half) if err != nil { - t.Errorf(errorPrefix()+"GetBlock(%v) returned error %v, expected data: %v", block, err, expected) + t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected) return } if !bytes.Equal(b, expected[len(expected)-int(half):]) { - t.Errorf(errorPrefix()+"GetBlock(%v) returned %x, but expected %x", block, b, expected[len(expected)-int(half):]) + t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected[len(expected)-int(half):]) } } // AssertGetBlockNotFound asserts that GetBlock() for specified storage block returns ErrBlockNotFound. func AssertGetBlockNotFound(ctx context.Context, t *testing.T, s storage.Storage, block string) { + t.Helper() + b, err := s.GetBlock(ctx, block, 0, -1) if err != storage.ErrBlockNotFound || b != nil { - t.Errorf(errorPrefix()+"GetBlock(%v) returned %v, %v but expected ErrBlockNotFound", block, b, err) + t.Errorf("GetBlock(%v) returned %v, %v but expected ErrBlockNotFound", block, b, err) } } // AssertListResults asserts that the list results with given prefix return the specified list of names in order. -func AssertListResults(ctx context.Context, t *testing.T, s storage.Storage, prefix string, expected ...string) { +func AssertListResults(ctx context.Context, t *testing.T, s storage.Storage, prefix string, want ...string) { + t.Helper() var names []string if err := s.ListBlocks(ctx, prefix, func(e storage.BlockMetadata) error { @@ -70,15 +73,16 @@ func AssertListResults(ctx context.Context, t *testing.T, s storage.Storage, pre t.Fatalf("err: %v", err) } - if !reflect.DeepEqual(names, expected) { - t.Errorf(errorPrefix()+"ListBlocks(%v) returned %v, but expected %v", prefix, names, expected) + names = sorted(names) + want = sorted(want) + + if !reflect.DeepEqual(names, want) { + t.Errorf("ListBlocks(%v) returned %v, but wanted %v", prefix, names, want) } } -func errorPrefix() string { - if _, fn, line, ok := runtime.Caller(2); ok { - return fmt.Sprintf("called from %v:%v: ", filepath.Base(fn), line) - } - - return "" +func sorted(s []string) []string { + x := append([]string(nil), s...) 
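+	// sort a copy so the caller's slice keeps its original order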
+ sort.Strings(x) + return x } diff --git a/internal/storagetesting/verify.go b/internal/storagetesting/verify.go index 7640c18c0..07eab1fed 100644 --- a/internal/storagetesting/verify.go +++ b/internal/storagetesting/verify.go @@ -19,6 +19,7 @@ func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) { {blk: string("zxce0e35630770c54668a8cfb4e414c6bf8f"), contents: []byte{1}}, {blk: string("abff4585856ebf0748fd989e1dd623a8963d"), contents: bytes.Repeat([]byte{1}, 1000)}, {blk: string("abgc3dca496d510f492c858a2df1eb824e62"), contents: bytes.Repeat([]byte{1}, 10000)}, + {blk: string("kopia.repository"), contents: bytes.Repeat([]byte{2}, 100)}, } // First verify that blocks don't exist. @@ -35,7 +36,25 @@ func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) { AssertGetBlock(ctx, t, r, b.blk, b.contents) } + AssertListResults(ctx, t, r, "", blocks[0].blk, blocks[1].blk, blocks[2].blk, blocks[3].blk, blocks[4].blk) AssertListResults(ctx, t, r, "ab", blocks[0].blk, blocks[2].blk, blocks[3].blk) + + // Overwrite blocks. + for _, b := range blocks { + if err := r.PutBlock(ctx, b.blk, b.contents); err != nil { + t.Errorf("can't put block: %v", err) + } + + AssertGetBlock(ctx, t, r, b.blk, b.contents) + } + if err := r.DeleteBlock(ctx, blocks[0].blk); err != nil { + t.Errorf("unable to delete block: %v", err) + } + if err := r.DeleteBlock(ctx, blocks[0].blk); err != nil { + t.Errorf("invalid error when deleting deleted block: %v", err) + } + AssertListResults(ctx, t, r, "ab", blocks[2].blk, blocks[3].blk) + AssertListResults(ctx, t, r, "", blocks[1].blk, blocks[2].blk, blocks[3].blk, blocks[4].blk) } // AssertConnectionInfoRoundTrips verifies that the ConnectionInfo returned by a given storage can be used to create From c41aea11c5a63f60c24c820f31760804f7519f1f Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 09:00:44 -0700 Subject: [PATCH 13/74] travis: force GO111MODULE=on --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 881a11866..89dc0f605 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ lint: gometalinter.v2 ./... test: - go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... + GO111MODULE=on go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... upload-coverage: goveralls -service=travis-ci -coverprofile=tmp.cov From 506cea606f21c05d92c4a52903d65a019822029b Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 09:07:03 -0700 Subject: [PATCH 14/74] travis: disable go modules when installing tools --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 89dc0f605..7c9111bcb 100644 --- a/Makefile +++ b/Makefile @@ -3,9 +3,9 @@ all: test lint travis: test lint upload-coverage setup: - go get github.com/mattn/goveralls - go get -u gopkg.in/alecthomas/gometalinter.v2 - gometalinter.v2 --install + GO111MODULE=off go get github.com/mattn/goveralls + GO111MODULE=off go get -u gopkg.in/alecthomas/gometalinter.v2 + GO111MODULE=off gometalinter.v2 --install lint: gometalinter.v2 ./... 
From 34a1975142cd7defafdf16076d500a3e27a35d46 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 09:18:49 -0700 Subject: [PATCH 15/74] lint: force GO111MODULE=on when running gometalinter --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7c9111bcb..b41978ef0 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ setup: GO111MODULE=off gometalinter.v2 --install lint: - gometalinter.v2 ./... + GO111MODULE=on gometalinter.v2 ./... test: GO111MODULE=on go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... From e331902f1e6b52d425d976a6a3e4569d1cf2598a Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 09:30:54 -0700 Subject: [PATCH 16/74] travis: disable lint --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b41978ef0..9b22eee17 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ all: test lint -travis: test lint upload-coverage +travis: test upload-coverage setup: GO111MODULE=off go get github.com/mattn/goveralls @@ -8,7 +8,7 @@ setup: GO111MODULE=off gometalinter.v2 --install lint: - GO111MODULE=on gometalinter.v2 ./... + gometalinter.v2 ./... test: GO111MODULE=on go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... From 4b4f9240dfa3e9b132700a9a091c9ae98836d815 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 11:38:51 -0700 Subject: [PATCH 17/74] storage: additional storage tests, added GCS test suite --- block/block_cache_test.go | 3 +- internal/storagetesting/asserts.go | 16 ++++++++++ internal/storagetesting/map.go | 2 +- internal/storagetesting/verify.go | 1 + storage/filesystem/filesystem_storage.go | 9 +++++- storage/gcs/gcs_storage.go | 27 +++++++++++++++-- storage/gcs/gcs_storage_test.go | 37 ++++++++++++++++++++++++ storage/s3/s3_storage.go | 20 ++++++++----- storage/webdav/webdav_storage.go | 2 +- 9 files changed, 104 insertions(+), 13 deletions(-) create mode 100644 storage/gcs/gcs_storage_test.go diff --git a/block/block_cache_test.go b/block/block_cache_test.go index dc2860654..9479a4de4 100644 --- a/block/block_cache_test.go +++ b/block/block_cache_test.go @@ -53,6 +53,7 @@ func TestDiskBlockCache(t *testing.T) { } verifyBlockCache(t, cache) } + func verifyBlockCache(t *testing.T, cache *blockCache) { ctx := context.Background() defer cache.close() @@ -73,7 +74,7 @@ func verifyBlockCache(t *testing.T, cache *blockCache) { {"xf0f0f2", "block-1", 0, -1, []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil}, {"xf0f0f3", "no-such-block", 0, -1, nil, storage.ErrBlockNotFound}, {"xf0f0f4", "no-such-block", 10, 5, nil, storage.ErrBlockNotFound}, - {"f0f0f5", "block-1", 7, 10, []byte{8, 9, 10}, nil}, + {"f0f0f5", "block-1", 7, 3, []byte{8, 9, 10}, nil}, {"xf0f0f6", "block-1", 11, 10, nil, fmt.Errorf("invalid offset")}, {"xf0f0f6", "block-1", -1, 5, nil, fmt.Errorf("invalid offset")}, } diff --git a/internal/storagetesting/asserts.go b/internal/storagetesting/asserts.go index e7432a8c0..beb50294e 100644 --- a/internal/storagetesting/asserts.go +++ b/internal/storagetesting/asserts.go @@ -29,6 +29,12 @@ func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block return } + b, err = s.GetBlock(ctx, block, 0, 0) + if err != nil { + t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected) + return + } + b, err = s.GetBlock(ctx, block, 0, half) if err != nil { t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected) @@ 
-49,6 +55,16 @@ func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block t.Errorf("GetBlock(%v) returned %x, but expected %x", block, b, expected[len(expected)-int(half):]) } + AssertInvalidOffsetLength(ctx, t, s, block, -3, 1) + AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)), 3) + AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)-1), 3) + AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)+1), 3) +} + +func AssertInvalidOffsetLength(ctx context.Context, t *testing.T, s storage.Storage, block string, offset, length int64) { + if _, err := s.GetBlock(ctx, block, offset, length); err == nil { + t.Errorf("GetBlock(%v,%v,%v) did not return error for invalid offset/length", block, offset, length) + } } // AssertGetBlockNotFound asserts that GetBlock() for specified storage block returns ErrBlockNotFound. diff --git a/internal/storagetesting/map.go b/internal/storagetesting/map.go index 438e1dc77..5e303e53e 100644 --- a/internal/storagetesting/map.go +++ b/internal/storagetesting/map.go @@ -35,7 +35,7 @@ func (s *mapStorage) GetBlock(ctx context.Context, id string, offset, length int data = data[offset:] if int(length) > len(data) { - return data, nil + return nil, errors.New("invalid length") } return data[0:length], nil } diff --git a/internal/storagetesting/verify.go b/internal/storagetesting/verify.go index 07eab1fed..303e54aac 100644 --- a/internal/storagetesting/verify.go +++ b/internal/storagetesting/verify.go @@ -47,6 +47,7 @@ func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) { AssertGetBlock(ctx, t, r, b.blk, b.contents) } + if err := r.DeleteBlock(ctx, blocks[0].blk); err != nil { t.Errorf("unable to delete block: %v", err) } diff --git a/storage/filesystem/filesystem_storage.go b/storage/filesystem/filesystem_storage.go index a65dd4ca8..a3a6db4c4 100644 --- a/storage/filesystem/filesystem_storage.go +++ b/storage/filesystem/filesystem_storage.go @@ -53,7 +53,14 @@ func (fs *fsStorage) GetBlock(ctx context.Context, blockID string, offset, lengt if _, err := f.Seek(offset, io.SeekStart); err != nil { return nil, err } - return ioutil.ReadAll(io.LimitReader(f, length)) + b, err := ioutil.ReadAll(io.LimitReader(f, length)) + if err != nil { + return nil, err + } + if int64(len(b)) != length { + return nil, fmt.Errorf("invalid length") + } + return b, nil } func getstringFromFileName(name string) (string, bool) { diff --git a/storage/gcs/gcs_storage.go b/storage/gcs/gcs_storage.go index eca7b5ff2..fe0f233f7 100644 --- a/storage/gcs/gcs_storage.go +++ b/storage/gcs/gcs_storage.go @@ -9,6 +9,8 @@ "io" "io/ioutil" + "google.golang.org/api/googleapi" + "github.com/efarrer/iothrottler" "github.com/kopia/repo/internal/retry" "github.com/kopia/repo/internal/throttle" @@ -37,6 +39,10 @@ type gcsStorage struct { } func (gcs *gcsStorage) GetBlock(ctx context.Context, b string, offset, length int64) ([]byte, error) { + if offset < 0 { + return nil, fmt.Errorf("invalid offset") + } + attempt := func() (interface{}, error) { reader, err := gcs.bucket.Object(gcs.getObjectNameString(b)).NewRangeReader(gcs.ctx, offset, length) if err != nil { @@ -52,7 +58,12 @@ func (gcs *gcsStorage) GetBlock(ctx context.Context, b string, offset, length in return nil, translateError(err) } - return v.([]byte), nil + fetched := v.([]byte) + if len(fetched) != int(length) && length >= 0 { + return nil, fmt.Errorf("invalid offset/length") + } + + return fetched, nil } func exponentialBackoff(desc string, att retry.AttemptFunc) 
(interface{}, error) { @@ -60,6 +71,13 @@ func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error) } func isRetriableError(err error) bool { + if apiError, ok := err.(*googleapi.Error); ok { + if apiError.Code >= 500 { + return true + } + return false + } + switch err { case nil: return false @@ -124,7 +142,12 @@ func (gcs *gcsStorage) DeleteBlock(ctx context.Context, b string) error { } _, err := exponentialBackoff(fmt.Sprintf("DeleteBlock(%q)", b), attempt) - return translateError(err) + err = translateError(err) + if err == storage.ErrBlockNotFound { + return nil + } + + return err } func (gcs *gcsStorage) getObjectNameString(blockID string) string { diff --git a/storage/gcs/gcs_storage_test.go b/storage/gcs/gcs_storage_test.go new file mode 100644 index 000000000..634c7477a --- /dev/null +++ b/storage/gcs/gcs_storage_test.go @@ -0,0 +1,37 @@ +package gcs_test + +import ( + "context" + "os" + "testing" + + "github.com/kopia/repo/internal/storagetesting" + + "github.com/kopia/repo/storage" + "github.com/kopia/repo/storage/gcs" +) + +func TestGCSStorage(t *testing.T) { + bucket := os.Getenv("KOPIA_GCS_TEST_BUCKET") + if bucket == "" { + t.Skip("KOPIA_GCS_TEST_BUCKET not provided") + } + + ctx := context.Background() + st, err := gcs.New(ctx, &gcs.Options{ + BucketName: bucket, + ServiceAccountCredentials: os.Getenv("KOPIA_GCS_CREDENTIALS_FILE"), + }) + + if err != nil { + t.Fatalf("unable to connect to GCS") + } + + if err := st.ListBlocks(ctx, "", func(bm storage.BlockMetadata) error { + return st.DeleteBlock(ctx, bm.BlockID) + }); err != nil { + t.Fatalf("unable to clear GCS bucket: %v", err) + } + + storagetesting.VerifyStorage(ctx, t, st) +} diff --git a/storage/s3/s3_storage.go b/storage/s3/s3_storage.go index 5f17b71b3..504d1211e 100644 --- a/storage/s3/s3_storage.go +++ b/storage/s3/s3_storage.go @@ -50,7 +50,16 @@ func (s *s3Storage) GetBlock(ctx context.Context, b string, offset, length int64 return nil, err } - return ioutil.ReadAll(throttled) + b, err := ioutil.ReadAll(throttled) + if err != nil { + return nil, err + } + + if len(b) != int(length) && length >= 0 { + return nil, fmt.Errorf("invalid length, got %v bytes, but expected %v", len(b), length) + } + + return b, nil } v, err := exponentialBackoff(fmt.Sprintf("GetBlock(%q,%v,%v)", b, offset, length), attempt) @@ -71,12 +80,7 @@ func isRetriableError(err error) bool { return me.StatusCode >= 500 } - switch err { - case nil: - return false - default: - return true - } + return false } func translateError(err error) error { @@ -89,6 +93,8 @@ func translateError(err error) error { } } + return err + switch err { case nil: return nil diff --git a/storage/webdav/webdav_storage.go b/storage/webdav/webdav_storage.go index 0792eaa15..21fd6ce94 100644 --- a/storage/webdav/webdav_storage.go +++ b/storage/webdav/webdav_storage.go @@ -51,7 +51,7 @@ func (d *davStorage) GetBlock(ctx context.Context, blockID string, offset, lengt data = data[offset:] if int(length) > len(data) { - return data, nil + return nil, errors.New("invalid length") } return data[0:length], nil From 7cc5f37e6196b481dac65f0cc805c1209057a816 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 11:45:46 -0700 Subject: [PATCH 18/74] storage/gcs: added GCS test encrypted service account --- .gitignore | 2 ++ .travis.yml | 7 +++++-- test_service_account.json.enc | Bin 0 -> 2320 bytes 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 test_service_account.json.enc diff --git a/.gitignore b/.gitignore index 
28bb98eeb..3ff849040 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ # Output of the go coverage tool, specifically when used with LiteIDE *.out *.cov + +*service_account.json diff --git a/.travis.yml b/.travis.yml index e5f710075..8420c14e3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,10 @@ language: go go: -- "1.11" +- '1.11' os: - linux -before_install: make setup +before_install: +- openssl aes-256-cbc -K $encrypted_0098ef8519ef_key -iv $encrypted_0098ef8519ef_iv + -in test_service_account.json.enc -out storage/gcs/test_service_account.json -d +- make setup script: make travis diff --git a/test_service_account.json.enc b/test_service_account.json.enc new file mode 100644 index 0000000000000000000000000000000000000000..20cb866b6c6af7fb0930b665c2b371e89331111b GIT binary patch literal 2320 zcmV+r3Gepa++djcLI^zCTyltV-l9I$PUf6VVRd77kt#V`{w>;UtqDFyOmw{pftD?> z9CX%a1<;I=1d449j?1XIVDttaO)Qb8isk(6ynxpAzZj#WG`*QRVL;_-!`tp59wp-y zEh8wT0f`Vh)MHRiR*||ZN_+X*2p-P94`WNY|OZ|{T z9go=E3SE*VCal?<#@>y9s874nxHhGmo@jr%qdLb1x={Ncsg(r!kZGjzqUpf;+i0=k zk#(I81m{b6Gn+(;MLlVKPHyb6s$tSI_Bu@_{(=Ty&1IL_4mAVt!|$4dvp=YGpPLem zpEu~gXVt+Gcf^RYe72NcAwycw$T{O8uTsGglBc0u!m;3Gs^c24SiUgRdw+W5GBkeh zAnoL@Fw&zEj*;S7V&dQt`SVjkXz~zH)RQ1y{!(~9$z2#Kx{X}r1-KET*Uwy;KUBbfl$m)2WoGUd6foAI ztR!9btX+G5g9ducSD2zSbJ?2F1|UvYsh^rNMc|nJWaPdUBJQK|x~oK{iM;J6HWkmQ zG(!eiCnRv4-}FBrx$3ElpLl`1cAjhd17S+_^S|t3!Fn<-7azni5LdH`1;DCpS&eM1S+| zA(@c(F1dDZl*yU?rlq`PSV`gR&w>M9c#%QGa;!f_;u%GXW!A{kV;L;1r)xmQVq0&j9o_yeqMF z3^_XREQFP3kWOKswU^L>1(=(|d^!@1Y~oE%n68gimlDQ=x|TCA;(kUJq&xH=hCv^T zzqM4{`u{CHq*gT=nuJsNV;hi_bu(jRUA|FQogb%&Z|WZcI=#3*!Gvdrhw$Ix*@(@txqMY1b7arw7} zPn)jB>@PzWcZ4AKp4!L)`Y{uk6xX~zHbZ0Djyl4wkHqFMDHwPjuxJs@YsTVkMo5ajuvnQN?k~ zRRRE6$RaaDWd*<8AUa|`M7iG^tzd$Q3RTS{ZX(1iGirRHvM&NZ$6;O~z47bQc<$CK z`tJMC!}^W|$+is(>eOgl6@{&!W&+necgxJP4ZFtlC6ut@H{monxoL4HV!+tsZRFtn zS@S5CDqE5HI{`Y&8*xPh>*qo54!coC#Fhs@jIq(8ZNPYfEMViVW^P$4-u}hkl-T)0 z3iRRnzCr}>V@}__&S*rxt3`U_hhc#ydCwb4*581d(vNlKynu=Dgvngus4USJ9oKuaCCdP@||NieGHz)}}iUiHEm*| zKw5r6NL@tS-*!&PU-1QD$^(`t8uUTih+dC5I0A>ZFwTao)vuCf-K;#HIW8@ zMY|5Ny5K(p)9DWiCxIgolrPcvWskb*`ddK3aSQFi4>IQrm#-AY(-m#s(3Sk;d_!3{ z_gZ4uJBkUF`QB0+w0|il)&>s}^JeLUlaFln>#xisJ_oF zSx5J&8kZ(fVm?V@o^VHF6udgR9(zthFssVx1X4{g2(F}a)VAEyP}J$ASTNNIs@F$J zkEvi@gOC6H_5W4xFJw||QBP2V*F><7)f_SxtIG)_26A4oE|IBtOj+& z=eIB$o@=QTub6r8VHkmNR@$-n9oV}O)nYiR^D>ma@UT+5Y1>HU!I|j{plKr{kfSlS z$*lDnFGgg|cBex`>?P9;xspL$3O1zM3;4s95`%ymFxsF^Q2r>>(r_B7>Y}uaX-vjn zTcABuRh@8|EC|82div+UzG+O&^nA$Tc?YcHy2Ni;7Yyk1Ta;S4G)f9?47cYeLT_Q zLF&Ht(ETK8YK;mNWNDh(GegFtjWAQ3{qxP5i@qlsB8ZUCJ|z2KG)!BDS*H0cv@6w# q&wvNk%M{t9Z7&!80EnNgvt)nYQ|F>Y8Qc4Cg?Xv+%UISE?mSS^hl+s! 
literal 0 HcmV?d00001 From 5dbb96023a355470fe0e44ab42967a75ba63d159 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 11:48:53 -0700 Subject: [PATCH 19/74] travis: removed lint setup --- .travis.yml | 2 +- Makefile | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8420c14e3..1e1b96740 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,5 +6,5 @@ os: before_install: - openssl aes-256-cbc -K $encrypted_0098ef8519ef_key -iv $encrypted_0098ef8519ef_iv -in test_service_account.json.enc -out storage/gcs/test_service_account.json -d -- make setup +- make travis-setup script: make travis diff --git a/Makefile b/Makefile index 9b22eee17..abcad9090 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,9 @@ setup: GO111MODULE=off go get -u gopkg.in/alecthomas/gometalinter.v2 GO111MODULE=off gometalinter.v2 --install +travis-setup: + GO111MODULE=off go get github.com/mattn/goveralls + lint: gometalinter.v2 ./... From 306fa0fa8d36bfc5743211435c0d56046918f6d4 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 11:56:26 -0700 Subject: [PATCH 20/74] storage/s3: fixed zero-length range --- storage/s3/s3_storage.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/storage/s3/s3_storage.go b/storage/s3/s3_storage.go index 504d1211e..7779cd79a 100644 --- a/storage/s3/s3_storage.go +++ b/storage/s3/s3_storage.go @@ -55,10 +55,14 @@ func (s *s3Storage) GetBlock(ctx context.Context, b string, offset, length int64 return nil, err } - if len(b) != int(length) && length >= 0 { + if len(b) != int(length) && length > 0 { return nil, fmt.Errorf("invalid length, got %v bytes, but expected %v", len(b), length) } + if length == 0 { + return []byte{}, nil + } + return b, nil } From 0eded3877609e9408fa2b127d660e433572f64bb Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 11:56:46 -0700 Subject: [PATCH 21/74] storage/gcs: additional logging for gcs test --- storage/gcs/gcs_storage_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/gcs/gcs_storage_test.go b/storage/gcs/gcs_storage_test.go index 634c7477a..a7a60df4e 100644 --- a/storage/gcs/gcs_storage_test.go +++ b/storage/gcs/gcs_storage_test.go @@ -24,7 +24,7 @@ func TestGCSStorage(t *testing.T) { }) if err != nil { - t.Fatalf("unable to connect to GCS") + t.Fatalf("unable to connect to GCS: %v", err) } if err := st.ListBlocks(ctx, "", func(bm storage.BlockMetadata) error { From 4237bbc42ac9e5de4ab68cdb06b8a2257190da93 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 12:07:54 -0700 Subject: [PATCH 22/74] storagetesting: verify progress callbacks --- internal/storagetesting/verify.go | 6 +++++- storage/gcs/gcs_storage_test.go | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/storagetesting/verify.go b/internal/storagetesting/verify.go index 303e54aac..a8a3486af 100644 --- a/internal/storagetesting/verify.go +++ b/internal/storagetesting/verify.go @@ -27,9 +27,13 @@ func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) { AssertGetBlockNotFound(ctx, t, r, b.blk) } + ctx2 := storage.WithUploadProgressCallback(ctx, func(desc string, completed, total int64) { + log.Infof("progress %v: %v/%v", desc, completed, total) + }) + // Now add blocks. 
for _, b := range blocks { - if err := r.PutBlock(ctx, b.blk, b.contents); err != nil { + if err := r.PutBlock(ctx2, b.blk, b.contents); err != nil { t.Errorf("can't put block: %v", err) } diff --git a/storage/gcs/gcs_storage_test.go b/storage/gcs/gcs_storage_test.go index a7a60df4e..2859bde4d 100644 --- a/storage/gcs/gcs_storage_test.go +++ b/storage/gcs/gcs_storage_test.go @@ -34,4 +34,6 @@ func TestGCSStorage(t *testing.T) { } storagetesting.VerifyStorage(ctx, t, st) + storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st) + } From 7196891d175605fa18d0f3070c3546f202f6a734 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 12:09:30 -0700 Subject: [PATCH 23/74] storagetesting: added Close() --- internal/storagetesting/verify.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/storagetesting/verify.go b/internal/storagetesting/verify.go index a8a3486af..3e384a001 100644 --- a/internal/storagetesting/verify.go +++ b/internal/storagetesting/verify.go @@ -64,7 +64,7 @@ func VerifyStorage(ctx context.Context, t *testing.T, r storage.Storage) { // AssertConnectionInfoRoundTrips verifies that the ConnectionInfo returned by a given storage can be used to create // equivalent storage -func AssertConnectionInfoRoundTrips(ctx context.Context, t *testing.T, s storage.Storage) storage.Storage { +func AssertConnectionInfoRoundTrips(ctx context.Context, t *testing.T, s storage.Storage) { t.Helper() ci := s.ConnectionInfo() @@ -78,5 +78,7 @@ func AssertConnectionInfoRoundTrips(ctx context.Context, t *testing.T, s storage t.Errorf("connection info does not round-trip: %v vs %v", ci, ci2) } - return s2 + if err := s2.Close(ctx); err != nil { + t.Errorf("unable to close storage: %v", err) + } } From 8dee493bf3abe2ea18dbae8f4273405709a6636c Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 12:14:13 -0700 Subject: [PATCH 24/74] storagetesting: added test case for zero length --- internal/storagetesting/asserts.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/storagetesting/asserts.go b/internal/storagetesting/asserts.go index beb50294e..a6a61e498 100644 --- a/internal/storagetesting/asserts.go +++ b/internal/storagetesting/asserts.go @@ -35,6 +35,11 @@ func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block return } + if len(b) != 0 { + t.Errorf("GetBlock(%v) returned non-zero length: %v", block, len(b)) + return + } + b, err = s.GetBlock(ctx, block, 0, half) if err != nil { t.Errorf("GetBlock(%v) returned error %v, expected data: %v", block, err, expected) @@ -61,6 +66,7 @@ func AssertGetBlock(ctx context.Context, t *testing.T, s storage.Storage, block AssertInvalidOffsetLength(ctx, t, s, block, int64(len(expected)+1), 3) } +// AssertInvalidOffsetLength verifies that the given combination of (offset,length) fails on GetBlock() func AssertInvalidOffsetLength(ctx context.Context, t *testing.T, s storage.Storage, block string, offset, length int64) { if _, err := s.GetBlock(ctx, block, offset, length); err == nil { t.Errorf("GetBlock(%v,%v,%v) did not return error for invalid offset/length", block, offset, length) From c494a94ca4a2bea22c6ba881a6c2f26e2b272188 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 27 Oct 2018 12:24:49 -0700 Subject: [PATCH 25/74] storage: test cleanup --- storage/gcs/gcs_storage_test.go | 6 ++++++ storage/s3/s3_storage.go | 7 ------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/storage/gcs/gcs_storage_test.go b/storage/gcs/gcs_storage_test.go 
index 2859bde4d..03e8ae724 100644 --- a/storage/gcs/gcs_storage_test.go +++ b/storage/gcs/gcs_storage_test.go @@ -36,4 +36,10 @@ func TestGCSStorage(t *testing.T) { storagetesting.VerifyStorage(ctx, t, st) storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st) + // delete everything again + if err := st.ListBlocks(ctx, "", func(bm storage.BlockMetadata) error { + return st.DeleteBlock(ctx, bm.BlockID) + }); err != nil { + t.Fatalf("unable to clear GCS bucket: %v", err) + } } diff --git a/storage/s3/s3_storage.go b/storage/s3/s3_storage.go index 7779cd79a..ae78a8788 100644 --- a/storage/s3/s3_storage.go +++ b/storage/s3/s3_storage.go @@ -98,13 +98,6 @@ func translateError(err error) error { } return err - - switch err { - case nil: - return nil - default: - return fmt.Errorf("unexpected S3 error: %v", err) - } } func (s *s3Storage) PutBlock(ctx context.Context, b string, data []byte) error { From 4eea68077c447c47a9aaaee6758fb060af4b63ff Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 28 Oct 2018 20:26:13 -0700 Subject: [PATCH 26/74] example: moved example from kopia/kopia/examples to this repository --- examples/upload_download/main.go | 38 +++++++++++ examples/upload_download/setup_repository.go | 54 +++++++++++++++ .../upload_download_objects.go | 65 +++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 examples/upload_download/main.go create mode 100644 examples/upload_download/setup_repository.go create mode 100644 examples/upload_download/upload_download_objects.go diff --git a/examples/upload_download/main.go b/examples/upload_download/main.go new file mode 100644 index 000000000..500def9c9 --- /dev/null +++ b/examples/upload_download/main.go @@ -0,0 +1,38 @@ +// Command repository_api demonstrates the use of Kopia's Repository API. +package main + +import ( + "context" + "log" + "os" + + "github.com/kopia/repo" +) + +func main() { + ctx := context.Background() + + if err := setupRepositoryAndConnect(ctx, masterPassword); err != nil { + log.Printf("unable to set up repository: %v", err) + os.Exit(1) + } + + r, err := repo.Open(ctx, configFile, masterPassword, nil) + if err != nil { + log.Printf("unable to open repository: %v", err) + os.Exit(1) + } + defer r.Close(ctx) //nolint:errcheck + + uploadAndDownloadObjects(ctx, r) + + // Now list blocks found in the repository. 
+ blks, err := r.Blocks.ListBlocks("") + if err != nil { + log.Printf("err: %v", err) + } + + for _, b := range blks { + log.Printf("found block %v", b) + } +} diff --git a/examples/upload_download/setup_repository.go b/examples/upload_download/setup_repository.go new file mode 100644 index 000000000..4bd928de1 --- /dev/null +++ b/examples/upload_download/setup_repository.go @@ -0,0 +1,54 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/kopia/repo" + "github.com/kopia/repo/block" + "github.com/kopia/repo/storage/filesystem" + "github.com/kopia/repo/storage/logging" +) + +const ( + masterPassword = "my-password$!@#!@" + storageDir = "/tmp/kopia-example/storage" + configFile = "/tmp/kopia-example/config" + cacheDirectory = "/tmp/kopia-example/cache" +) + +func setupRepositoryAndConnect(ctx context.Context, password string) error { + if err := os.MkdirAll(storageDir, 0700); err != nil { + return fmt.Errorf("unable to create directory: %v", err) + } + st, err := filesystem.New(ctx, &filesystem.Options{ + Path: storageDir, + }) + if err != nil { + return fmt.Errorf("unable to connect to storage: %v", err) + } + + // set up logging so we can see what's going on + st = logging.NewWrapper(st) + + // see if we already have the config file, if not connect. + if _, err := os.Stat(configFile); os.IsNotExist(err) { + // initialize repository + if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{}, password); err != nil { + return fmt.Errorf("unable to initialize repository: %v", err) + } + + // now establish connection to repository and create configuration file. + if err := repo.Connect(ctx, configFile, st, password, repo.ConnectOptions{ + CachingOptions: block.CachingOptions{ + CacheDirectory: cacheDirectory, + MaxCacheSizeBytes: 100000000, + }, + }); err != nil { + return fmt.Errorf("unable to connect to repository: %v", err) + } + } + + return nil +} diff --git a/examples/upload_download/upload_download_objects.go b/examples/upload_download/upload_download_objects.go new file mode 100644 index 000000000..53e37cd7e --- /dev/null +++ b/examples/upload_download/upload_download_objects.go @@ -0,0 +1,65 @@ +package main + +import ( + "context" + "crypto/rand" + "io/ioutil" + "log" + "os" + + "github.com/kopia/repo" + "github.com/kopia/repo/object" +) + +func uploadRandomObject(ctx context.Context, r *repo.Repository, length int) (object.ID, error) { + w := r.Objects.NewWriter(ctx, object.WriterOptions{}) + defer w.Close() //nolint:errcheck + + buf := make([]byte, 256*1024) + for length > 0 { + todo := length + if todo > len(buf) { + todo = len(buf) + } + rand.Read(buf[0:todo]) //nolint:errcheck + if _, err := w.Write(buf[0:todo]); err != nil { + return "", err + } + length -= todo + } + return w.Result() +} + +func downloadObject(ctx context.Context, r *repo.Repository, oid object.ID) ([]byte, error) { + rd, err := r.Objects.Open(ctx, oid) + if err != nil { + return nil, err + } + defer rd.Close() //nolint:errcheck + + return ioutil.ReadAll(rd) +} + +func uploadAndDownloadObjects(ctx context.Context, r *repo.Repository) { + var oids []object.ID + + for size := 100; size < 100000000; size *= 2 { + log.Printf("uploading file with %v bytes", size) + oid, err := uploadRandomObject(ctx, r, size) + if err != nil { + log.Printf("unable to upload: %v", err) + os.Exit(1) + } + log.Printf("uploaded %v bytes as %v", size, oid) + oids = append(oids, oid) + } + + for _, oid := range oids { + log.Printf("downloading %q", oid) + b, err := downloadObject(ctx, r, oid) + if err != nil { + 
log.Printf("unable to read object: %v", err)
+		}
+		log.Printf("downloaded %v", len(b))
+	}
+}

From ee0189596189f58e053f63561692895569433ce6 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Mon, 29 Oct 2018 19:27:00 -0700
Subject: [PATCH 27/74] examples: excluded from code coverage

---
 Makefile                                            | 2 +-
 examples/upload_download/main.go                    | 2 ++
 examples/upload_download/setup_repository.go        | 2 ++
 examples/upload_download/upload_download_objects.go | 2 ++
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index abcad9090..64a419a63 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ lint:
 	gometalinter.v2 ./...
 
 test:
-	GO111MODULE=on go test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./...
+	GO111MODULE=on go test -tags test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./...
 
 upload-coverage:
 	goveralls -service=travis-ci -coverprofile=tmp.cov

diff --git a/examples/upload_download/main.go b/examples/upload_download/main.go
index 500def9c9..2dedc41bf 100644
--- a/examples/upload_download/main.go
+++ b/examples/upload_download/main.go
@@ -1,3 +1,5 @@
+//+build !test
+
 // Command repository_api demonstrates the use of Kopia's Repository API.
 package main
 
diff --git a/examples/upload_download/setup_repository.go b/examples/upload_download/setup_repository.go
index 4bd928de1..8655074db 100644
--- a/examples/upload_download/setup_repository.go
+++ b/examples/upload_download/setup_repository.go
@@ -1,3 +1,5 @@
+//+build !test
+
 package main
 
 import (
diff --git a/examples/upload_download/upload_download_objects.go b/examples/upload_download/upload_download_objects.go
index 53e37cd7e..bfddf52a6 100644
--- a/examples/upload_download/upload_download_objects.go
+++ b/examples/upload_download/upload_download_objects.go
@@ -1,3 +1,5 @@
+//+build !test
+
 package main
 
 import (

From 12286ced5793165b6ab265cc53ebf00580442382 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Mon, 29 Oct 2018 19:43:57 -0700
Subject: [PATCH 28/74] storage/gcs: tests for non-existent buckets

---
 storage/gcs/gcs_storage.go      |  4 ----
 storage/gcs/gcs_storage_test.go | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/storage/gcs/gcs_storage.go b/storage/gcs/gcs_storage.go
index fe0f233f7..05a731135 100644
--- a/storage/gcs/gcs_storage.go
+++ b/storage/gcs/gcs_storage.go
@@ -190,10 +190,6 @@ func (gcs *gcsStorage) Close(ctx context.Context) error {
 	return nil
 }
-func (gcs *gcsStorage) String() string {
-	return fmt.Sprintf("gcs://%v/%v", gcs.BucketName, gcs.Prefix)
-}
-
 func toBandwidth(bytesPerSecond int) iothrottler.Bandwidth {
 	if bytesPerSecond <= 0 {
 		return iothrottler.Unlimited
 	}
diff --git a/storage/gcs/gcs_storage_test.go b/storage/gcs/gcs_storage_test.go
index 03e8ae724..edc55fb3b 100644
--- a/storage/gcs/gcs_storage_test.go
+++ b/storage/gcs/gcs_storage_test.go
@@ -43,3 +43,25 @@ func TestGCSStorage(t *testing.T) {
 		t.Fatalf("unable to clear GCS bucket: %v", err)
 	}
 }
+
+func TestGCSStorageInvalid(t *testing.T) {
+	bucket := os.Getenv("KOPIA_GCS_TEST_BUCKET")
+	if bucket == "" {
+		t.Skip("KOPIA_GCS_TEST_BUCKET not provided")
+	}
+
+	ctx := context.Background()
+	st, err := gcs.New(ctx, &gcs.Options{
+		BucketName:                bucket + "-no-such-bucket",
+		ServiceAccountCredentials: os.Getenv("KOPIA_GCS_CREDENTIALS_FILE"),
+	})
+
+	if err != nil {
+		t.Fatalf("unable to connect to GCS: %v", err)
+	}
+
+	defer st.Close(ctx)
+	if err := st.PutBlock(ctx, "xxx", []byte{1, 2, 3}); err == nil {
+		t.Errorf("unexpected success when adding to
non-existent bucket") + } +} From 849817c2bf51d92af449648c0d7eff19aa1f1758 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Tue, 30 Oct 2018 19:00:36 -0700 Subject: [PATCH 29/74] manifest: additional tests --- manifest/manifest_manager.go | 17 +++++++--- manifest/manifest_manager_test.go | 52 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/manifest/manifest_manager.go b/manifest/manifest_manager.go index 9dcd0f11e..3809e1665 100644 --- a/manifest/manifest_manager.go +++ b/manifest/manifest_manager.go @@ -14,7 +14,6 @@ "sync" "time" - "github.com/kopia/repo/block" "github.com/kopia/repo/internal/repologging" "github.com/kopia/repo/storage" ) @@ -27,10 +26,20 @@ const manifestBlockPrefix = "m" const autoCompactionBlockCount = 16 +type blockManager interface { + GetBlock(ctx context.Context, blockID string) ([]byte, error) + WriteBlock(ctx context.Context, data []byte, prefix string) (string, error) + DeleteBlock(blockID string) error + ListBlocks(prefix string) ([]string, error) + DisableIndexFlush() + EnableIndexFlush() + Flush(ctx context.Context) error +} + // Manager organizes JSON manifests of various kinds, including snapshot manifests type Manager struct { mu sync.Mutex - b *block.Manager + b blockManager initialized bool pendingEntries map[string]*manifestEntry @@ -58,7 +67,7 @@ func (m *Manager) Put(ctx context.Context, labels map[string]string, payload int b, err := json.Marshal(payload) if err != nil { - return "", err + return "", fmt.Errorf("marshal error: %v", err) } e := &manifestEntry{ @@ -502,7 +511,7 @@ func copyLabels(m map[string]string) map[string]string { } // NewManager returns new manifest manager for the provided block manager. -func NewManager(ctx context.Context, b *block.Manager) (*Manager, error) { +func NewManager(ctx context.Context, b blockManager) (*Manager, error) { m := &Manager{ b: b, pendingEntries: map[string]*manifestEntry{}, diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go index 2e6b3bd70..16b6c9f0f 100644 --- a/manifest/manifest_manager_test.go +++ b/manifest/manifest_manager_test.go @@ -5,6 +5,7 @@ "fmt" "reflect" "sort" + "strings" "testing" "time" @@ -146,6 +147,15 @@ func verifyItem(ctx context.Context, t *testing.T, mgr *Manager, id string, labe if !reflect.DeepEqual(l.Labels, labels) { t.Errorf("invalid labels retrieved %v, wanted %v", l.Labels, labels) } + + var d2 map[string]int + if err := mgr.Get(ctx, id, &d2); err != nil { + t.Errorf("Get failed: %v", err) + } + + if !reflect.DeepEqual(d2, data) { + t.Errorf("invalid data retrieved %v, wanted %v", d2, data) + } } func verifyItemNotFound(ctx context.Context, t *testing.T, mgr *Manager, id string) { @@ -191,3 +201,45 @@ func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]b return NewManager(ctx, bm) } + +func TestManifestInvalidPut(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + mgr, setupErr := newManagerForTesting(ctx, t, data) + if setupErr != nil { + t.Fatalf("unable to open block manager: %v", setupErr) + } + + cases := []struct { + labels map[string]string + payload interface{} + expectedError string + }{ + {map[string]string{"": ""}, "xxx", "'type' label is required"}, + {map[string]string{"type": "blah"}, complex128(1), "marshal error"}, + } + + for i, tc := range cases { + _, err := mgr.Put(ctx, tc.labels, tc.payload) + if err == nil || !strings.Contains(err.Error(), tc.expectedError) { + t.Errorf("invalid error when putting case %v: %v, 
expected %v", i, err, tc.expectedError) + } + } +} + +func TestManifestAutoCompaction(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + + for i := 0; i < 100; i++ { + mgr, setupErr := newManagerForTesting(ctx, t, data) + if setupErr != nil { + t.Fatalf("unable to open block manager: %v", setupErr) + } + + item1 := map[string]int{"foo": 1, "bar": 2} + labels1 := map[string]string{"type": "item", "color": "red"} + addAndVerify(ctx, t, mgr, labels1, item1) + mgr.Flush(ctx) + } +} From 82653d3736fdec9560b0b4f555ea29a477c8cd8e Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 31 Oct 2018 21:14:19 -0700 Subject: [PATCH 30/74] packindex: merged package into /block, removed unnecessary visibility and dead code --- block/block_index_recovery.go | 9 ++- block/block_manager.go | 18 +++--- block/block_manager_compaction.go | 8 +-- block/block_manager_test.go | 5 +- {internal/packindex => block}/builder.go | 17 ++---- block/committed_block_index.go | 17 +++--- block/committed_block_index_disk_cache.go | 5 +- block/committed_block_index_mem_cache.go | 8 +-- .../content_id_to_bytes.go | 2 +- {internal/packindex => block}/format.go | 2 +- {internal/packindex => block}/index.go | 10 ++-- {internal/packindex => block}/info.go | 2 +- {internal/packindex => block}/merged.go | 16 ++--- {internal/packindex => block}/merged_test.go | 36 ++++++----- .../packindex_internal_test.go | 2 +- .../packindex => block}/packindex_test.go | 28 ++++----- internal/packindex/subset.go | 28 --------- internal/packindex/subset_test.go | 60 ------------------- 18 files changed, 82 insertions(+), 191 deletions(-) rename {internal/packindex => block}/builder.go (92%) rename {internal/packindex => block}/content_id_to_bytes.go (96%) rename {internal/packindex => block}/format.go (99%) rename {internal/packindex => block}/index.go (94%) rename {internal/packindex => block}/info.go (97%) rename {internal/packindex => block}/merged.go (84%) rename {internal/packindex => block}/merged_test.go (53%) rename {internal/packindex => block}/packindex_internal_test.go (95%) rename {internal/packindex => block}/packindex_test.go (90%) delete mode 100644 internal/packindex/subset.go delete mode 100644 internal/packindex/subset_test.go diff --git a/block/block_index_recovery.go b/block/block_index_recovery.go index d9bd6f8fd..275a78655 100644 --- a/block/block_index_recovery.go +++ b/block/block_index_recovery.go @@ -7,8 +7,6 @@ "fmt" "hash/crc32" "reflect" - - "github.com/kopia/repo/internal/packindex" ) // RecoverIndexFromPackFile attempts to recover index block entries from a given pack file. 
@@ -19,10 +17,11 @@ func (bm *Manager) RecoverIndexFromPackFile(ctx context.Context, packFile string return nil, err } - ndx, err := packindex.Open(bytes.NewReader(localIndexBytes)) + ndx, err := openPackIndex(bytes.NewReader(localIndexBytes)) if err != nil { return nil, fmt.Errorf("unable to open index in file %v", packFile) } + defer ndx.Close() var recovered []Info @@ -147,7 +146,7 @@ func decodePostamble(payload []byte) *packBlockPostamble { } } -func (bm *Manager) buildLocalIndex(pending packindex.Builder) ([]byte, error) { +func (bm *Manager) buildLocalIndex(pending packIndexBuilder) ([]byte, error) { var buf bytes.Buffer if err := pending.Build(&buf); err != nil { return nil, fmt.Errorf("unable to build local index: %v", err) @@ -157,7 +156,7 @@ func (bm *Manager) buildLocalIndex(pending packindex.Builder) ([]byte, error) { } // appendPackFileIndexRecoveryData appends data designed to help with recovery of pack index in case it gets damaged or lost. -func (bm *Manager) appendPackFileIndexRecoveryData(blockData []byte, pending packindex.Builder) ([]byte, error) { +func (bm *Manager) appendPackFileIndexRecoveryData(blockData []byte, pending packIndexBuilder) ([]byte, error) { // build, encrypt and append local index localIndexOffset := len(blockData) localIndex, err := bm.buildLocalIndex(pending) diff --git a/block/block_manager.go b/block/block_manager.go index ca1aaa6e7..1ef3f04f4 100644 --- a/block/block_manager.go +++ b/block/block_manager.go @@ -18,7 +18,6 @@ "sync/atomic" "time" - "github.com/kopia/repo/internal/packindex" "github.com/kopia/repo/internal/repologging" "github.com/kopia/repo/storage" ) @@ -44,9 +43,6 @@ indexLoadAttempts = 10 ) -// Info is an information about a single block managed by Manager. -type Info = packindex.Info - // IndexInfo is an information about a single index block managed by Manager. 
type IndexInfo struct { FileName string @@ -67,9 +63,9 @@ type Manager struct { locked bool checkInvariantsOnUnlock bool - currentPackItems map[string]Info // blocks that are in the pack block currently being built (all inline) - currentPackDataLength int // total length of all items in the current pack block - packIndexBuilder packindex.Builder // blocks that are in index currently being built (current pack and all packs saved but not committed) + currentPackItems map[string]Info // blocks that are in the pack block currently being built (all inline) + currentPackDataLength int // total length of all items in the current pack block + packIndexBuilder packIndexBuilder // blocks that are in index currently being built (current pack and all packs saved but not committed) committedBlocks *committedBlockIndex disableIndexFlushCount int @@ -300,7 +296,7 @@ func (bm *Manager) flushPackIndexesLocked(ctx context.Context) error { if err := bm.committedBlocks.addBlock(indexBlockID, dataCopy, true); err != nil { return fmt.Errorf("unable to add committed block: %v", err) } - bm.packIndexBuilder = packindex.NewBuilder() + bm.packIndexBuilder = make(packIndexBuilder) } bm.flushPackIndexesAfter = bm.timeNow().Add(flushPackIndexTimeout) @@ -352,7 +348,7 @@ func (bm *Manager) writePackBlockLocked(ctx context.Context) error { return nil } -func (bm *Manager) preparePackDataBlock(packFile string) ([]byte, packindex.Builder, error) { +func (bm *Manager) preparePackDataBlock(packFile string) ([]byte, packIndexBuilder, error) { formatLog.Debugf("preparing block data with %v items", len(bm.currentPackItems)) blockData, err := appendRandomBytes(nil, rand.Intn(bm.maxPreambleLength-bm.minPreambleLength+1)+bm.minPreambleLength) @@ -360,7 +356,7 @@ func (bm *Manager) preparePackDataBlock(packFile string) ([]byte, packindex.Buil return nil, nil, fmt.Errorf("unable to prepare block preamble: %v", err) } - packFileIndex := packindex.Builder{} + packFileIndex := packIndexBuilder{} for blockID, info := range bm.currentPackItems { if info.Payload == nil { continue @@ -985,7 +981,7 @@ func newManagerWithOptions(ctx context.Context, st storage.Storage, f Formatting maxPackSize: f.MaxPackSize, formatter: formatter, currentPackItems: make(map[string]Info), - packIndexBuilder: packindex.NewBuilder(), + packIndexBuilder: make(packIndexBuilder), committedBlocks: blockIndex, minPreambleLength: defaultMinPreambleLength, maxPreambleLength: defaultMaxPreambleLength, diff --git a/block/block_manager_compaction.go b/block/block_manager_compaction.go index b6306d8b9..01638b708 100644 --- a/block/block_manager_compaction.go +++ b/block/block_manager_compaction.go @@ -5,8 +5,6 @@ "context" "fmt" "time" - - "github.com/kopia/repo/internal/packindex" ) var autoCompactionOptions = CompactOptions{ @@ -91,7 +89,7 @@ func (bm *Manager) compactAndDeleteIndexBlocks(ctx context.Context, indexBlocks formatLog.Debugf("compacting %v blocks", len(indexBlocks)) t0 := time.Now() - bld := packindex.NewBuilder() + bld := make(packIndexBuilder) for _, indexBlock := range indexBlocks { if err := bm.addIndexBlocksToBuilder(ctx, bld, indexBlock, opt); err != nil { return err @@ -124,13 +122,13 @@ func (bm *Manager) compactAndDeleteIndexBlocks(ctx context.Context, indexBlocks return nil } -func (bm *Manager) addIndexBlocksToBuilder(ctx context.Context, bld packindex.Builder, indexBlock IndexInfo, opt CompactOptions) error { +func (bm *Manager) addIndexBlocksToBuilder(ctx context.Context, bld packIndexBuilder, indexBlock IndexInfo, opt CompactOptions) error { 
data, err := bm.getPhysicalBlockInternal(ctx, indexBlock.FileName) if err != nil { return err } - index, err := packindex.Open(bytes.NewReader(data)) + index, err := openPackIndex(bytes.NewReader(data)) if err != nil { return fmt.Errorf("unable to open index block %q: %v", indexBlock, err) } diff --git a/block/block_manager_test.go b/block/block_manager_test.go index cce7b9e51..1c71714d2 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -15,7 +15,6 @@ "testing" "time" - "github.com/kopia/repo/internal/packindex" "github.com/kopia/repo/internal/storagetesting" "github.com/kopia/repo/storage" logging "github.com/op/go-logging" @@ -803,10 +802,10 @@ func dumpBlockManagerData(t *testing.T, data map[string][]byte) { t.Helper() for k, v := range data { if k[0] == 'n' { - ndx, err := packindex.Open(bytes.NewReader(v)) + ndx, err := openPackIndex(bytes.NewReader(v)) if err == nil { t.Logf("index %v (%v bytes)", k, len(v)) - ndx.Iterate("", func(i packindex.Info) error { + ndx.Iterate("", func(i Info) error { t.Logf(" %+v\n", i) return nil }) diff --git a/internal/packindex/builder.go b/block/builder.go similarity index 92% rename from internal/packindex/builder.go rename to block/builder.go index bc71c51f2..276d8e7c5 100644 --- a/internal/packindex/builder.go +++ b/block/builder.go @@ -1,4 +1,4 @@ -package packindex +package block import ( "bufio" @@ -8,18 +8,18 @@ "sort" ) -// Builder prepares and writes block index for writing. -type Builder map[string]*Info +// packIndexBuilder prepares and writes block index for writing. +type packIndexBuilder map[string]*Info // Add adds a new entry to the builder or conditionally replaces it if the timestamp is greater. -func (b Builder) Add(i Info) { +func (b packIndexBuilder) Add(i Info) { old, ok := b[i.BlockID] if !ok || i.TimestampSeconds >= old.TimestampSeconds { b[i.BlockID] = &i } } -func (b Builder) sortedBlocks() []*Info { +func (b packIndexBuilder) sortedBlocks() []*Info { var allBlocks []*Info for _, v := range b { @@ -42,7 +42,7 @@ type indexLayout struct { } // Build writes the pack index to the provided output. -func (b Builder) Build(output io.Writer) error { +func (b packIndexBuilder) Build(output io.Writer) error { allBlocks := b.sortedBlocks() layout := &indexLayout{ packFileOffsets: map[string]uint32{}, @@ -145,8 +145,3 @@ func formatEntry(entry []byte, it *Info, layout *indexLayout) error { binary.BigEndian.PutUint64(entryTimestampAndFlags, timestampAndFlags) return nil } - -// NewBuilder creates a new Builder. 
-func NewBuilder() Builder { - return make(map[string]*Info) -} diff --git a/block/committed_block_index.go b/block/committed_block_index.go index ddc6dac86..2a4a81efc 100644 --- a/block/committed_block_index.go +++ b/block/committed_block_index.go @@ -5,7 +5,6 @@ "path/filepath" "sync" - "github.com/kopia/repo/internal/packindex" "github.com/kopia/repo/storage" ) @@ -13,14 +12,14 @@ type committedBlockIndex struct { cache committedBlockIndexCache mu sync.Mutex - inUse map[string]packindex.Index - merged packindex.Merged + inUse map[string]packIndex + merged mergedIndex } type committedBlockIndexCache interface { hasIndexBlockID(indexBlockID string) (bool, error) addBlockToCache(indexBlockID string, data []byte) error - openIndex(indexBlockID string) (packindex.Index, error) + openIndex(indexBlockID string) (packIndex, error) expireUnused(used []string) error } @@ -65,7 +64,7 @@ func (b *committedBlockIndex) addBlock(indexBlockID string, data []byte, use boo func (b *committedBlockIndex) listBlocks(prefix string, cb func(i Info) error) error { b.mu.Lock() - m := append(packindex.Merged(nil), b.merged...) + m := append(mergedIndex(nil), b.merged...) b.mu.Unlock() return m.Iterate(prefix, cb) @@ -94,8 +93,8 @@ func (b *committedBlockIndex) use(packFiles []string) (bool, error) { } log.Debugf("set of index files has changed (had %v, now %v)", len(b.inUse), len(packFiles)) - var newMerged packindex.Merged - newInUse := map[string]packindex.Index{} + var newMerged mergedIndex + newInUse := map[string]packIndex{} defer func() { newMerged.Close() //nolint:errcheck }() @@ -128,12 +127,12 @@ func newCommittedBlockIndex(caching CachingOptions) (*committedBlockIndex, error cache = &diskCommittedBlockIndexCache{dirname} } else { cache = &memoryCommittedBlockIndexCache{ - blocks: map[string]packindex.Index{}, + blocks: map[string]packIndex{}, } } return &committedBlockIndex{ cache: cache, - inUse: map[string]packindex.Index{}, + inUse: map[string]packIndex{}, }, nil } diff --git a/block/committed_block_index_disk_cache.go b/block/committed_block_index_disk_cache.go index 909a666bf..9e0a1f4c9 100644 --- a/block/committed_block_index_disk_cache.go +++ b/block/committed_block_index_disk_cache.go @@ -8,7 +8,6 @@ "strings" "time" - "github.com/kopia/repo/internal/packindex" "golang.org/x/exp/mmap" ) @@ -25,7 +24,7 @@ func (c *diskCommittedBlockIndexCache) indexBlockPath(indexBlockID string) strin return filepath.Join(c.dirname, indexBlockID+simpleIndexSuffix) } -func (c *diskCommittedBlockIndexCache) openIndex(indexBlockID string) (packindex.Index, error) { +func (c *diskCommittedBlockIndexCache) openIndex(indexBlockID string) (packIndex, error) { fullpath := c.indexBlockPath(indexBlockID) f, err := mmap.Open(fullpath) @@ -33,7 +32,7 @@ func (c *diskCommittedBlockIndexCache) openIndex(indexBlockID string) (packindex return nil, err } - return packindex.Open(f) + return openPackIndex(f) } func (c *diskCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) { diff --git a/block/committed_block_index_mem_cache.go b/block/committed_block_index_mem_cache.go index b68d59a5c..03fe7817f 100644 --- a/block/committed_block_index_mem_cache.go +++ b/block/committed_block_index_mem_cache.go @@ -4,13 +4,11 @@ "bytes" "fmt" "sync" - - "github.com/kopia/repo/internal/packindex" ) type memoryCommittedBlockIndexCache struct { mu sync.Mutex - blocks map[string]packindex.Index + blocks map[string]packIndex } func (m *memoryCommittedBlockIndexCache) hasIndexBlockID(indexBlockID string) (bool, error) { @@ 
-24,7 +22,7 @@ func (m *memoryCommittedBlockIndexCache) addBlockToCache(indexBlockID string, da m.mu.Lock() defer m.mu.Unlock() - ndx, err := packindex.Open(bytes.NewReader(data)) + ndx, err := openPackIndex(bytes.NewReader(data)) if err != nil { return err } @@ -33,7 +31,7 @@ func (m *memoryCommittedBlockIndexCache) addBlockToCache(indexBlockID string, da return nil } -func (m *memoryCommittedBlockIndexCache) openIndex(indexBlockID string) (packindex.Index, error) { +func (m *memoryCommittedBlockIndexCache) openIndex(indexBlockID string) (packIndex, error) { m.mu.Lock() defer m.mu.Unlock() diff --git a/internal/packindex/content_id_to_bytes.go b/block/content_id_to_bytes.go similarity index 96% rename from internal/packindex/content_id_to_bytes.go rename to block/content_id_to_bytes.go index 731f3b57d..136219d06 100644 --- a/internal/packindex/content_id_to_bytes.go +++ b/block/content_id_to_bytes.go @@ -1,4 +1,4 @@ -package packindex +package block import ( "encoding/hex" diff --git a/internal/packindex/format.go b/block/format.go similarity index 99% rename from internal/packindex/format.go rename to block/format.go index 5509556c9..5fc65271a 100644 --- a/internal/packindex/format.go +++ b/block/format.go @@ -1,4 +1,4 @@ -package packindex +package block import ( "encoding/binary" diff --git a/internal/packindex/index.go b/block/index.go similarity index 94% rename from internal/packindex/index.go rename to block/index.go index 1ea4c2cb2..a4f4a4c1d 100644 --- a/internal/packindex/index.go +++ b/block/index.go @@ -1,4 +1,4 @@ -package packindex +package block import ( "bytes" @@ -9,8 +9,8 @@ "strings" ) -// Index is a read-only index of packed blocks. -type Index interface { +// packIndex is a read-only index of packed blocks. +type packIndex interface { io.Closer GetInfo(blockID string) (*Info, error) @@ -186,8 +186,8 @@ func (b *index) Close() error { return nil } -// Open reads an Index from a given reader. The caller must call Close() when the index is no longer used. -func Open(readerAt io.ReaderAt) (Index, error) { +// openPackIndex reads an Index from a given reader. The caller must call Close() when the index is no longer used. +func openPackIndex(readerAt io.ReaderAt) (packIndex, error) { h, err := readHeader(readerAt) if err != nil { return nil, fmt.Errorf("invalid header: %v", err) diff --git a/internal/packindex/info.go b/block/info.go similarity index 97% rename from internal/packindex/info.go rename to block/info.go index a37a2ba92..388471231 100644 --- a/internal/packindex/info.go +++ b/block/info.go @@ -1,4 +1,4 @@ -package packindex +package block import ( "time" diff --git a/internal/packindex/merged.go b/block/merged.go similarity index 84% rename from internal/packindex/merged.go rename to block/merged.go index 76e637972..1596ad845 100644 --- a/internal/packindex/merged.go +++ b/block/merged.go @@ -1,15 +1,15 @@ -package packindex +package block import ( "container/heap" "errors" ) -// Merged is an implementation of Index that transparently merges retuns from underlying Indexes. -type Merged []Index +// mergedIndex is an implementation of Index that transparently merges retuns from underlying Indexes. +type mergedIndex []packIndex // Close closes all underlying indexes. -func (m Merged) Close() error { +func (m mergedIndex) Close() error { for _, ndx := range m { if err := ndx.Close(); err != nil { return err @@ -20,7 +20,7 @@ func (m Merged) Close() error { } // GetInfo returns information about a single block. 
If a block is not found, returns (nil,nil) -func (m Merged) GetInfo(contentID string) (*Info, error) { +func (m mergedIndex) GetInfo(contentID string) (*Info, error) { var best *Info for _, ndx := range m { i, err := ndx.GetInfo(contentID) @@ -68,7 +68,7 @@ func (h *nextInfoHeap) Pop() interface{} { return x } -func iterateChan(prefix string, ndx Index, done chan bool) <-chan Info { +func iterateChan(prefix string, ndx packIndex, done chan bool) <-chan Info { ch := make(chan Info) go func() { defer close(ch) @@ -87,7 +87,7 @@ func iterateChan(prefix string, ndx Index, done chan bool) <-chan Info { // Iterate invokes the provided callback for all unique block IDs in the underlying sources until either // all blocks have been visited or until an error is returned by the callback. -func (m Merged) Iterate(prefix string, cb func(i Info) error) error { +func (m mergedIndex) Iterate(prefix string, cb func(i Info) error) error { var minHeap nextInfoHeap done := make(chan bool) defer close(done) @@ -129,4 +129,4 @@ func (m Merged) Iterate(prefix string, cb func(i Info) error) error { return nil } -var _ Index = (*Merged)(nil) +var _ packIndex = (*mergedIndex)(nil) diff --git a/internal/packindex/merged_test.go b/block/merged_test.go similarity index 53% rename from internal/packindex/merged_test.go rename to block/merged_test.go index 257efb8f5..0c7127ce3 100644 --- a/internal/packindex/merged_test.go +++ b/block/merged_test.go @@ -1,43 +1,41 @@ -package packindex_test +package block import ( "bytes" "fmt" "reflect" "testing" - - "github.com/kopia/repo/internal/packindex" ) func TestMerged(t *testing.T) { i1, err := indexWithItems( - packindex.Info{BlockID: "aabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 11}, - packindex.Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, - packindex.Info{BlockID: "z010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, - packindex.Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 111}, + Info{BlockID: "aabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 11}, + Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + Info{BlockID: "z010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 111}, ) if err != nil { t.Fatalf("can't create index: %v", err) } i2, err := indexWithItems( - packindex.Info{BlockID: "aabbcc", TimestampSeconds: 3, PackFile: "yy", PackOffset: 33}, - packindex.Info{BlockID: "xaabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, - packindex.Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 222, Deleted: true}, + Info{BlockID: "aabbcc", TimestampSeconds: 3, PackFile: "yy", PackOffset: 33}, + Info{BlockID: "xaabbcc", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + Info{BlockID: "de1e1e", TimestampSeconds: 4, PackFile: "xx", PackOffset: 222, Deleted: true}, ) if err != nil { t.Fatalf("can't create index: %v", err) } i3, err := indexWithItems( - packindex.Info{BlockID: "aabbcc", TimestampSeconds: 2, PackFile: "zz", PackOffset: 22}, - packindex.Info{BlockID: "ddeeff", TimestampSeconds: 1, PackFile: "zz", PackOffset: 222}, - packindex.Info{BlockID: "k010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, - packindex.Info{BlockID: "k020304", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + Info{BlockID: "aabbcc", TimestampSeconds: 2, PackFile: "zz", PackOffset: 22}, + Info{BlockID: "ddeeff", TimestampSeconds: 1, 
PackFile: "zz", PackOffset: 222}, + Info{BlockID: "k010203", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, + Info{BlockID: "k020304", TimestampSeconds: 1, PackFile: "xx", PackOffset: 111}, ) if err != nil { t.Fatalf("can't create index: %v", err) } - m := packindex.Merged{i1, i2, i3} + m := mergedIndex{i1, i2, i3} i, err := m.GetInfo("aabbcc") if err != nil || i == nil { t.Fatalf("unable to get info: %v", err) @@ -47,7 +45,7 @@ func TestMerged(t *testing.T) { } var inOrder []string - m.Iterate("", func(i packindex.Info) error { + m.Iterate("", func(i Info) error { inOrder = append(inOrder, i.BlockID) if i.BlockID == "de1e1e" { if i.Deleted { @@ -81,8 +79,8 @@ func TestMerged(t *testing.T) { } } -func indexWithItems(items ...packindex.Info) (packindex.Index, error) { - b := packindex.NewBuilder() +func indexWithItems(items ...Info) (packIndex, error) { + b := make(packIndexBuilder) for _, it := range items { b.Add(it) } @@ -90,5 +88,5 @@ func indexWithItems(items ...packindex.Info) (packindex.Index, error) { if err := b.Build(&buf); err != nil { return nil, fmt.Errorf("build error: %v", err) } - return packindex.Open(bytes.NewReader(buf.Bytes())) + return openPackIndex(bytes.NewReader(buf.Bytes())) } diff --git a/internal/packindex/packindex_internal_test.go b/block/packindex_internal_test.go similarity index 95% rename from internal/packindex/packindex_internal_test.go rename to block/packindex_internal_test.go index a80661e82..305619855 100644 --- a/internal/packindex/packindex_internal_test.go +++ b/block/packindex_internal_test.go @@ -1,4 +1,4 @@ -package packindex +package block import "testing" diff --git a/internal/packindex/packindex_test.go b/block/packindex_test.go similarity index 90% rename from internal/packindex/packindex_test.go rename to block/packindex_test.go index 50b8644aa..88fd5e3fe 100644 --- a/internal/packindex/packindex_test.go +++ b/block/packindex_test.go @@ -1,4 +1,4 @@ -package packindex_test +package block import ( "bytes" @@ -9,8 +9,6 @@ "reflect" "strings" "testing" - - "github.com/kopia/repo/internal/packindex" ) func TestPackIndex(t *testing.T) { @@ -58,11 +56,11 @@ func TestPackIndex(t *testing.T) { return int64(rand.Int31()) } - var infos []packindex.Info + var infos []Info // deleted blocks with all information for i := 0; i < 100; i++ { - infos = append(infos, packindex.Info{ + infos = append(infos, Info{ TimestampSeconds: randomUnixTime(), Deleted: true, BlockID: deterministicBlockID("deleted-packed", i), @@ -74,7 +72,7 @@ func TestPackIndex(t *testing.T) { } // non-deleted block for i := 0; i < 100; i++ { - infos = append(infos, packindex.Info{ + infos = append(infos, Info{ TimestampSeconds: randomUnixTime(), BlockID: deterministicBlockID("packed", i), PackFile: deterministicPackFile(i), @@ -84,10 +82,10 @@ func TestPackIndex(t *testing.T) { }) } - infoMap := map[string]packindex.Info{} - b1 := packindex.NewBuilder() - b2 := packindex.NewBuilder() - b3 := packindex.NewBuilder() + infoMap := map[string]Info{} + b1 := make(packIndexBuilder) + b2 := make(packIndexBuilder) + b3 := make(packIndexBuilder) for _, info := range infos { infoMap[info.BlockID] = info @@ -123,7 +121,7 @@ func TestPackIndex(t *testing.T) { fuzzTestIndexOpen(t, data1) }) - ndx, err := packindex.Open(bytes.NewReader(data1)) + ndx, err := openPackIndex(bytes.NewReader(data1)) if err != nil { t.Fatalf("can't open index: %v", err) } @@ -141,7 +139,7 @@ func TestPackIndex(t *testing.T) { } cnt := 0 - ndx.Iterate("", func(info2 packindex.Info) error { + ndx.Iterate("", 
func(info2 Info) error { info := infoMap[info2.BlockID] if !reflect.DeepEqual(info, info2) { t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info) @@ -168,7 +166,7 @@ func TestPackIndex(t *testing.T) { for _, prefix := range prefixes { cnt2 := 0 - ndx.Iterate(string(prefix), func(info2 packindex.Info) error { + ndx.Iterate(string(prefix), func(info2 Info) error { cnt2++ if !strings.HasPrefix(string(info2.BlockID), string(prefix)) { t.Errorf("unexpected item %v when iterating prefix %v", info2.BlockID, prefix) @@ -184,13 +182,13 @@ func fuzzTestIndexOpen(t *testing.T, originalData []byte) { rnd := rand.New(rand.NewSource(12345)) fuzzTest(rnd, originalData, 50000, func(d []byte) { - ndx, err := packindex.Open(bytes.NewReader(d)) + ndx, err := openPackIndex(bytes.NewReader(d)) if err != nil { return } defer ndx.Close() cnt := 0 - ndx.Iterate("", func(cb packindex.Info) error { + ndx.Iterate("", func(cb Info) error { if cnt < 10 { ndx.GetInfo(cb.BlockID) } diff --git a/internal/packindex/subset.go b/internal/packindex/subset.go deleted file mode 100644 index 5c6e55299..000000000 --- a/internal/packindex/subset.go +++ /dev/null @@ -1,28 +0,0 @@ -package packindex - -// IsSubset returns true if all entries in index 'a' are contained in index 'b'. -func IsSubset(a, b Index) bool { - done := make(chan bool) - defer close(done) - - ach := iterateChan("", a, done) - bch := iterateChan("", b, done) - - for ait := range ach { - bit, ok := <-bch - if !ok { - return false - } - for bit.BlockID < ait.BlockID { - bit, ok = <-bch - if !ok { - return false - } - } - - if bit.BlockID != ait.BlockID { - return false - } - } - return true -} diff --git a/internal/packindex/subset_test.go b/internal/packindex/subset_test.go deleted file mode 100644 index aaad99295..000000000 --- a/internal/packindex/subset_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package packindex_test - -import ( - "bytes" - "fmt" - "testing" - - "github.com/kopia/repo/internal/packindex" -) - -func TestSubset(t *testing.T) { - cases := []struct { - aBlocks, bBlocks []string - want bool - }{ - {[]string{}, []string{"aa"}, true}, - {[]string{}, []string{"aa", "bb"}, true}, - {[]string{"aa"}, []string{"aa"}, true}, - {[]string{"aa"}, []string{"bb"}, false}, - {[]string{"aa"}, []string{"aa", "bb"}, true}, - {[]string{"aa"}, []string{"aa", "bb", "cc"}, true}, - {[]string{"aa", "bb"}, []string{"bb", "cc"}, false}, - {[]string{"aa", "bb"}, []string{"aa"}, false}, - {[]string{"aa", "bb"}, []string{}, false}, - {[]string{"aa", "bb", "cc", "dd", "ee", "ff"}, []string{"aa", "bb", "cc", "dd", "ee", "ff"}, true}, - {[]string{"aa", "bb", "cc", "dd", "ee", "ff"}, []string{"aa", "bb", "cc", "dd", "ef", "ff"}, false}, - {[]string{"aa", "bb", "cc", "dd", "ee", "ff"}, []string{"aa", "bb", "cc", "dd", "ee", "ef", "ff"}, true}, - } - - for _, tc := range cases { - a, err := indexWithBlockIDs(tc.aBlocks) - if err != nil { - t.Fatalf("error building index: %v", err) - } - b, err := indexWithBlockIDs(tc.bBlocks) - if err != nil { - t.Fatalf("error building index: %v", err) - } - - if got, want := packindex.IsSubset(a, b), tc.want; got != want { - t.Errorf("invalid value of IsSubset(%v,%v): %v, wanted %v", tc.aBlocks, tc.bBlocks, got, want) - } - } -} -func indexWithBlockIDs(items []string) (packindex.Index, error) { - b := packindex.NewBuilder() - for _, it := range items { - b.Add(packindex.Info{ - BlockID: it, - PackFile: "x", - PackOffset: 1, - Length: 1, - }) - } - var buf bytes.Buffer - if err := b.Build(&buf); err != nil { - return nil, 
fmt.Errorf("build error: %v", err) - } - return packindex.Open(bytes.NewReader(buf.Bytes())) -} From 433b193602def5dfb63bb3cc30e5d825f5fc64e6 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Thu, 1 Nov 2018 22:22:14 -0700 Subject: [PATCH 31/74] block: added tests for FindUnreferencedStorageFiles --- block/block_manager.go | 13 +++--- block/block_manager_test.go | 89 ++++++++++++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 6 deletions(-) diff --git a/block/block_manager.go b/block/block_manager.go index 1ef3f04f4..21f9b6c06 100644 --- a/block/block_manager.go +++ b/block/block_manager.go @@ -336,11 +336,13 @@ func (bm *Manager) writePackBlockLocked(ctx context.Context) error { return fmt.Errorf("error preparing data block: %v", err) } - if err := bm.writePackFileNotLocked(ctx, packFile, blockData); err != nil { - return fmt.Errorf("can't save pack data block: %v", err) + if len(blockData) > 0 { + if err := bm.writePackFileNotLocked(ctx, packFile, blockData); err != nil { + return fmt.Errorf("can't save pack data block: %v", err) + } } - formatLog.Debugf("wrote pack file: %v", packFile) + formatLog.Debugf("wrote pack file: %v (%v bytes)", packFile, len(blockData)) for _, info := range packFileIndex { bm.packIndexBuilder.Add(*info) } @@ -575,7 +577,7 @@ func (bm *Manager) ListBlockInfos(prefix string, includeDeleted bool) ([]Info, e if (i.Deleted && !includeDeleted) || !strings.HasPrefix(i.BlockID, prefix) { return nil } - if bi, ok := bm.packIndexBuilder[i.BlockID]; ok && bi.Deleted { + if i.Deleted && !includeDeleted { return nil } result = append(result, i) @@ -761,12 +763,13 @@ func (bm *Manager) BlockInfo(ctx context.Context, blockID string) (Info, error) // FindUnreferencedStorageFiles returns the list of unreferenced storage blocks. 
func (bm *Manager) FindUnreferencedStorageFiles(ctx context.Context) ([]storage.BlockMetadata, error) { - infos, err := bm.ListBlockInfos("", false) + infos, err := bm.ListBlockInfos("", true) if err != nil { return nil, fmt.Errorf("unable to list index blocks: %v", err) } usedPackBlocks := findPackBlocksInUse(infos) + var unused []storage.BlockMetadata err = bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error { u := usedPackBlocks[bi.BlockID] diff --git a/block/block_manager_test.go b/block/block_manager_test.go index 1c71714d2..76efb1d86 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -28,7 +28,7 @@ var hmacSecret = []byte{1, 2, 3} func init() { - logging.SetLevel(logging.INFO, "") + logging.SetLevel(logging.DEBUG, "") } func TestBlockManagerEmptyFlush(t *testing.T) { @@ -569,6 +569,93 @@ func TestDeleteAndRecreate(t *testing.T) { } } +func TestFindUnreferencedStorageFiles(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + verifyUnreferencedStorageFilesCount(ctx, t, bm, 0) + blockID := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100)) + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + verifyUnreferencedStorageFilesCount(ctx, t, bm, 0) + if err := bm.DeleteBlock(blockID); err != nil { + t.Errorf("error deleting block: %v", blockID) + } + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + + // block still present in first pack + verifyUnreferencedStorageFilesCount(ctx, t, bm, 0) + + bm.RewriteBlock(ctx, blockID) + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + verifyUnreferencedStorageFilesCount(ctx, t, bm, 1) + bm.RewriteBlock(ctx, blockID) + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + verifyUnreferencedStorageFilesCount(ctx, t, bm, 2) +} + +func TestFindUnreferencedStorageFiles2(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + verifyUnreferencedStorageFilesCount(ctx, t, bm, 0) + blockID := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100)) + writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100)) + dumpBlocks(t, bm, "after writing") + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + dumpBlocks(t, bm, "after flush") + verifyUnreferencedStorageFilesCount(ctx, t, bm, 0) + if err := bm.DeleteBlock(blockID); err != nil { + t.Errorf("error deleting block: %v", blockID) + } + dumpBlocks(t, bm, "after delete") + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + dumpBlocks(t, bm, "after flush") + // block present in first pack, original pack is still referenced + verifyUnreferencedStorageFilesCount(ctx, t, bm, 0) +} + +func dumpBlocks(t *testing.T, bm *Manager, caption string) { + t.Helper() + infos, err := bm.ListBlockInfos("", true) + if err != nil { + t.Errorf("error listing blocks: %v", err) + return + } + + log.Infof("**** dumping %v blocks %v", len(infos), caption) + for i, bi := range infos { + log.Debugf(" bi[%v]=%#v", i, bi) + } + log.Infof("finished dumping %v blocks", len(infos)) +} + +func verifyUnreferencedStorageFilesCount(ctx context.Context, t *testing.T, bm *Manager, want int) { + t.Helper() + unref, err := bm.FindUnreferencedStorageFiles(ctx) + if err != nil { + t.Errorf("error in 
FindUnreferencedStorageFiles: %v", err) + } + + log.Infof("got %v expecting %v", unref, want) + if got := len(unref); got != want { + t.Errorf("invalid number of unreferenced blocks: %v, wanted %v", got, want) + } +} + func TestBlockWriteAliasing(t *testing.T) { ctx := context.Background() data := map[string][]byte{} From 9cfc1297fd53fb4b5b2421c83deea4b2d7d8d84e Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Thu, 1 Nov 2018 22:34:42 -0700 Subject: [PATCH 32/74] block: dead code --- block/block_manager.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/block/block_manager.go b/block/block_manager.go index 21f9b6c06..44983d3dd 100644 --- a/block/block_manager.go +++ b/block/block_manager.go @@ -577,7 +577,7 @@ func (bm *Manager) ListBlockInfos(prefix string, includeDeleted bool) ([]Info, e if (i.Deleted && !includeDeleted) || !strings.HasPrefix(i.BlockID, prefix) { return nil } - if i.Deleted && !includeDeleted { + if bi, ok := bm.packIndexBuilder[i.BlockID]; ok && bi.Deleted { return nil } result = append(result, i) @@ -733,14 +733,6 @@ func (bm *Manager) getBlockInfo(blockID string) (Info, error) { return bm.committedBlocks.getBlock(blockID) } -// GetIndexBlock gets the contents of a given index block. If the block is not found returns blob.ErrBlockNotFound. -func (bm *Manager) GetIndexBlock(ctx context.Context, blockID string) ([]byte, error) { - bm.lock() - defer bm.unlock() - - return bm.getPhysicalBlockInternal(ctx, blockID) -} - // BlockInfo returns information about a single block. func (bm *Manager) BlockInfo(ctx context.Context, blockID string) (Info, error) { bi, err := bm.getBlockInfo(blockID) From 9c81f6f179375a6cb03b779ef287d94072a3307b Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Thu, 1 Nov 2018 23:02:38 -0700 Subject: [PATCH 33/74] block: added block_index_recovery_test.go --- block/block_index_recovery_test.go | 90 ++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 block/block_index_recovery_test.go diff --git a/block/block_index_recovery_test.go b/block/block_index_recovery_test.go new file mode 100644 index 000000000..d42cdc672 --- /dev/null +++ b/block/block_index_recovery_test.go @@ -0,0 +1,90 @@ +package block + +import ( + "context" + "testing" + "time" + + "github.com/kopia/repo/storage" +) + +func TestBlockIndexRecovery(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + keyTime := map[string]time.Time{} + bm := newTestBlockManager(data, keyTime, nil) + block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100)) + block2 := writeBlockAndVerify(ctx, t, bm, seededRandomData(11, 100)) + block3 := writeBlockAndVerify(ctx, t, bm, seededRandomData(12, 100)) + + if err := bm.Flush(ctx); err != nil { + t.Errorf("flush error: %v", err) + } + + // delete all index blocks + bm.st.ListBlocks(ctx, newIndexBlockPrefix, func(bi storage.BlockMetadata) error { + log.Debugf("deleting %v", bi.BlockID) + return bm.st.DeleteBlock(ctx, bi.BlockID) + }) + + // now with index blocks gone, all blocks appear to not be found + bm = newTestBlockManager(data, keyTime, nil) + verifyBlockNotFound(ctx, t, bm, block1) + verifyBlockNotFound(ctx, t, bm, block2) + verifyBlockNotFound(ctx, t, bm, block3) + + totalRecovered := 0 + + // pass 1 - just list blocks to recover, but don't commit + err := bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error { + infos, err := bm.RecoverIndexFromPackFile(ctx, bi.BlockID, bi.Length, false) + if err != nil { + return err + } + 
totalRecovered += len(infos)
+ log.Debugf("recovered %v blocks", len(infos))
+ return nil
+ })
+ if err != nil {
+ t.Errorf("error recovering: %v", err)
+ }
+
+ if got, want := totalRecovered, 3; got != want {
+ t.Errorf("invalid # of blocks recovered: %v, want %v", got, want)
+ }
+
+ // blocks are still not found
+ verifyBlockNotFound(ctx, t, bm, block1)
+ verifyBlockNotFound(ctx, t, bm, block2)
+ verifyBlockNotFound(ctx, t, bm, block3)
+
+ // pass 2 now pass commit=true to add recovered blocks to index
+ totalRecovered = 0
+
+ err = bm.st.ListBlocks(ctx, PackBlockPrefix, func(bi storage.BlockMetadata) error {
+ infos, err := bm.RecoverIndexFromPackFile(ctx, bi.BlockID, bi.Length, true)
+ if err != nil {
+ return err
+ }
+ totalRecovered += len(infos)
+ log.Debugf("recovered %v blocks", len(infos))
+ return nil
+ })
+ if err != nil {
+ t.Errorf("error recovering: %v", err)
+ }
+
+ if got, want := totalRecovered, 3; got != want {
+ t.Errorf("invalid # of blocks recovered: %v, want %v", got, want)
+ }
+
+ verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
+ verifyBlock(ctx, t, bm, block2, seededRandomData(11, 100))
+ verifyBlock(ctx, t, bm, block3, seededRandomData(12, 100))
+ if err := bm.Flush(ctx); err != nil {
+ t.Errorf("flush error: %v", err)
+ }
+ verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100))
+ verifyBlock(ctx, t, bm, block2, seededRandomData(11, 100))
+ verifyBlock(ctx, t, bm, block3, seededRandomData(12, 100))
+}

From 111a6277300301bb88267abaecfa67d128f7d50b Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Sat, 3 Nov 2018 11:54:35 -0700
Subject: [PATCH 34/74] misc: fixed lint warnings

---
 block/block_index_recovery.go | 2 +-
 storage/filesystem/filesystem_storage.go | 2 +-
 storage/gcs/gcs_storage.go | 5 +----
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/block_index_recovery.go b/block/block_index_recovery.go
index 275a78655..f4c4bc231 100644
--- a/block/block_index_recovery.go
+++ b/block/block_index_recovery.go
@@ -21,7 +21,7 @@ func (bm *Manager) RecoverIndexFromPackFile(ctx context.Context, packFile string
 if err != nil {
 return nil, fmt.Errorf("unable to open index in file %v", packFile)
 }
- defer ndx.Close()
+ defer ndx.Close() //nolint:errcheck

 var recovered []Info

diff --git a/storage/filesystem/filesystem_storage.go b/storage/filesystem/filesystem_storage.go
index a3a6db4c4..f9735b1e1 100644
--- a/storage/filesystem/filesystem_storage.go
+++ b/storage/filesystem/filesystem_storage.go
@@ -50,7 +50,7 @@ func (fs *fsStorage) GetBlock(ctx context.Context, blockID string, offset, lengt
 return ioutil.ReadAll(f)
 }

- if _, err := f.Seek(offset, io.SeekStart); err != nil {
+ if _, err = f.Seek(offset, io.SeekStart); err != nil {
 return nil, err
 }
 b, err := ioutil.ReadAll(io.LimitReader(f, length))
diff --git a/storage/gcs/gcs_storage.go b/storage/gcs/gcs_storage.go
index 05a731135..b990fd334 100644
--- a/storage/gcs/gcs_storage.go
+++ b/storage/gcs/gcs_storage.go
@@ -72,10 +72,7 @@ func exponentialBackoff(desc string, att retry.AttemptFunc) (interface{}, error)

 func isRetriableError(err error) bool {
 if apiError, ok := err.(*googleapi.Error); ok {
- if apiError.Code >= 500 {
- return true
- }
- return false
+ return apiError.Code >= 500
 }

 switch err {

From 2a6613de378888dbd66df6060d15cc47677f9de7 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Sat, 3 Nov 2018 12:09:25 -0700
Subject: [PATCH 35/74] go.mod: updated dependencies

---
 go.mod | 14 +++++++-------
 go.sum | 28 ++++++++++++++--------------
 2 files changed, 21 insertions(+), 21 
deletions(-) diff --git a/go.mod b/go.mod index 9ef8b73ac..97c473dd7 100644 --- a/go.mod +++ b/go.mod @@ -1,7 +1,7 @@ module github.com/kopia/repo require ( - cloud.google.com/go v0.31.0 + cloud.google.com/go v0.32.0 github.com/efarrer/iothrottler v0.0.0-20141121142253-60e7e547c7fe github.com/go-ini/ini v1.39.0 // indirect github.com/googleapis/gax-go v2.0.0+incompatible // indirect @@ -11,12 +11,12 @@ require ( github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc go.opencensus.io v0.18.0 // indirect - golang.org/x/crypto v0.0.0-20181025213731-e84da0312774 + golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16 golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 - golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 - golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4 - golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 // indirect - google.golang.org/api v0.0.0-20181026000445-511bab8e55de - google.golang.org/genproto v0.0.0-20181026194446-8b5d7a19e2d9 // indirect + golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc // indirect + golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389 + golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc // indirect + google.golang.org/api v0.0.0-20181102150758-04bb50b6b83d + google.golang.org/genproto v0.0.0-20181101192439-c830210a61df // indirect google.golang.org/grpc v1.16.0 // indirect ) diff --git a/go.sum b/go.sum index 69cc636d7..23325fb5e 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.31.0 h1:o9K5MWWt2wk+d9jkGn2DAZ7Q9nUdnFLOpK9eIkDwONQ= -cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.32.0 h1:DSt59WoyNcfAInilEpfvm2ugq8zvNyaHAm9MkzOwRQ4= +cloud.google.com/go v0.32.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= @@ -37,34 +37,34 @@ github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc h1:p1iYuFAxSsQ github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc/go.mod h1:gCcfDlA1Y7GqOaeEKw5l9dOGx1VLdc/HuQSlQAaZ30s= go.opencensus.io v0.18.0 h1:Mk5rgZcggtbvtAun5aJzAtjKKN/t0R3jJPlWILlv938= go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA= -golang.org/x/crypto v0.0.0-20181025213731-e84da0312774 h1:a4tQYYYuK9QdeO/+kEvNYyuR21S+7ve5EANok6hABhI= -golang.org/x/crypto v0.0.0-20181025213731-e84da0312774/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16 h1:y6ce7gCWtnH+m3dCjzQ1PCuwl28DDIc3VNnvY29DlIA= +golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 h1:lpkPb6P4ObnPRN3VbEzv/6CUtwaEDtx0cvCg4eWQuBk= golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519 h1:x6rhz8Y9CjbgQkccRGmELH6K+LJj7tOoh3XWeC1yaQM= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc h1:ZMCWScCvS2fUVFw8LOpxyUUW5qiviqr4Dg5NdjLeiLU= +golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4 h1:99CA0JJbUX4ozCnLon680Jc9e0T1i8HCaLVJMwtI8Hc= -golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389 h1:NSr16yuMknNO4kjJ2yNMJBdS55sdwZiWrXbt3fbM3pI= +golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 h1:x6r4Jo0KNzOOzYd8lbcRsqjuqEASK6ob3auvWYM4/8U= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc h1:SdCq5U4J+PpbSDIl9bM0V1e1Ug1jsnBkAFvTs1htn7U= +golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= -google.golang.org/api v0.0.0-20181026000445-511bab8e55de h1:jZyuTBGMXzHm+q0+2tRrBCyXKlKrmXeDQcv7s4HeQLY= -google.golang.org/api v0.0.0-20181026000445-511bab8e55de/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= +google.golang.org/api v0.0.0-20181102150758-04bb50b6b83d h1:wDkjCUR876SHY2B6YgBr9kAIUzPm1zb1X3d6sxFjpfo= +google.golang.org/api v0.0.0-20181102150758-04bb50b6b83d/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20181026194446-8b5d7a19e2d9 h1:26lptpu+T60F849wXfTQMz9ecFf6nTQM0J1JjLSga5U= -google.golang.org/genproto v0.0.0-20181026194446-8b5d7a19e2d9/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20181101192439-c830210a61df h1:Ri2mROsxIxitlzRQ0pYoP8/dsqeLEolHrhh29dltSI4= +google.golang.org/genproto v0.0.0-20181101192439-c830210a61df/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.16.0 h1:dz5IJGuC2BB7qXR5AyHNwAUBhZscK2xVez7mznh72sY= google.golang.org/grpc v1.16.0/go.mod 
h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= From 0d48a3778e3be984bccebb656d946dc6223e4d45 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 7 Nov 2018 17:43:22 -0800 Subject: [PATCH 36/74] readme: added go report card badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 61d59c303..f05d8901a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ Repository [![Build Status](https://travis-ci.org/kopia/repo.svg?branch=master)](https://travis-ci.org/kopia/repo) [![GoDoc](https://godoc.org/github.com/kopia/repo?status.svg)](https://godoc.org/github.com/kopia/repo) [![Coverage Status](https://coveralls.io/repos/github/kopia/repo/badge.svg?branch=master)](https://coveralls.io/github/kopia/repo?branch=master) +[![Go Report Card](https://goreportcard.com/badge/github.com/kopia/repo)](https://goreportcard.com/report/github.com/kopia/repo) This library implements Content-Addressable Storage Repository used by [Kopia](https://github.com/kopia/kopia) to store its snapshots. From a673d36b1dcef770eb49de9392f12cbc2b9a9894 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 7 Nov 2018 17:45:35 -0800 Subject: [PATCH 37/74] lint: ran gofmt -s and fixed spelling as recommended by goreportcard.com --- block/block_manager_test.go | 2 +- block/merged.go | 2 +- internal/jsonstream/stream_test.go | 12 ++++++------ storage/filesystem/filesystem_storage_test.go | 16 ++++++++-------- storage/webdav/webdav_storage_test.go | 12 ++++++------ 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/block/block_manager_test.go b/block/block_manager_test.go index 76efb1d86..832a40e15 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -282,7 +282,7 @@ func TestBlockManagerFailedToWritePack(t *testing.T) { logging.SetLevel(logging.DEBUG, "faulty-storage") faulty.Faults = map[string][]*storagetesting.Fault{ - "PutBlock": []*storagetesting.Fault{ + "PutBlock": { {Err: errors.New("booboo")}, }, } diff --git a/block/merged.go b/block/merged.go index 1596ad845..20140604c 100644 --- a/block/merged.go +++ b/block/merged.go @@ -5,7 +5,7 @@ "errors" ) -// mergedIndex is an implementation of Index that transparently merges retuns from underlying Indexes. +// mergedIndex is an implementation of Index that transparently merges returns from underlying Indexes. type mergedIndex []packIndex // Close closes all underlying indexes. diff --git a/internal/jsonstream/stream_test.go b/internal/jsonstream/stream_test.go index c7e24d633..32ccf25fc 100644 --- a/internal/jsonstream/stream_test.go +++ b/internal/jsonstream/stream_test.go @@ -23,9 +23,9 @@ func TestStream(t *testing.T) { var buf bytes.Buffer data := []TestObj{ - TestObj{Name: "foo"}, - TestObj{Name: "bar"}, - TestObj{Name: "baz"}, + {Name: "foo"}, + {Name: "bar"}, + {Name: "baz"}, } w := NewWriter(&buf, testHeader1) @@ -61,9 +61,9 @@ func TestStreamWithSummary(t *testing.T) { var buf bytes.Buffer data := []TestObj{ - TestObj{Name: "foo"}, - TestObj{Name: "bar"}, - TestObj{Name: "baz"}, + {Name: "foo"}, + {Name: "bar"}, + {Name: "baz"}, } w := NewWriter(&buf, testHeader1) diff --git a/storage/filesystem/filesystem_storage_test.go b/storage/filesystem/filesystem_storage_test.go index bccdc3072..a4e92da26 100644 --- a/storage/filesystem/filesystem_storage_test.go +++ b/storage/filesystem/filesystem_storage_test.go @@ -20,13 +20,13 @@ func TestFileStorage(t *testing.T) { // Test varioush shard configurations. 
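// (The rewrite below is the `gofmt -s` simplification named in the commit
// message: inside a [][]int composite literal the inner []int element type
// can be elided, so []int{3, 3} becomes {3, 3} with identical meaning.)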
for _, shardSpec := range [][]int{ - []int{0}, - []int{1}, - []int{3, 3}, - []int{2}, - []int{1, 1}, - []int{1, 2}, - []int{2, 2, 2}, + {0}, + {1}, + {3, 3}, + {2}, + {1, 1}, + {1, 2}, + {2, 2, 2}, } { path, _ := ioutil.TempDir("", "r-fs") defer os.RemoveAll(path) @@ -66,7 +66,7 @@ func TestFileStorageTouch(t *testing.T) { fs := r.(*fsStorage) fs.PutBlock(ctx, t1, []byte{1}) - time.Sleep(1 * time.Second) // sleep a bit to accomodate Apple filesystems with low timestamp resolution + time.Sleep(1 * time.Second) // sleep a bit to accommodate Apple filesystems with low timestamp resolution fs.PutBlock(ctx, t2, []byte{1}) time.Sleep(1 * time.Second) fs.PutBlock(ctx, t3, []byte{1}) diff --git a/storage/webdav/webdav_storage_test.go b/storage/webdav/webdav_storage_test.go index dc243ea47..8bd63ab98 100644 --- a/storage/webdav/webdav_storage_test.go +++ b/storage/webdav/webdav_storage_test.go @@ -33,12 +33,12 @@ func TestWebDAVStorage(t *testing.T) { // Test varioush shard configurations. for _, shardSpec := range [][]int{ - []int{1}, - []int{3, 3}, - []int{2}, - []int{1, 1}, - []int{1, 2}, - []int{2, 2, 2}, + {1}, + {3, 3}, + {2}, + {1, 1}, + {1, 2}, + {2, 2, 2}, } { t.Run(fmt.Sprintf("shards-%v", shardSpec), func(t *testing.T) { if err := os.RemoveAll(tmpDir); err != nil { From b8ecd1dd10d018abce1ae20838c2451382d6e339 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 7 Nov 2018 17:51:29 -0800 Subject: [PATCH 38/74] lint: fixes --- go.mod | 2 +- initialize.go | 6 +++--- repository.go | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 97c473dd7..82e3f568b 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( go.opencensus.io v0.18.0 // indirect golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16 golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 - golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc // indirect + golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389 golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc // indirect google.golang.org/api v0.0.0-20181102150758-04bb50b6b83d diff --git a/initialize.go b/initialize.go index ec7e87b9f..2f7dbc087 100644 --- a/initialize.go +++ b/initialize.go @@ -54,15 +54,15 @@ func Initialize(ctx context.Context, st storage.Storage, opt *NewRepositoryOptio format := formatBlockFromOptions(opt) masterKey, err := format.deriveMasterKeyFromPassword(password) if err != nil { - return err + return fmt.Errorf("unable to derive master key: %v", err) } if err := encryptFormatBytes(format, repositoryObjectFormatFromOptions(opt), masterKey, format.UniqueID); err != nil { - return err + return fmt.Errorf("unable to encrypt format bytes: %v", err) } if err := writeFormatBlock(ctx, st, format); err != nil { - return err + return fmt.Errorf("unable to write format block: %v", err) } return nil diff --git a/repository.go b/repository.go index e63c1598e..539d1d66f 100644 --- a/repository.go +++ b/repository.go @@ -26,16 +26,16 @@ type Repository struct { // Close closes the repository and releases all resources. 
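// Each shutdown step below now wraps its failure in fmt.Errorf, so callers
// can tell which stage failed: manifests are flushed first, then objects
// are closed, then blocks are flushed, and finally the underlying storage
// is closed.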
func (r *Repository) Close(ctx context.Context) error { if err := r.Manifests.Flush(ctx); err != nil { - return err + return fmt.Errorf("error flushing manifests: %v", err) } if err := r.Objects.Close(ctx); err != nil { - return err + return fmt.Errorf("error closing objects: %v", err) } if err := r.Blocks.Flush(ctx); err != nil { - return err + return fmt.Errorf("error closing blocks: %v", err) } if err := r.Storage.Close(ctx); err != nil { - return err + return fmt.Errorf("error closing storage: %v", err) } return nil } From 3487cc191e6f850e79ad2f80ce97dec3b7bc8cb4 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 22 Dec 2018 19:58:00 -0800 Subject: [PATCH 39/74] removed asynchronous mode for object.Manager --- object/object_manager.go | 35 ++------------------ object/object_manager_test.go | 4 --- object/object_writer.go | 42 ++++-------------------- repository.go | 6 ---- tests/repository_test/repository_test.go | 3 -- 5 files changed, 9 insertions(+), 81 deletions(-) diff --git a/object/object_manager.go b/object/object_manager.go index f56d1c1ee..c85ed783c 100644 --- a/object/object_manager.go +++ b/object/object_manager.go @@ -7,7 +7,6 @@ "context" "fmt" "io" - "sync" "github.com/kopia/repo/block" "github.com/kopia/repo/internal/jsonstream" @@ -40,22 +39,11 @@ type Manager struct { Format Format blockMgr blockManager - - async bool - writeBackWG sync.WaitGroup - writeBackSemaphore semaphore - - trace func(message string, args ...interface{}) + trace func(message string, args ...interface{}) newSplitter func() objectSplitter } -// Close closes the connection to the underlying blob storage and releases any resources. -func (om *Manager) Close(ctx context.Context) error { - om.writeBackWG.Wait() - return om.Flush(ctx) -} - // NewWriter creates an ObjectWriter for writing to the repository. func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer { w := &objectWriter{ @@ -78,9 +66,6 @@ func (om *Manager) Open(ctx context.Context, objectID ID) (Reader, error) { // log.Printf("Repository::Open %v", objectID.String()) // defer log.Printf("finished Repository::Open() %v", objectID.String()) - // Flush any pending writes. - om.writeBackWG.Wait() - if indexObjectID, ok := objectID.IndexObjectID(); ok { rd, err := om.Open(ctx, indexObjectID) if err != nil { @@ -109,9 +94,6 @@ func (om *Manager) Open(ctx context.Context, objectID ID) (Reader, error) { // VerifyObject ensures that all objects backing ObjectID are present in the repository // and returns the total length of the object and storage blocks of which it is composed. func (om *Manager) VerifyObject(ctx context.Context, oid ID) (int64, []string, error) { - // Flush any pending writes. - om.writeBackWG.Wait() - blocks := &blockTracker{} l, err := om.verifyObjectInternal(ctx, oid, blocks) if err != nil { @@ -169,20 +151,12 @@ func (om *Manager) verifyObjectInternal(ctx context.Context, oid ID, blocks *blo } -// Flush closes any pending pack files. Once this method returns, ObjectIDs returned by ObjectManager are -// ok to be used. -func (om *Manager) Flush(ctx context.Context) error { - om.writeBackWG.Wait() - return nil -} - func nullTrace(message string, args ...interface{}) { } // ManagerOptions specifies object manager options. type ManagerOptions struct { - WriteBack int - Trace func(message string, args ...interface{}) + Trace func(message string, args ...interface{}) } // NewObjectManager creates an ObjectManager with the specified block manager and format. 
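// (With asynchronous write-back removed, ManagerOptions shrinks to just
// Trace; the WriteBack field used to size the semaphore limiting concurrent
// background block writes, which the hunk below deletes.)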
@@ -213,11 +187,6 @@ func NewObjectManager(ctx context.Context, bm blockManager, f Format, opts Manag om.trace = nullTrace } - if opts.WriteBack > 0 { - om.async = true - om.writeBackSemaphore = make(semaphore, opts.WriteBack) - } - return om, nil } diff --git a/object/object_manager_test.go b/object/object_manager_test.go index 5b14345ea..f2b712fb4 100644 --- a/object/object_manager_test.go +++ b/object/object_manager_test.go @@ -105,8 +105,6 @@ func TestWriters(t *testing.T) { continue } - om.writeBackWG.Wait() - if !objectIDsEqual(result, c.objectID) { t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String()) } @@ -204,8 +202,6 @@ func TestIndirection(t *testing.T) { t.Errorf("unexpected block count for %v: %v, expected %v", c.dataLength, got, want) } - om.Flush(ctx) - l, b, err := om.VerifyObject(ctx, result) if err != nil { t.Errorf("error verifying %q: %v", result, err) diff --git a/object/object_writer.go b/object/object_writer.go index 2f11bd966..b8f4ddba7 100644 --- a/object/object_writer.go +++ b/object/object_writer.go @@ -59,13 +59,10 @@ type objectWriter struct { splitter objectSplitter pendingBlocksWG sync.WaitGroup - - err asyncErrors } func (w *objectWriter) Close() error { - w.pendingBlocksWG.Wait() - return w.err.check() + return nil } func (w *objectWriter) Write(data []byte) (n int, err error) { @@ -97,34 +94,14 @@ func (w *objectWriter) flushBuffer() error { w.buffer.WriteTo(&b2) //nolint:errcheck w.buffer.Reset() - do := func() { - blockID, err := w.repo.blockMgr.WriteBlock(w.ctx, b2.Bytes(), w.prefix) - w.repo.trace("OBJECT_WRITER(%q) stored %v (%v bytes)", w.description, blockID, length) - if err != nil { - w.err.add(fmt.Errorf("error when flushing chunk %d of %s: %v", chunkID, w.description, err)) - return - } - - w.blockIndex[chunkID].Object = DirectObjectID(blockID) + blockID, err := w.repo.blockMgr.WriteBlock(w.ctx, b2.Bytes(), w.prefix) + w.repo.trace("OBJECT_WRITER(%q) stored %v (%v bytes)", w.description, blockID, length) + if err != nil { + return fmt.Errorf("error when flushing chunk %d of %s: %v", chunkID, w.description, err) } - if w.repo.async { - w.repo.writeBackSemaphore.Lock() - w.pendingBlocksWG.Add(1) - w.repo.writeBackWG.Add(1) - - go func() { - defer w.pendingBlocksWG.Done() - defer w.repo.writeBackWG.Done() - defer w.repo.writeBackSemaphore.Unlock() - do() - }() - - return nil - } - - do() - return w.err.check() + w.blockIndex[chunkID].Object = DirectObjectID(blockID) + return nil } func (w *objectWriter) Result() (ID, error) { @@ -133,11 +110,6 @@ func (w *objectWriter) Result() (ID, error) { return "", err } } - w.pendingBlocksWG.Wait() - - if err := w.err.check(); err != nil { - return "", err - } if len(w.blockIndex) == 1 { return w.blockIndex[0].Object, nil diff --git a/repository.go b/repository.go index 539d1d66f..163b941ba 100644 --- a/repository.go +++ b/repository.go @@ -28,9 +28,6 @@ func (r *Repository) Close(ctx context.Context) error { if err := r.Manifests.Flush(ctx); err != nil { return fmt.Errorf("error flushing manifests: %v", err) } - if err := r.Objects.Close(ctx); err != nil { - return fmt.Errorf("error closing objects: %v", err) - } if err := r.Blocks.Flush(ctx); err != nil { return fmt.Errorf("error closing blocks: %v", err) } @@ -45,9 +42,6 @@ func (r *Repository) Flush(ctx context.Context) error { if err := r.Manifests.Flush(ctx); err != nil { return err } - if err := r.Objects.Flush(ctx); err != nil { - return err - } return r.Blocks.Flush(ctx) } diff --git 
a/tests/repository_test/repository_test.go b/tests/repository_test/repository_test.go index 514cb6498..29fb036ae 100644 --- a/tests/repository_test/repository_test.go +++ b/tests/repository_test/repository_test.go @@ -45,8 +45,6 @@ func TestWriters(t *testing.T) { continue } - env.Repository.Objects.Flush(ctx) - if !objectIDsEqual(result, c.objectID) { t.Errorf("incorrect result for %v, expected: %v got: %v", c.data, c.objectID.String(), result.String()) } @@ -96,7 +94,6 @@ func TestPackingSimple(t *testing.T) { oid2c := writeObject(ctx, t, env.Repository, []byte(content2), "packed-object-2c") oid1c := writeObject(ctx, t, env.Repository, []byte(content1), "packed-object-1c") - env.Repository.Objects.Flush(ctx) env.Repository.Blocks.Flush(ctx) if got, want := oid1a.String(), oid1b.String(); got != want { From 8f3bf77f933066f77b04add7edce7576406b80bf Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 22 Dec 2018 20:04:30 -0800 Subject: [PATCH 40/74] removed dead code --- object/semaphore.go | 12 ------------ object/writeback.go | 37 ------------------------------------- 2 files changed, 49 deletions(-) delete mode 100644 object/semaphore.go delete mode 100644 object/writeback.go diff --git a/object/semaphore.go b/object/semaphore.go deleted file mode 100644 index 4afa7ce24..000000000 --- a/object/semaphore.go +++ /dev/null @@ -1,12 +0,0 @@ -package object - -type empty struct{} -type semaphore chan empty - -func (s semaphore) Lock() { - s <- empty{} -} - -func (s semaphore) Unlock() { - <-s -} diff --git a/object/writeback.go b/object/writeback.go deleted file mode 100644 index d320f2c58..000000000 --- a/object/writeback.go +++ /dev/null @@ -1,37 +0,0 @@ -package object - -import ( - "fmt" - "strings" - "sync" -) - -type asyncErrors struct { - sync.RWMutex - errors []error -} - -func (e *asyncErrors) add(err error) { - e.Lock() - e.errors = append(e.errors, err) - e.Unlock() -} - -func (e *asyncErrors) check() error { - e.RLock() - defer e.RUnlock() - - switch len(e.errors) { - case 0: - return nil - case 1: - return e.errors[0] - default: - msg := make([]string, len(e.errors)) - for i, err := range e.errors { - msg[i] = err.Error() - } - - return fmt.Errorf("%v errors: %v", len(e.errors), strings.Join(msg, ";")) - } -} From 97ad1aeeac4ab193a21bfb58b0955f765bed84d3 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 22 Dec 2018 20:25:16 -0800 Subject: [PATCH 41/74] storage: additional tests --- storage/filesystem/filesystem_storage_test.go | 3 ++ storage/gcs/gcs_storage_test.go | 3 ++ storage/logging/logging_storage_test.go | 32 ++++++++++++++++--- storage/s3/s3_storage_test.go | 3 ++ storage/webdav/webdav_storage_test.go | 3 ++ 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/storage/filesystem/filesystem_storage_test.go b/storage/filesystem/filesystem_storage_test.go index a4e92da26..e0a9e7765 100644 --- a/storage/filesystem/filesystem_storage_test.go +++ b/storage/filesystem/filesystem_storage_test.go @@ -42,6 +42,9 @@ func TestFileStorage(t *testing.T) { storagetesting.VerifyStorage(ctx, t, r) storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r) + if err := r.Close(ctx); err != nil { + t.Fatalf("err: %v", err) + } } } diff --git a/storage/gcs/gcs_storage_test.go b/storage/gcs/gcs_storage_test.go index edc55fb3b..5805bbb1c 100644 --- a/storage/gcs/gcs_storage_test.go +++ b/storage/gcs/gcs_storage_test.go @@ -42,6 +42,9 @@ func TestGCSStorage(t *testing.T) { }); err != nil { t.Fatalf("unable to clear GCS bucket: %v", err) } + if err := st.Close(ctx); err != nil 
{
+ t.Fatalf("err: %v", err)
+ }
 }

 func TestGCSStorageInvalid(t *testing.T) {
diff --git a/storage/logging/logging_storage_test.go b/storage/logging/logging_storage_test.go
index 084b4311a..83fd6e5dd 100644
--- a/storage/logging/logging_storage_test.go
+++ b/storage/logging/logging_storage_test.go
@@ -2,16 +2,38 @@

 import (
 "context"
+ "strings"
 "testing"

 "github.com/kopia/repo/internal/storagetesting"
 )

 func TestLoggingStorage(t *testing.T) {
- data := map[string][]byte{}
- r := NewWrapper(storagetesting.NewMapStorage(data, nil, nil))
- if r == nil {
- t.Errorf("unexpected result: %v", r)
+ var outputCount int
+ myPrefix := "myprefix"
+ myOutput := func(msg string, args ...interface{}) {
+ if !strings.HasPrefix(msg, myPrefix) {
+ t.Errorf("unexpected prefix %v", msg)
+ }
+ outputCount++
+ }
+
+ data := map[string][]byte{}
+ underlying := storagetesting.NewMapStorage(data, nil, nil)
+ st := NewWrapper(underlying, Output(myOutput), Prefix(myPrefix))
+ if st == nil {
+ t.Fatalf("unexpected result: %v", st)
+ }
+
+ ctx := context.Background()
+ storagetesting.VerifyStorage(ctx, t, st)
+ if err := st.Close(ctx); err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ if outputCount == 0 {
+ t.Errorf("did not write any output!")
+ }
+ if got, want := st.ConnectionInfo().Type, underlying.ConnectionInfo().Type; got != want {
+ t.Errorf("unexpected connection info %v, want %v", got, want)
 }
- storagetesting.VerifyStorage(context.Background(), t, r)
 }

diff --git a/storage/s3/s3_storage_test.go b/storage/s3/s3_storage_test.go
index 29602947c..8658c780d 100644
--- a/storage/s3/s3_storage_test.go
+++ b/storage/s3/s3_storage_test.go
@@ -76,6 +76,9 @@ func TestS3Storage(t *testing.T) {
 storagetesting.VerifyStorage(ctx, t, st)
 storagetesting.AssertConnectionInfoRoundTrips(ctx, t, st)
+ if err := st.Close(ctx); err != nil {
+ t.Fatalf("err: %v", err)
+ }
 }

 func createBucket(t *testing.T) {
diff --git a/storage/webdav/webdav_storage_test.go b/storage/webdav/webdav_storage_test.go
index 8bd63ab98..1315d8bfa 100644
--- a/storage/webdav/webdav_storage_test.go
+++ b/storage/webdav/webdav_storage_test.go
@@ -57,6 +57,9 @@
 storagetesting.VerifyStorage(ctx, t, r)
 storagetesting.AssertConnectionInfoRoundTrips(ctx, t, r)
+ if err := r.Close(ctx); err != nil {
+ t.Fatalf("err: %v", err)
+ }
 })
 }
 }

From 9bb34f2e8c9f0d23593d148f39a3fa314124e9e9 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Sat, 22 Dec 2018 22:49:09 -0800
Subject: [PATCH 42/74] block: refactored assertion checking

---
 block/block_manager.go | 52 ++++++++++++++++++----------------------------------
 1 file changed, 18 insertions(+), 34 deletions(-)

diff --git a/block/block_manager.go b/block/block_manager.go
index 44983d3dd..baf6f7f6f 100644
--- a/block/block_manager.go
+++ b/block/block_manager.go
@@ -211,58 +211,42 @@ func (bm *Manager) verifyInvariantsLocked() {

 func (bm *Manager) verifyCurrentPackItemsLocked() {
 for k, cpi := range bm.currentPackItems {
- if cpi.BlockID != k {
- bm.invariantViolated("block ID entry has invalid key: %v %v", cpi.BlockID, k)
- }
- if cpi.PackFile != "" && !cpi.Deleted {
- bm.invariantViolated("block ID entry has unexpected pack block ID %v: %v", cpi.BlockID, cpi.PackFile)
- }
- if cpi.TimestampSeconds == 0 {
- bm.invariantViolated("block has no timestamp: %v", cpi.BlockID)
- }
+ bm.assertInvariant(cpi.BlockID == k, "block ID entry has invalid key: %v %v", cpi.BlockID, k)
+ bm.assertInvariant(cpi.Deleted || cpi.PackFile == "", "block ID entry has unexpected pack block ID %v: %v", cpi.BlockID, 
cpi.PackFile) + bm.assertInvariant(cpi.TimestampSeconds != 0, "block has no timestamp: %v", cpi.BlockID) bi, ok := bm.packIndexBuilder[k] - if !ok { - bm.invariantViolated("block ID entry not present in pack index builder: %v", cpi.BlockID) - } - if !reflect.DeepEqual(*bi, cpi) { - bm.invariantViolated("current pack index does not match pack index builder: %v", cpi, *bi) - } + bm.assertInvariant(ok, "block ID entry not present in pack index builder: %v", cpi.BlockID) + bm.assertInvariant(reflect.DeepEqual(*bi, cpi), "current pack index does not match pack index builder: %v", cpi, *bi) } } func (bm *Manager) verifyPackIndexBuilderLocked() { for k, cpi := range bm.packIndexBuilder { - if cpi.BlockID != k { - bm.invariantViolated("block ID entry has invalid key: %v %v", cpi.BlockID, k) - } + bm.assertInvariant(cpi.BlockID == k, "block ID entry has invalid key: %v %v", cpi.BlockID, k) if _, ok := bm.currentPackItems[cpi.BlockID]; ok { // ignore blocks also in currentPackItems continue } if cpi.Deleted { - if cpi.PackFile != "" { - bm.invariantViolated("block can't be both deleted and have a pack block: %v", cpi.BlockID) - } + bm.assertInvariant(cpi.PackFile == "", "block can't be both deleted and have a pack block: %v", cpi.BlockID) } else { - if cpi.PackFile == "" { - bm.invariantViolated("block that's not deleted must have a pack block: %+v", cpi) - } - if cpi.FormatVersion != byte(bm.writeFormatVersion) { - bm.invariantViolated("block that's not deleted must have a valid format version: %+v", cpi) - } - } - if cpi.TimestampSeconds == 0 { - bm.invariantViolated("block has no timestamp: %v", cpi.BlockID) + bm.assertInvariant(cpi.PackFile != "", "block that's not deleted must have a pack block: %+v", cpi) + bm.assertInvariant(cpi.FormatVersion == byte(bm.writeFormatVersion), "block that's not deleted must have a valid format version: %+v", cpi) } + bm.assertInvariant(cpi.TimestampSeconds != 0, "block has no timestamp: %v", cpi.BlockID) } } -func (bm *Manager) invariantViolated(msg string, arg ...interface{}) { - if len(arg) > 0 { - msg = fmt.Sprintf(msg, arg...) +func (bm *Manager) assertInvariant(ok bool, errorMsg string, arg ...interface{}) { + if ok { + return } - panic(msg) + if len(arg) > 0 { + errorMsg = fmt.Sprintf(errorMsg, arg...) 
+ } + + panic(errorMsg) } func (bm *Manager) startPackIndexLocked() { From 097367ca02356a1009eba6361a95ff5427cd0976 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 22 Dec 2018 23:44:29 -0800 Subject: [PATCH 43/74] manifest: added TestManifestInitCorruptedBlock --- manifest/manifest_manager.go | 19 ++++---- manifest/manifest_manager_test.go | 74 +++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/manifest/manifest_manager.go b/manifest/manifest_manager.go index 3809e1665..ef89bf57b 100644 --- a/manifest/manifest_manager.go +++ b/manifest/manifest_manager.go @@ -220,17 +220,10 @@ func (m *Manager) flushPendingEntriesLocked(ctx context.Context) (string, error) } var buf bytes.Buffer - gz := gzip.NewWriter(&buf) - if err := json.NewEncoder(gz).Encode(man); err != nil { - return "", fmt.Errorf("unable to marshal: %v", err) - } - if err := gz.Flush(); err != nil { - return "", fmt.Errorf("unable to flush: %v", err) - } - if err := gz.Close(); err != nil { - return "", fmt.Errorf("unable to close: %v", err) - } + mustSucceed(json.NewEncoder(gz).Encode(man)) + mustSucceed(gz.Flush()) + mustSucceed(gz.Close()) blockID, err := m.b.WriteBlock(ctx, buf.Bytes(), manifestBlockPrefix) if err != nil { @@ -247,6 +240,12 @@ func (m *Manager) flushPendingEntriesLocked(ctx context.Context) (string, error) return blockID, nil } +func mustSucceed(e error) { + if e != nil { + panic("unexpected failure: " + e.Error()) + } +} + // Delete marks the specified manifest ID for deletion. func (m *Manager) Delete(ctx context.Context, id string) error { if err := m.ensureInitialized(ctx); err != nil { diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go index 16b6c9f0f..a2d2a5756 100644 --- a/manifest/manifest_manager_test.go +++ b/manifest/manifest_manager_test.go @@ -123,6 +123,80 @@ func TestManifest(t *testing.T) { verifyItemNotFound(ctx, t, mgr3, id3) } +func TestManifestInitCorruptedBlock(t *testing.T) { + ctx := context.Background() + data := map[string][]byte{} + st := storagetesting.NewMapStorage(data, nil, nil) + + f := block.FormattingOptions{ + BlockFormat: "UNENCRYPTED_HMAC_SHA256_128", + MaxPackSize: 100000, + } + + // write some data to storage + bm, err := block.NewManager(ctx, st, f, block.CachingOptions{}) + if err != nil { + t.Fatalf("err: %v", err) + } + + mgr, err := NewManager(ctx, bm) + if err != nil { + t.Fatalf("err: %v", err) + } + + mgr.Put(ctx, map[string]string{"type": "foo"}, map[string]string{"some": "value"}) + mgr.Flush(ctx) + bm.Flush(ctx) + + // corrupt data at the storage level. + for k, v := range data { + if strings.HasPrefix(k, "p") { + for i := 0; i < len(v); i++ { + v[i] ^= 1 + } + } + } + + // make a new block manager based on corrupted data. 
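+ // (The loop above flipped the low bit of every byte in each "p"-prefixed
+ // pack block, which breaks the checksum enforced by the
+ // UNENCRYPTED_HMAC_SHA256_128 format, so each manifest operation below is
+ // expected to fail with an "invalid checksum" error.)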
+ bm, err = block.NewManager(ctx, st, f, block.CachingOptions{}) + if err != nil { + t.Fatalf("err: %v", err) + } + + mgr, err = NewManager(ctx, bm) + if err != nil { + t.Fatalf("err: %v", err) + } + + cases := []struct { + desc string + f func() error + }{ + {"GetRaw", func() error { _, err := mgr.GetRaw(ctx, "anything"); return err }}, + {"GetMetadata", func() error { _, err := mgr.GetMetadata(ctx, "anything"); return err }}, + {"Get", func() error { return mgr.Get(ctx, "anything", nil) }}, + {"Delete", func() error { return mgr.Delete(ctx, "anything") }}, + {"Find", func() error { _, err := mgr.Find(ctx, nil); return err }}, + {"Put", func() error { + _, err := mgr.Put(ctx, map[string]string{ + "type": "foo", + }, map[string]string{ + "some": "value", + }) + return err + }}, + } + + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + err := tc.f() + if err == nil || !strings.Contains(err.Error(), "invalid checksum") { + t.Errorf("invalid error when initializing malformed manifest manager: %v", err) + } + }) + } +} + func addAndVerify(ctx context.Context, t *testing.T, mgr *Manager, labels map[string]string, data map[string]int) string { t.Helper() id, err := mgr.Put(ctx, labels, data) From 160d02a6eb1bedbc6899500126514419d819fbdf Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 11:59:17 -0800 Subject: [PATCH 44/74] block cache: fixed a bug where we remove wrong item from the cache, added tests --- block/block_cache.go | 38 +++++++++++++++------------ block/block_cache_test.go | 48 ++++++++++++++++++++++++++++++---- internal/storagetesting/map.go | 15 +++++++++++ 3 files changed, 79 insertions(+), 22 deletions(-) diff --git a/block/block_cache.go b/block/block_cache.go index d3e8c174e..dd2609081 100644 --- a/block/block_cache.go +++ b/block/block_cache.go @@ -14,15 +14,17 @@ ) const ( - sweepCacheFrequency = 1 * time.Minute - touchThreshold = 10 * time.Minute + defaultSweepFrequency = 1 * time.Minute + defaultTouchThreshold = 10 * time.Minute ) type blockCache struct { - st storage.Storage - cacheStorage storage.Storage - maxSizeBytes int64 - hmacSecret []byte + st storage.Storage + cacheStorage storage.Storage + maxSizeBytes int64 + hmacSecret []byte + sweepFrequency time.Duration + touchThreshold time.Duration mu sync.Mutex lastTotalSizeBytes int64 @@ -75,7 +77,7 @@ func (c *blockCache) readAndVerifyCacheBlock(ctx context.Context, cacheKey strin b, err = verifyAndStripHMAC(b, c.hmacSecret) if err == nil { if t, ok := c.cacheStorage.(blockToucher); ok { - t.TouchBlock(ctx, cacheKey, touchThreshold) //nolint:errcheck + t.TouchBlock(ctx, cacheKey, c.touchThreshold) //nolint:errcheck } // retrieved from cache and HMAC valid @@ -103,7 +105,7 @@ func (c *blockCache) sweepDirectoryPeriodically(ctx context.Context) { case <-c.closed: return - case <-time.After(sweepCacheFrequency): + case <-time.After(c.sweepFrequency): err := c.sweepDirectory(ctx) if err != nil { log.Warningf("blockCache sweep failed: %v", err) @@ -156,8 +158,8 @@ func (c *blockCache) sweepDirectory(ctx context.Context) (err error) { if totalRetainedSize > c.maxSizeBytes { oldest := heap.Pop(&h).(storage.BlockMetadata) - if delerr := c.cacheStorage.DeleteBlock(ctx, it.BlockID); delerr != nil { - log.Warningf("unable to remove %v: %v", it.BlockID, delerr) + if delerr := c.cacheStorage.DeleteBlock(ctx, oldest.BlockID); delerr != nil { + log.Warningf("unable to remove %v: %v", oldest.BlockID, delerr) } else { totalRetainedSize -= oldest.Length } @@ -195,16 +197,18 @@ func newBlockCache(ctx 
context.Context, st storage.Storage, caching CachingOptio } } - return newBlockCacheWithCacheStorage(ctx, st, cacheStorage, caching) + return newBlockCacheWithCacheStorage(ctx, st, cacheStorage, caching, defaultTouchThreshold, defaultSweepFrequency) } -func newBlockCacheWithCacheStorage(ctx context.Context, st, cacheStorage storage.Storage, caching CachingOptions) (*blockCache, error) { +func newBlockCacheWithCacheStorage(ctx context.Context, st, cacheStorage storage.Storage, caching CachingOptions, touchThreshold time.Duration, sweepFrequency time.Duration) (*blockCache, error) { c := &blockCache{ - st: st, - cacheStorage: cacheStorage, - maxSizeBytes: caching.MaxCacheSizeBytes, - hmacSecret: append([]byte(nil), caching.HMACSecret...), - closed: make(chan struct{}), + st: st, + cacheStorage: cacheStorage, + maxSizeBytes: caching.MaxCacheSizeBytes, + hmacSecret: append([]byte(nil), caching.HMACSecret...), + closed: make(chan struct{}), + touchThreshold: touchThreshold, + sweepFrequency: sweepFrequency, } if err := c.sweepDirectory(ctx); err != nil { diff --git a/block/block_cache_test.go b/block/block_cache_test.go index 9479a4de4..5e0e71da7 100644 --- a/block/block_cache_test.go +++ b/block/block_cache_test.go @@ -1,6 +1,7 @@ package block import ( + "bytes" "context" "fmt" "io/ioutil" @@ -8,6 +9,7 @@ "reflect" "sort" "testing" + "time" "github.com/kopia/repo/internal/storagetesting" "github.com/kopia/repo/storage" @@ -18,20 +20,56 @@ func newUnderlyingStorageForBlockCacheTesting() storage.Storage { data := map[string][]byte{} st := storagetesting.NewMapStorage(data, nil, nil) st.PutBlock(ctx, "block-1", []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + st.PutBlock(ctx, "block-4k", bytes.Repeat([]byte{1, 2, 3, 4}, 1000)) // 4000 bytes return st } -func TestInMemoryBlockCache(t *testing.T) { +func TestCacheExpiration(t *testing.T) { cacheData := map[string][]byte{} cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil) - cache, err := newBlockCacheWithCacheStorage(context.Background(), newUnderlyingStorageForBlockCacheTesting(), cacheStorage, CachingOptions{ + underlyingStorage := newUnderlyingStorageForBlockCacheTesting() + + cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, cacheStorage, CachingOptions{ MaxCacheSizeBytes: 10000, - }) + }, 0, 500*time.Millisecond) if err != nil { t.Fatalf("err: %v", err) } - verifyBlockCache(t, cache) + defer cache.close() + + ctx := context.Background() + cache.getContentBlock(ctx, "00000a", "block-4k", 0, -1) // 4k + cache.getContentBlock(ctx, "00000b", "block-4k", 0, -1) // 4k + cache.getContentBlock(ctx, "00000c", "block-4k", 0, -1) // 4k + cache.getContentBlock(ctx, "00000d", "block-4k", 0, -1) // 4k + + // wait for a sweep + time.Sleep(2 * time.Second) + + // 00000a and 00000b will be removed from cache because it's the oldest. 
+ // to verify, let's remove block-4k from the underlying storage and make sure we can still read + // 00000c and 00000d from the cache but not 00000a nor 00000b + underlyingStorage.DeleteBlock(ctx, "block-4k") + + cases := []struct { + block string + expectedError error + }{ + {"00000a", storage.ErrBlockNotFound}, + {"00000b", storage.ErrBlockNotFound}, + {"00000c", nil}, + {"00000d", nil}, + } + + for _, tc := range cases { + _, got := cache.getContentBlock(ctx, tc.block, "block-4k", 0, -1) + if want := tc.expectedError; got != want { + t.Errorf("unexpected error when getting block %v: %v wanted %v", tc.block, got, want) + } else { + t.Logf("got correct error %v when reading block %v", tc.expectedError, tc.block) + } + } } func TestDiskBlockCache(t *testing.T) { @@ -51,12 +89,12 @@ func TestDiskBlockCache(t *testing.T) { if err != nil { t.Fatalf("err: %v", err) } + defer cache.close() verifyBlockCache(t, cache) } func verifyBlockCache(t *testing.T, cache *blockCache) { ctx := context.Background() - defer cache.close() t.Run("GetContentBlock", func(t *testing.T) { cases := []struct { diff --git a/internal/storagetesting/map.go b/internal/storagetesting/map.go index 5e303e53e..ed01aa415 100644 --- a/internal/storagetesting/map.go +++ b/internal/storagetesting/map.go @@ -61,6 +61,7 @@ func (s *mapStorage) DeleteBlock(ctx context.Context, id string) error { defer s.mutex.Unlock() delete(s.data, id) + delete(s.keyTime, id) return nil } @@ -100,6 +101,20 @@ func (s *mapStorage) Close(ctx context.Context) error { return nil } +func (s *mapStorage) TouchBlock(ctx context.Context, blockID string, threshold time.Duration) error { + s.mutex.Lock() + defer s.mutex.Unlock() + + if v, ok := s.keyTime[blockID]; ok { + n := s.timeNow() + if n.Sub(v) >= threshold { + s.keyTime[blockID] = n + } + } + + return nil +} + func (s *mapStorage) ConnectionInfo() storage.ConnectionInfo { // unsupported return storage.ConnectionInfo{} From c4eb75481838d9ebf2d8dcf18ce4f37c0f08c518 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 18:23:42 -0800 Subject: [PATCH 45/74] block cache: additional tests for cache faults --- block/block_cache_test.go | 118 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/block/block_cache_test.go b/block/block_cache_test.go index 5e0e71da7..ce9a54677 100644 --- a/block/block_cache_test.go +++ b/block/block_cache_test.go @@ -3,11 +3,13 @@ import ( "bytes" "context" + "errors" "fmt" "io/ioutil" "os" "reflect" "sort" + "strings" "testing" "time" @@ -154,6 +156,122 @@ func verifyBlockCache(t *testing.T, cache *blockCache) { }) } +func TestCacheFailureToOpen(t *testing.T) { + someError := errors.New("some error") + + cacheData := map[string][]byte{} + cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil) + underlyingStorage := newUnderlyingStorageForBlockCacheTesting() + faultyCache := &storagetesting.FaultyStorage{ + Base: cacheStorage, + Faults: map[string][]*storagetesting.Fault{ + "ListBlocks": { + {Err: someError}, + }, + }, + } + + // Will fail because of ListBlocks failure. + cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{ + MaxCacheSizeBytes: 10000, + }, 0, 5*time.Hour) + if err == nil || !strings.Contains(err.Error(), someError.Error()) { + t.Errorf("invalid error %v, wanted: %v", err, someError) + } + + // ListBlocks fails only once, next time it succeeds. 
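+ // (A Fault entry in storagetesting.FaultyStorage is consumed by one
+ // matching call unless its Repeat count is set, as the GetBlock fault
+ // with Repeat: 100 further below illustrates; the single ListBlocks fault
+ // was used up by the failed constructor call above, so this retry sees
+ // the real storage.)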
+ cache, err = newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{ + MaxCacheSizeBytes: 10000, + }, 0, 100*time.Millisecond) + if err != nil { + t.Fatalf("err: %v", err) + } + + defer cache.close() +} + +func TestCacheFailureToWrite(t *testing.T) { + someError := errors.New("some error") + + cacheData := map[string][]byte{} + cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil) + underlyingStorage := newUnderlyingStorageForBlockCacheTesting() + faultyCache := &storagetesting.FaultyStorage{ + Base: cacheStorage, + } + + cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{ + MaxCacheSizeBytes: 10000, + }, 0, 5*time.Hour) + if err != nil { + t.Fatalf("err: %v", err) + } + + defer cache.close() + + ctx := context.Background() + faultyCache.Faults = map[string][]*storagetesting.Fault{ + "PutBlock": { + {Err: someError}, + }, + } + + v, err := cache.getContentBlock(ctx, "aa", "block-1", 0, 3) + if err != nil { + t.Errorf("write failure wasn't ignored: %v", err) + } + + if got, want := v, []byte{1, 2, 3}; !reflect.DeepEqual(got, want) { + t.Errorf("unexpected value retrieved from cache: %v, want: %v", got, want) + } + + all, err := storage.ListAllBlocks(ctx, cacheStorage, "") + if err != nil { + t.Errorf("error listing cache: %v", err) + } + if len(all) != 0 { + t.Errorf("invalid test - cache was written") + } +} + +func TestCacheFailureToRead(t *testing.T) { + someError := errors.New("some error") + + cacheData := map[string][]byte{} + cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil) + underlyingStorage := newUnderlyingStorageForBlockCacheTesting() + faultyCache := &storagetesting.FaultyStorage{ + Base: cacheStorage, + } + + cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{ + MaxCacheSizeBytes: 10000, + }, 0, 5*time.Hour) + if err != nil { + t.Fatalf("err: %v", err) + } + + defer cache.close() + + ctx := context.Background() + faultyCache.Faults = map[string][]*storagetesting.Fault{ + "GetBlock": { + {Err: someError, Repeat: 100}, + }, + } + + for i := 0; i < 2; i++ { + v, err := cache.getContentBlock(ctx, "aa", "block-1", 0, 3) + if err != nil { + t.Errorf("read failure wasn't ignored: %v", err) + } + + if got, want := v, []byte{1, 2, 3}; !reflect.DeepEqual(got, want) { + t.Errorf("unexpected value retrieved from cache: %v, want: %v", got, want) + } + } +} + func verifyStorageBlockList(t *testing.T, st storage.Storage, expectedBlocks ...string) { t.Helper() var foundBlocks []string From afad2ad237419c4c5d6f39286112dbf016ebf645 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 20:53:44 -0800 Subject: [PATCH 46/74] repo: moved repository_test to top level --- tests/repository_test/repository_test.go => repository_test.go | 2 +- tests/repository_test/repository.go | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) rename tests/repository_test/repository_test.go => repository_test.go (99%) delete mode 100644 tests/repository_test/repository.go diff --git a/tests/repository_test/repository_test.go b/repository_test.go similarity index 99% rename from tests/repository_test/repository_test.go rename to repository_test.go index 29fb036ae..fb7afb708 100644 --- a/tests/repository_test/repository_test.go +++ b/repository_test.go @@ -1,4 +1,4 @@ -package repository_test +package repo_test import ( "bytes" diff --git a/tests/repository_test/repository.go 
b/tests/repository_test/repository.go deleted file mode 100644 index 8fb601007..000000000 --- a/tests/repository_test/repository.go +++ /dev/null @@ -1,3 +0,0 @@ -package repository - -// dummy package From 641b25a508518e2ee630f84c8736fbf27df453d3 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 21:12:53 -0800 Subject: [PATCH 47/74] repo: added tests for Disconnect() --- internal/repotesting/repotesting.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/internal/repotesting/repotesting.go b/internal/repotesting/repotesting.go index cfb1d879b..5acb59c4d 100644 --- a/internal/repotesting/repotesting.go +++ b/internal/repotesting/repotesting.go @@ -25,6 +25,7 @@ type Environment struct { configDir string storageDir string + connected bool } // Setup sets up a test environment. @@ -77,6 +78,8 @@ func (e *Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOption t.Fatalf("can't connect: %v", err) } + e.connected = true + e.Repository, err = repo.Open(ctx, e.configFile(), masterPassword, &repo.Options{}) if err != nil { t.Fatalf("can't open: %v", err) @@ -90,8 +93,13 @@ func (e *Environment) Close(t *testing.T) { if err := e.Repository.Close(context.Background()); err != nil { t.Fatalf("unable to close: %v", err) } - - if err := os.RemoveAll(e.configDir); err != nil { + if e.connected { + if err := repo.Disconnect(e.configFile()); err != nil { + t.Errorf("error disconnecting: %v", err) + } + } + if err := os.Remove(e.configDir); err != nil { + // should be empty, assuming Disconnect was successful t.Errorf("error removing config directory: %v", err) } if err := os.RemoveAll(e.storageDir); err != nil { From 53b4a9abd23a1ac986ba47e2169af6f30ac001e2 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 23:04:12 -0800 Subject: [PATCH 48/74] jsonstream: added test cases --- internal/jsonstream/stream_test.go | 46 ++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/internal/jsonstream/stream_test.go b/internal/jsonstream/stream_test.go index 32ccf25fc..6beff5f0c 100644 --- a/internal/jsonstream/stream_test.go +++ b/internal/jsonstream/stream_test.go @@ -115,3 +115,49 @@ func TestInvalidHeader(t *testing.T) { t.Errorf("got incorrect error: %v", err) } } + +func TestInvalidStream(t *testing.T) { + cases := []string{ + `x`, + `{}`, + `{"not-stream":"hdr"}`, + `{{}}`, + `{"stream":"non-hdr"}`, + `{"stream":"hdr","nonEntries":[]}`, + `{"stream":"hdr","entries":{}}`, + `{"stream":"hdr","entries":[]}`, + `{"stream":"hdr","entries":[`, + `{"stream":"hdr","entries":[}`, + `{"stream":"hdr","entries":[]`, + `{"stream":"hdr","entries":[],"summary"`, + `{"stream":"hdr","entries":[],1.222.33`, + `{"stream":"hdr","entries":[],"sxummary":{"x":"1",`, + } + + for _, tc := range cases { + r, err := NewReader(strings.NewReader(tc), "hdr", nil) + if err != nil { + if !isInvalidStream(err) { + t.Errorf("got invalid error when creating reader: %v", err) + } + continue + } + + for { + v := map[string]interface{}{} + if err := r.Read(v); err != nil { + if err == io.EOF { + break + } + if !isInvalidStream(err) { + t.Errorf("got invalid error when creating reader: %v", err) + } + break + } + } + } +} + +func isInvalidStream(e error) bool { + return e != nil && strings.Contains(e.Error(), "invalid stream format") +} From 7d03d82cd1bf4c51ee46304d0b4da5f1fa63b22f Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 23:20:29 -0800 Subject: [PATCH 49/74] retry: added tests for retry loop --- 
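The new test pins down the exported entry point through a table of retriable
and non-retriable cases. A minimal caller-side sketch, assuming the
`WithExponentialBackoff(desc string, f func() (interface{}, error), isRetriable func(error) bool)`
signature exercised in retry_test.go below; the storage call and the
`isRetriableStorageError` predicate are hypothetical:

```golang
// Sketch only: retry a flaky read with exponential backoff. The loop sleeps
// between attempts (starting at retryInitialSleepAmount, capped at
// retryMaxSleepAmount) and gives up after maxAttempts.
v, err := retry.WithExponentialBackoff("get block p1234", func() (interface{}, error) {
	return st.GetBlock(ctx, "p1234", 0, -1) // hypothetical flaky call
}, isRetriableStorageError)
if err != nil {
	return err
}
data := v.([]byte) // results come back as interface{} and need a type assertion
```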
internal/retry/retry.go | 2 +- internal/retry/retry_test.go | 60 ++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 internal/retry/retry_test.go diff --git a/internal/retry/retry.go b/internal/retry/retry.go index 3bcb53b5c..24e7036b2 100644 --- a/internal/retry/retry.go +++ b/internal/retry/retry.go @@ -10,7 +10,7 @@ var log = repologging.Logger("repo/retry") -const ( +var ( maxAttempts = 10 retryInitialSleepAmount = 1 * time.Second retryMaxSleepAmount = 32 * time.Second diff --git a/internal/retry/retry_test.go b/internal/retry/retry_test.go new file mode 100644 index 000000000..ae7af310a --- /dev/null +++ b/internal/retry/retry_test.go @@ -0,0 +1,60 @@ +package retry + +import ( + "errors" + "fmt" + "reflect" + "testing" + "time" +) + +var ( + errRetriable = errors.New("retriable") + errNonRetriable = errors.New("non-retriable") +) + +func isRetriable(e error) bool { + return e == errRetriable +} + +func TestRetry(t *testing.T) { + retryInitialSleepAmount = 10 * time.Millisecond + retryMaxSleepAmount = 20 * time.Millisecond + maxAttempts = 3 + + cnt := 0 + + cases := []struct { + desc string + f func() (interface{}, error) + want interface{} + wantError error + }{ + {"success-nil", func() (interface{}, error) { return nil, nil }, nil, nil}, + {"success", func() (interface{}, error) { return 3, nil }, 3, nil}, + {"retriable-succeeds", func() (interface{}, error) { + cnt++ + if cnt < 2 { + return nil, errRetriable + } + return 4, nil + }, 4, nil}, + {"retriable-never-succeeds", func() (interface{}, error) { return nil, errRetriable }, nil, fmt.Errorf("unable to complete retriable-never-succeeds despite 3 retries")}, + } + + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + tc := tc + t.Parallel() + + got, err := WithExponentialBackoff(tc.desc, tc.f, isRetriable) + if !reflect.DeepEqual(err, tc.wantError) { + t.Errorf("invalid error %q, wanted %q", err, tc.wantError) + } + + if got != tc.want { + t.Errorf("invalid value %v, wanted %v", got, tc.want) + } + }) + } +} From 86d9df8ad637c5ad8c662be004b2de2d0ecbd65b Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 23:34:18 -0800 Subject: [PATCH 50/74] makefile: filter out *testing/ packages from code coverage results --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 64a419a63..855c31712 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,8 @@ lint: gometalinter.v2 ./... test: - GO111MODULE=on go test -tags test -count=1 -coverprofile=tmp.cov --coverpkg ./... -timeout 90s ./... + GO111MODULE=on go test -tags test -count=1 -coverprofile=raw.cov --coverpkg ./... -timeout 90s ./... 
+ grep -v testing/ raw.cov > tmp.cov upload-coverage: goveralls -service=travis-ci -coverprofile=tmp.cov From 704dcd3e46050758bc73c8f3f0d4e534238b0dc5 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 23 Dec 2018 23:36:46 -0800 Subject: [PATCH 51/74] travis: use latest point release of Go 1.11 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1e1b96740..ada3a2350 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: go go: -- '1.11' +- '1.11.x' os: - linux before_install: From b042d0098539512ea0c04482150aa6c2ac6950b2 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 24 Dec 2018 08:24:00 -0800 Subject: [PATCH 52/74] object: dead code --- object/object_manager.go | 8 +------- object/object_writer.go | 2 -- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/object/object_manager.go b/object/object_manager.go index c85ed783c..0ebe9538e 100644 --- a/object/object_manager.go +++ b/object/object_manager.go @@ -46,19 +46,13 @@ type Manager struct { // NewWriter creates an ObjectWriter for writing to the repository. func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer { - w := &objectWriter{ + return &objectWriter{ ctx: ctx, repo: om, splitter: om.newSplitter(), description: opt.Description, prefix: opt.Prefix, } - - if opt.splitter != nil { - w.splitter = opt.splitter - } - - return w } // Open creates new ObjectReader for reading given object from a repository. diff --git a/object/object_writer.go b/object/object_writer.go index b8f4ddba7..4b40ec4f7 100644 --- a/object/object_writer.go +++ b/object/object_writer.go @@ -143,6 +143,4 @@ func (w *objectWriter) Result() (ID, error) { type WriterOptions struct { Description string Prefix string // empty string or a single-character ('g'..'z') - - splitter objectSplitter } From e5af47227c58b7a012ac0b81e590eacd5a8806a4 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 24 Dec 2018 10:35:02 -0800 Subject: [PATCH 53/74] manifest: removed manifest.EntryIDs --- manifest/manifest_entry.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/manifest/manifest_entry.go b/manifest/manifest_entry.go index 09778b4e3..cc2ead40f 100644 --- a/manifest/manifest_entry.go +++ b/manifest/manifest_entry.go @@ -10,12 +10,3 @@ type EntryMetadata struct { Labels map[string]string ModTime time.Time } - -// EntryIDs returns the list of IDs for the provided list of EntryMetadata. -func EntryIDs(entries []*EntryMetadata) []string { - var ids []string - for _, e := range entries { - ids = append(ids, e.ID) - } - return ids -} From 6e8a48ed4f7197ceb5b98959c248199259125953 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 29 Dec 2018 15:08:29 -0800 Subject: [PATCH 54/74] Revamped hash and encryption algorithm handling. This enables many more combinations of hash and encryption algorithm to be selected, some with dramatically better performance. 
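A rough sketch of the resulting API (names as introduced in
block_formatter.go and block_manager.go below; the key material is a
placeholder):

```golang
// Hash and encryption algorithms are now registered by name and selected
// independently via FormattingOptions.
h, e, err := block.CreateHashAndEncryptor(block.FormattingOptions{
	Hash:       "BLAKE2B-256-128",
	Encryption: "SALSA20",
	HMACSecret: hmacSecret, // placeholder key material
	MasterKey:  masterKey,  // placeholder, at least 32 bytes for SALSA20
})
if err != nil {
	log.Fatalf("unsupported hash/encryption combination: %v", err)
}

// The keyed hash of the plaintext serves both as the block ID and as the
// IV/nonce for encryption.
blockID := h(data)
cipherText, err := e.Encrypt(data, blockID)
```

Additional algorithms can be plugged in with RegisterHash/RegisterEncryption
and enumerated with SupportedHashAlgorithms/SupportedEncryptionAlgorithms.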
--- block/block_formatter.go | 232 ++++++++++++++++++++-------- block/block_formatter_test.go | 70 +++++---- block/block_formatting_options.go | 12 +- block/block_index_recovery.go | 2 +- block/block_manager.go | 92 +++++++++-- block/block_manager_test.go | 16 +- initialize.go | 12 +- internal/repotesting/repotesting.go | 4 +- manifest/manifest_manager_test.go | 8 +- repository_test.go | 2 +- tests/stress_test/stress_test.go | 8 +- 11 files changed, 319 insertions(+), 139 deletions(-) diff --git a/block/block_formatter.go b/block/block_formatter.go index cc7297bdf..23a46fe41 100644 --- a/block/block_formatter.go +++ b/block/block_formatter.go @@ -8,13 +8,23 @@ "fmt" "hash" "sort" + + "golang.org/x/crypto/blake2b" + "golang.org/x/crypto/blake2s" + "golang.org/x/crypto/salsa20" + "golang.org/x/crypto/sha3" + + "golang.org/x/crypto/ripemd160" ) -// Formatter performs data block ID computation and encryption of a block of data when storing object in a repository. -type Formatter interface { - // ComputeBlockID computes ID of the storage block for the specified block of data and returns it in ObjectID. - ComputeBlockID(data []byte) []byte +// HashFunc computes hash of block of data using a cryptographic hash function, possibly with HMAC and/or truncation. +type HashFunc func(data []byte) []byte +// HashFuncFactory returns a hash function for given formatting options. +type HashFuncFactory func(o FormattingOptions) (HashFunc, error) + +// Encryptor performs encryption and decryption of blocks of data. +type Encryptor interface { // Encrypt returns encrypted bytes corresponding to the given plaintext. Must not clobber the input slice. Encrypt(plainText []byte, blockID []byte) ([]byte, error) @@ -22,48 +32,39 @@ type Formatter interface { Decrypt(cipherText []byte, blockID []byte) ([]byte, error) } -// digestFunction computes the digest (hash, optionally HMAC) of a given block of bytes. -type digestFunction func([]byte) []byte +// EncryptorFactory creates new Encryptor for given FormattingOptions +type EncryptorFactory func(o FormattingOptions) (Encryptor, error) -// unencryptedFormat implements non-encrypted format. -type unencryptedFormat struct { - digestFunc digestFunction +var hashFunctions = map[string]HashFuncFactory{} +var encryptors = map[string]EncryptorFactory{} + +// nullEncryptor implements non-encrypted format. +type nullEncryptor struct { } -func (fi *unencryptedFormat) ComputeBlockID(data []byte) []byte { - return fi.digestFunc(data) -} - -func (fi *unencryptedFormat) Encrypt(plainText []byte, blockID []byte) ([]byte, error) { +func (fi nullEncryptor) Encrypt(plainText []byte, blockID []byte) ([]byte, error) { return cloneBytes(plainText), nil } -func (fi *unencryptedFormat) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) { +func (fi nullEncryptor) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) { return cloneBytes(cipherText), nil } -// syntheticIVEncryptionFormat implements encrypted format with single master AES key and StorageBlock==IV that's -// derived from HMAC-SHA256(content, secret). -type syntheticIVEncryptionFormat struct { - digestFunc digestFunction - createCipher func(key []byte) (cipher.Block, error) - aesKey []byte +// ctrEncryptor implements encrypted format which uses CTR mode of a block cipher with nonce==IV. 
+type ctrEncryptor struct { + createCipher func() (cipher.Block, error) } -func (fi *syntheticIVEncryptionFormat) ComputeBlockID(data []byte) []byte { - return fi.digestFunc(data) +func (fi ctrEncryptor) Encrypt(plainText []byte, blockID []byte) ([]byte, error) { + return symmetricEncrypt(fi.createCipher, blockID, plainText) } -func (fi *syntheticIVEncryptionFormat) Encrypt(plainText []byte, blockID []byte) ([]byte, error) { - return symmetricEncrypt(fi.createCipher, fi.aesKey, blockID, plainText) +func (fi ctrEncryptor) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) { + return symmetricEncrypt(fi.createCipher, blockID, cipherText) } -func (fi *syntheticIVEncryptionFormat) Decrypt(cipherText []byte, blockID []byte) ([]byte, error) { - return symmetricEncrypt(fi.createCipher, fi.aesKey, blockID, cipherText) -} - -func symmetricEncrypt(createCipher func(key []byte) (cipher.Block, error), key []byte, iv []byte, b []byte) ([]byte, error) { - blockCipher, err := createCipher(key) +func symmetricEncrypt(createCipher func() (cipher.Block, error), iv []byte, b []byte) ([]byte, error) { + blockCipher, err := createCipher() if err != nil { return nil, err } @@ -74,47 +75,146 @@ func symmetricEncrypt(createCipher func(key []byte) (cipher.Block, error), key [ return result, nil } -// SupportedFormats is a list of supported object formats including: -// -// UNENCRYPTED_HMAC_SHA256_128 - unencrypted, block IDs are 128-bit (32 characters long) -// UNENCRYPTED_HMAC_SHA256 - unencrypted, block IDs are 256-bit (64 characters long) -// ENCRYPTED_HMAC_SHA256_AES256_SIV - encrypted with AES-256 (shared key), IV==FOLD(HMAC-SHA256(content), 128) -var SupportedFormats []string +type salsaEncryptor struct { + nonceSize int + key *[32]byte +} -// FormatterFactories maps known block formatters to their factory functions. -var FormatterFactories map[string]func(f FormattingOptions) (Formatter, error) +func (s salsaEncryptor) Decrypt(input []byte, blockID []byte) ([]byte, error) { + return s.encryptDecrypt(input, blockID) +} + +func (s salsaEncryptor) Encrypt(input []byte, blockID []byte) ([]byte, error) { + return s.encryptDecrypt(input, blockID) +} + +func (s salsaEncryptor) encryptDecrypt(input []byte, blockID []byte) ([]byte, error) { + if len(blockID) < s.nonceSize { + return nil, fmt.Errorf("hash too short, expected >=%v bytes, got %v", s.nonceSize, len(blockID)) + } + result := make([]byte, len(input)) + nonce := blockID[0:s.nonceSize] + salsa20.XORKeyStream(result, input, nonce, s.key) + return result, nil +} + +// truncatedHMACHashFuncFactory returns a HashFuncFactory that computes HMAC(hash, secret) of a given block of bytes +// and truncates results to the given size. +func truncatedHMACHashFuncFactory(hf func() hash.Hash, truncate int) HashFuncFactory { + return func(o FormattingOptions) (HashFunc, error) { + return func(b []byte) []byte { + h := hmac.New(hf, o.HMACSecret) + h.Write(b) // nolint:errcheck + return h.Sum(nil)[0:truncate] + }, nil + } +} + +// truncatedKeyedHashFuncFactory returns a HashFuncFactory that computes keyed hash of a given block of bytes +// and truncates results to the given size. 
+func truncatedKeyedHashFuncFactory(hf func(key []byte) (hash.Hash, error), truncate int) HashFuncFactory { + return func(o FormattingOptions) (HashFunc, error) { + if _, err := hf(o.HMACSecret); err != nil { + return nil, err + } + + return func(b []byte) []byte { + h, _ := hf(o.HMACSecret) + h.Write(b) // nolint:errcheck + return h.Sum(nil)[0:truncate] + }, nil + } +} + +// newCTREncryptorFactory returns new EncryptorFactory that uses CTR with symmetric encryption (such as AES) and a given key size. +func newCTREncryptorFactory(keySize int, createCipherWithKey func(key []byte) (cipher.Block, error)) EncryptorFactory { + return func(o FormattingOptions) (Encryptor, error) { + key, err := adjustKey(o.MasterKey, keySize) + if err != nil { + return nil, fmt.Errorf("unable to get encryption key: %v", err) + } + + return ctrEncryptor{ + createCipher: func() (cipher.Block, error) { + return createCipherWithKey(key) + }, + }, nil + } +} + +// RegisterHash registers a hash function with a given name. +func RegisterHash(name string, newHashFunc HashFuncFactory) { + hashFunctions[name] = newHashFunc +} + +func SupportedHashAlgorithms() []string { + var result []string + for k := range hashFunctions { + result = append(result, k) + } + sort.Strings(result) + return result +} + +func SupportedEncryptionAlgorithms() []string { + var result []string + for k := range encryptors { + result = append(result, k) + } + sort.Strings(result) + return result +} + +// RegisterEncryption registers new encryption algorithm. +func RegisterEncryption(name string, newEncryptor EncryptorFactory) { + encryptors[name] = newEncryptor +} + +// DefaultHash is the name of the default hash algorithm. +const DefaultHash = "HMAC-SHA256-128" + +// DefaultEncryption is the name of the default encryption algorithm. 
+const DefaultEncryption = "AES-256-CTR" func init() { - FormatterFactories = map[string]func(f FormattingOptions) (Formatter, error){ - "UNENCRYPTED_HMAC_SHA256": func(f FormattingOptions) (Formatter, error) { - return &unencryptedFormat{computeHMAC(sha256.New, f.HMACSecret, sha256.Size)}, nil - }, - "UNENCRYPTED_HMAC_SHA256_128": func(f FormattingOptions) (Formatter, error) { - return &unencryptedFormat{computeHMAC(sha256.New, f.HMACSecret, 16)}, nil - }, - "ENCRYPTED_HMAC_SHA256_AES256_SIV": func(f FormattingOptions) (Formatter, error) { - if len(f.MasterKey) < 32 { - return nil, fmt.Errorf("master key is not set") - } - return &syntheticIVEncryptionFormat{computeHMAC(sha256.New, f.HMACSecret, aes.BlockSize), aes.NewCipher, f.MasterKey}, nil - }, - } + RegisterHash("HMAC-SHA256", truncatedHMACHashFuncFactory(sha256.New, 32)) + RegisterHash("HMAC-SHA256-128", truncatedHMACHashFuncFactory(sha256.New, 16)) + RegisterHash("HMAC-SHA224", truncatedHMACHashFuncFactory(sha256.New224, 28)) + RegisterHash("HMAC-RIPEMD-160", truncatedHMACHashFuncFactory(ripemd160.New, 20)) + RegisterHash("HMAC-SHA3-224", truncatedHMACHashFuncFactory(sha3.New224, 28)) + RegisterHash("HMAC-SHA3-256", truncatedHMACHashFuncFactory(sha3.New256, 32)) - for formatName := range FormatterFactories { - SupportedFormats = append(SupportedFormats, formatName) - } + RegisterHash("BLAKE2S-128", truncatedKeyedHashFuncFactory(blake2s.New128, 16)) + RegisterHash("BLAKE2S-256", truncatedKeyedHashFuncFactory(blake2s.New256, 32)) + RegisterHash("BLAKE2B-256-128", truncatedKeyedHashFuncFactory(blake2b.New256, 16)) + RegisterHash("BLAKE2B-256", truncatedKeyedHashFuncFactory(blake2b.New256, 32)) - sort.Strings(SupportedFormats) + RegisterEncryption("NONE", func(f FormattingOptions) (Encryptor, error) { + return nullEncryptor{}, nil + }) + RegisterEncryption("AES-128-CTR", newCTREncryptorFactory(16, aes.NewCipher)) + RegisterEncryption("AES-192-CTR", newCTREncryptorFactory(24, aes.NewCipher)) + RegisterEncryption("AES-256-CTR", newCTREncryptorFactory(32, aes.NewCipher)) + RegisterEncryption("SALSA20", func(f FormattingOptions) (Encryptor, error) { + var k [32]byte + copy(k[:], f.MasterKey[0:32]) + return salsaEncryptor{8, &k}, nil + }) + RegisterEncryption("XSALSA20", func(f FormattingOptions) (Encryptor, error) { + var k [32]byte + copy(k[:], f.MasterKey[0:32]) + return salsaEncryptor{24, &k}, nil + }) } -// DefaultFormat is the block format that should be used by default when creating new repositories. -const DefaultFormat = "ENCRYPTED_HMAC_SHA256_AES256_SIV" - -// computeHMAC returns a digestFunction that computes HMAC(hash, secret) of a given block of bytes and truncates results to the given size. 
-func computeHMAC(hf func() hash.Hash, secret []byte, truncate int) digestFunction { - return func(b []byte) []byte { - h := hmac.New(hf, secret) - h.Write(b) // nolint:errcheck - return h.Sum(nil)[0:truncate] +func adjustKey(masterKey []byte, desiredKeySize int) ([]byte, error) { + if len(masterKey) == desiredKeySize { + return masterKey, nil } + + if desiredKeySize < len(masterKey) { + return masterKey[0:desiredKeySize], nil + } + + return nil, fmt.Errorf("required key too long %v, but only have %v", desiredKeySize, len(masterKey)) } diff --git a/block/block_formatter_test.go b/block/block_formatter_test.go index 59bcbdafe..5d4b2c83c 100644 --- a/block/block_formatter_test.go +++ b/block/block_formatter_test.go @@ -2,47 +2,61 @@ import ( "bytes" - "crypto/rand" "crypto/sha1" + "math/rand" "testing" ) +// combinations of hash and encryption that are not compatible. +var incompatibleAlgorithms = map[string]string{ + "BLAKE2B-256-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16", + "BLAKE2S-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16", + "HMAC-RIPEMD-160/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 20", + "HMAC-SHA256-128/XSALSA20": "invalid encryptor: hash too short, expected >=24 bytes, got 16", +} + func TestFormatters(t *testing.T) { secret := []byte("secret") - f := FormattingOptions{HMACSecret: secret, MasterKey: make([]byte, 32)} - for k, v := range FormatterFactories { - data := make([]byte, 100) - rand.Read(data) + data := make([]byte, 100) + rand.Read(data) + h0 := sha1.Sum(data) - h0 := sha1.Sum(data) + for _, hashAlgo := range SupportedHashAlgorithms() { + for _, encryptionAlgo := range SupportedEncryptionAlgorithms() { + h, e, err := CreateHashAndEncryptor(FormattingOptions{ + HMACSecret: secret, + MasterKey: make([]byte, 32), + Hash: hashAlgo, + Encryption: encryptionAlgo, + }) - of, err := v(f) - if err != nil { - t.Errorf("error creating object formatter for %v: %v", k, err) - continue - } + if err != nil { + key := hashAlgo + "/" + encryptionAlgo + errmsg := incompatibleAlgorithms[key] + if err.Error() == errmsg { + continue + } + t.Errorf("Algorithm %v not marked as incompatible and failed with %v", key, err) + continue + } - t.Logf("testing %v", k) - blockID := of.ComputeBlockID(data) - cipherText, err := of.Encrypt(data, blockID) - if err != nil || cipherText == nil { - t.Errorf("invalid response from Encrypt: %v %v", cipherText, err) - } + blockID := h(data) + cipherText, err := e.Encrypt(data, blockID) + if err != nil || cipherText == nil { + t.Errorf("invalid response from Encrypt: %v %v", cipherText, err) + } - plainText, err := of.Decrypt(cipherText, blockID) - if err != nil || plainText == nil { - t.Errorf("invalid response from Decrypt: %v %v", plainText, err) - } + plainText, err := e.Decrypt(cipherText, blockID) + if err != nil || plainText == nil { + t.Errorf("invalid response from Decrypt: %v %v", plainText, err) + } - h1 := sha1.Sum(plainText) + h1 := sha1.Sum(plainText) - if !bytes.Equal(h0[:], h1[:]) { - t.Errorf("Encrypt()/Decrypt() does not round-trip: %x %x", h0, h1) - } - - if len(blockID)%16 != 0 { - t.Errorf("block ID for %v not a multiple of 16: %v", k, blockID) + if !bytes.Equal(h0[:], h1[:]) { + t.Errorf("Encrypt()/Decrypt() does not round-trip: %x %x", h0, h1) + } } } } diff --git a/block/block_formatting_options.go b/block/block_formatting_options.go index 674c454e3..547353880 100644 --- a/block/block_formatting_options.go +++ 
b/block/block_formatting_options.go @@ -2,9 +2,11 @@ // FormattingOptions describes the rules for formatting blocks in repository. type FormattingOptions struct { - Version int `json:"version,omitempty"` // version number, must be "1" - BlockFormat string `json:"objectFormat,omitempty"` // identifier of the block format - HMACSecret []byte `json:"secret,omitempty"` // HMAC secret used to generate encryption keys - MasterKey []byte `json:"masterKey,omitempty"` // master encryption key (SIV-mode encryption only) - MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object + Version int `json:"version,omitempty"` // version number, must be "1" + LegacyBlockFormat string `json:"objectFormat,omitempty"` // identifier of the block format (legacy) + Hash string `json:"hash,omitempty"` // identifier of the hash algorithm used + Encryption string `json:"encryption,omitempty"` // identifier of the encryption algorithm used + HMACSecret []byte `json:"secret,omitempty"` // HMAC secret used to generate encryption keys + MasterKey []byte `json:"masterKey,omitempty"` // master encryption key (SIV-mode encryption only) + MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object } diff --git a/block/block_index_recovery.go b/block/block_index_recovery.go index f4c4bc231..c15f5b2d1 100644 --- a/block/block_index_recovery.go +++ b/block/block_index_recovery.go @@ -165,7 +165,7 @@ func (bm *Manager) appendPackFileIndexRecoveryData(blockData []byte, pending pac } localIndexIV := bm.hashData(localIndex) - encryptedLocalIndex, err := bm.formatter.Encrypt(localIndex, localIndexIV) + encryptedLocalIndex, err := bm.encryptor.Encrypt(localIndex, localIndexIV) if err != nil { return nil, err } diff --git a/block/block_manager.go b/block/block_manager.go index baf6f7f6f..54075f1a5 100644 --- a/block/block_manager.go +++ b/block/block_manager.go @@ -5,6 +5,7 @@ "bytes" "context" "crypto/aes" + "crypto/cipher" cryptorand "crypto/rand" "encoding/hex" "fmt" @@ -76,7 +77,8 @@ type Manager struct { writeFormatVersion int32 // format version to write maxPackSize int - formatter Formatter + hasher HashFunc + encryptor Encryptor minPreambleLength int maxPreambleLength int @@ -398,7 +400,7 @@ func (bm *Manager) maybeEncryptBlockDataForPacking(data []byte, blockID string) if err != nil { return nil, fmt.Errorf("unable to get packed block IV for %q: %v", blockID, err) } - return bm.formatter.Encrypt(data, iv) + return bm.encryptor.Encrypt(data, iv) } func appendRandomBytes(b []byte, count int) ([]byte, error) { @@ -658,7 +660,7 @@ func (bm *Manager) encryptAndWriteBlockNotLocked(ctx context.Context, data []byt // Encrypt the block in-place. atomic.AddInt64(&bm.stats.EncryptedBytes, int64(len(data))) - data2, err := bm.formatter.Encrypt(data, hash) + data2, err := bm.encryptor.Encrypt(data, hash) if err != nil { return "", err } @@ -675,7 +677,7 @@ func (bm *Manager) encryptAndWriteBlockNotLocked(ctx context.Context, data []byt func (bm *Manager) hashData(data []byte) []byte { // Hash the block and compute encryption key. 
- blockID := bm.formatter.ComputeBlockID(data) + blockID := bm.hasher(data) atomic.AddInt32(&bm.stats.HashedBlocks, 1) atomic.AddInt64(&bm.stats.HashedBytes, int64(len(data))) return blockID @@ -801,7 +803,7 @@ func (bm *Manager) getBlockContentsUnlocked(ctx context.Context, bi Info) ([]byt } func (bm *Manager) decryptAndVerify(encrypted []byte, iv []byte) ([]byte, error) { - decrypted, err := bm.formatter.Decrypt(encrypted, iv) + decrypted, err := bm.encryptor.Decrypt(encrypted, iv) if err != nil { return nil, err } @@ -827,7 +829,7 @@ func (bm *Manager) getPhysicalBlockInternal(ctx context.Context, blockID string) atomic.AddInt32(&bm.stats.ReadBlocks, 1) atomic.AddInt64(&bm.stats.ReadBytes, int64(len(payload))) - payload, err = bm.formatter.Decrypt(payload, iv) + payload, err = bm.encryptor.Decrypt(payload, iv) atomic.AddInt64(&bm.stats.DecryptedBytes, int64(len(payload))) if err != nil { return nil, err @@ -854,7 +856,7 @@ func getPhysicalBlockIV(s string) ([]byte, error) { } func (bm *Manager) verifyChecksum(data []byte, blockID []byte) error { - expected := bm.formatter.ComputeBlockID(data) + expected := bm.hasher(data) expected = expected[len(expected)-aes.BlockSize:] if !bytes.HasSuffix(blockID, expected) { atomic.AddInt32(&bm.stats.InvalidBlocks, 1) @@ -933,9 +935,11 @@ func newManagerWithOptions(ctx context.Context, st storage.Storage, f Formatting return nil, fmt.Errorf("can't handle repositories created using version %v (min supported %v, max supported %v)", f.Version, minSupportedReadVersion, maxSupportedReadVersion) } - formatter, err := createFormatter(f) + applyLegacyBlockFormat(&f) + + hasher, encryptor, err := CreateHashAndEncryptor(f) if err != nil { - return nil, fmt.Errorf("unable to create block formatter: %v", err) + return nil, err } blockCache, err := newBlockCache(ctx, st, caching) @@ -958,7 +962,8 @@ func newManagerWithOptions(ctx context.Context, st storage.Storage, f Formatting timeNow: timeNow, flushPackIndexesAfter: timeNow().Add(flushPackIndexTimeout), maxPackSize: f.MaxPackSize, - formatter: formatter, + encryptor: encryptor, + hasher: hasher, currentPackItems: make(map[string]Info), packIndexBuilder: make(packIndexBuilder), committedBlocks: blockIndex, @@ -983,11 +988,68 @@ func newManagerWithOptions(ctx context.Context, st storage.Storage, f Formatting return m, nil } -func createFormatter(f FormattingOptions) (Formatter, error) { - sf := FormatterFactories[f.BlockFormat] - if sf == nil { - return nil, fmt.Errorf("unsupported block format: %v", f.BlockFormat) +func CreateHashAndEncryptor(f FormattingOptions) (HashFunc, Encryptor, error) { + h, err := createHashFunc(f) + if err != nil { + return nil, nil, fmt.Errorf("unable to create hash: %v", err) + } + e, err := createEncryptor(f) + if err != nil { + return nil, nil, fmt.Errorf("unable to create encryptor: %v", err) } - return sf(f) + blockID := h(nil) + _, err = e.Encrypt(nil, blockID) + if err != nil { + return nil, nil, fmt.Errorf("invalid encryptor: %v", err) + } + + return h, e, nil +} + +func createHashFunc(f FormattingOptions) (HashFunc, error) { + h := hashFunctions[f.Hash] + if h == nil { + return nil, fmt.Errorf("unknown hash function %v", f.Hash) + } + + hashFunc, err := h(f) + if err != nil { + return nil, fmt.Errorf("unable to initialize hash: %v", err) + } + + if hashFunc == nil { + return nil, fmt.Errorf("nil hash function returned for %v", f.Hash) + } + + return hashFunc, nil +} + +func createEncryptor(f FormattingOptions) (Encryptor, error) { + e := encryptors[f.Encryption] + if e == nil 
{ + return nil, fmt.Errorf("unknown encryption algorithm: %v", f.Encryption) + } + + return e(f) +} + +func curryEncryptionKey(n func(k []byte) (cipher.Block, error), key []byte) func() (cipher.Block, error) { + return func() (cipher.Block, error) { + return n(key) + } +} + +func applyLegacyBlockFormat(f *FormattingOptions) { + switch f.LegacyBlockFormat { + case "UNENCRYPTED_HMAC_SHA256": + f.Hash = "HMAC-SHA256" + f.Encryption = "NONE" + case "UNENCRYPTED_HMAC_SHA256_128": + f.Hash = "HMAC-SHA256-128" + f.Encryption = "NONE" + case "ENCRYPTED_HMAC_SHA256_AES256_SIV": + f.Hash = "HMAC-SHA256-128" + f.Encryption = "AES-256-CTR" + } } diff --git a/block/block_manager_test.go b/block/block_manager_test.go index 832a40e15..c0854397a 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -270,11 +270,11 @@ func TestBlockManagerFailedToWritePack(t *testing.T) { st = faulty bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{ - Version: 1, - BlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV", - MaxPackSize: maxPackSize, - HMACSecret: []byte("foo"), - MasterKey: []byte("0123456789abcdef0123456789abcdef"), + Version: 1, + LegacyBlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV", + MaxPackSize: maxPackSize, + HMACSecret: []byte("foo"), + MasterKey: []byte("0123456789abcdef0123456789abcdef"), }, CachingOptions{}, fakeTimeNowFrozen(fakeTime)) if err != nil { t.Fatalf("can't create bm: %v", err) @@ -785,9 +785,9 @@ func newTestBlockManager(data map[string][]byte, keyTime map[string]time.Time, t } st := storagetesting.NewMapStorage(data, keyTime, timeFunc) bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{ - BlockFormat: "UNENCRYPTED_HMAC_SHA256", - HMACSecret: hmacSecret, - MaxPackSize: maxPackSize, + LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256", + HMACSecret: hmacSecret, + MaxPackSize: maxPackSize, }, CachingOptions{}, timeFunc) if err != nil { panic("can't create block manager: " + err.Error()) diff --git a/initialize.go b/initialize.go index 2f7dbc087..2f910d30d 100644 --- a/initialize.go +++ b/initialize.go @@ -82,11 +82,13 @@ func formatBlockFromOptions(opt *NewRepositoryOptions) *formatBlock { func repositoryObjectFormatFromOptions(opt *NewRepositoryOptions) *repositoryObjectFormat { f := &repositoryObjectFormat{ FormattingOptions: block.FormattingOptions{ - Version: 1, - BlockFormat: applyDefaultString(opt.BlockFormat.BlockFormat, block.DefaultFormat), - HMACSecret: applyDefaultRandomBytes(opt.BlockFormat.HMACSecret, 32), - MasterKey: applyDefaultRandomBytes(opt.BlockFormat.MasterKey, 32), - MaxPackSize: applyDefaultInt(opt.BlockFormat.MaxPackSize, applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20)), // 20 MB + Version: 1, + LegacyBlockFormat: opt.BlockFormat.LegacyBlockFormat, + Hash: applyDefaultString(opt.BlockFormat.Hash, block.DefaultHash), + Encryption: applyDefaultString(opt.BlockFormat.Encryption, block.DefaultEncryption), + HMACSecret: applyDefaultRandomBytes(opt.BlockFormat.HMACSecret, 32), + MasterKey: applyDefaultRandomBytes(opt.BlockFormat.MasterKey, 32), + MaxPackSize: applyDefaultInt(opt.BlockFormat.MaxPackSize, applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20)), // 20 MB }, Format: object.Format{ Splitter: applyDefaultString(opt.ObjectFormat.Splitter, object.DefaultSplitter), diff --git a/internal/repotesting/repotesting.go b/internal/repotesting/repotesting.go index 5acb59c4d..3be8c431a 100644 --- a/internal/repotesting/repotesting.go +++ b/internal/repotesting/repotesting.go @@ -45,8 +45,8 @@ func (e 
*Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOption opt := &repo.NewRepositoryOptions{ BlockFormat: block.FormattingOptions{ - HMACSecret: []byte{}, - BlockFormat: "UNENCRYPTED_HMAC_SHA256", + HMACSecret: []byte{}, + LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256", }, ObjectFormat: object.Format{ Splitter: "FIXED", diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go index a2d2a5756..913f5a81f 100644 --- a/manifest/manifest_manager_test.go +++ b/manifest/manifest_manager_test.go @@ -129,8 +129,8 @@ func TestManifestInitCorruptedBlock(t *testing.T) { st := storagetesting.NewMapStorage(data, nil, nil) f := block.FormattingOptions{ - BlockFormat: "UNENCRYPTED_HMAC_SHA256_128", - MaxPackSize: 100000, + LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256_128", + MaxPackSize: 100000, } // write some data to storage @@ -266,8 +266,8 @@ func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]b st := storagetesting.NewMapStorage(data, nil, nil) bm, err := block.NewManager(ctx, st, block.FormattingOptions{ - BlockFormat: "UNENCRYPTED_HMAC_SHA256_128", - MaxPackSize: 100000, + LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256_128", + MaxPackSize: 100000, }, block.CachingOptions{}) if err != nil { return nil, fmt.Errorf("can't create block manager: %v", err) diff --git a/repository_test.go b/repository_test.go index fb7afb708..79e83d4c8 100644 --- a/repository_test.go +++ b/repository_test.go @@ -244,7 +244,7 @@ func TestFormats(t *testing.T) { ctx := context.Background() makeFormat := func(blockFormat string) func(*repo.NewRepositoryOptions) { return func(n *repo.NewRepositoryOptions) { - n.BlockFormat.BlockFormat = blockFormat + n.BlockFormat.LegacyBlockFormat = blockFormat n.BlockFormat.HMACSecret = []byte("key") n.ObjectFormat.MaxBlockSize = 10000 n.ObjectFormat.Splitter = "FIXED" diff --git a/tests/stress_test/stress_test.go b/tests/stress_test/stress_test.go index ecb7e9b3f..1641769bf 100644 --- a/tests/stress_test/stress_test.go +++ b/tests/stress_test/stress_test.go @@ -38,10 +38,10 @@ func stressTestWithStorage(t *testing.T, st storage.Storage, duration time.Durat openMgr := func() (*block.Manager, error) { return block.NewManager(ctx, st, block.FormattingOptions{ - Version: 1, - BlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV", - MaxPackSize: 20000000, - MasterKey: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + Version: 1, + LegacyBlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV", + MaxPackSize: 20000000, + MasterKey: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, }, block.CachingOptions{}) } From 552369931c60b3e947ba9fb096b072d98a169783 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 30 Dec 2018 06:32:02 -0800 Subject: [PATCH 55/74] Makefile: added build-all target --- Makefile | 11 ++++++++++- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 855c31712..a0982eeb8 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ all: test lint -travis: test upload-coverage +travis: build-all test upload-coverage setup: GO111MODULE=off go get github.com/mattn/goveralls @@ -13,6 +13,15 @@ travis-setup: lint: gometalinter.v2 ./... 
+build-all: + # this downloads all dependencies for all OS/architectures and updates go.mod + # TODO(jkowalski): parallelize this once we're on 1.12 + CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=linux go build ./... + CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=windows go build ./... + CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=darwin go build ./... + CGO_ENABLED=0 GO111MODULE=on GOARCH=arm GOOS=linux go build ./... + CGO_ENABLED=0 GO111MODULE=on GOARCH=arm64 GOOS=linux go build ./... + test: GO111MODULE=on go test -tags test -count=1 -coverprofile=raw.cov --coverpkg ./... -timeout 90s ./... grep -v testing/ raw.cov > tmp.cov diff --git a/go.mod b/go.mod index 82e3f568b..b4d74f415 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389 golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc // indirect - google.golang.org/api v0.0.0-20181102150758-04bb50b6b83d + google.golang.org/api v0.0.0-20181229000844-f26a60c56f14 google.golang.org/genproto v0.0.0-20181101192439-c830210a61df // indirect google.golang.org/grpc v1.16.0 // indirect ) diff --git a/go.sum b/go.sum index 23325fb5e..c8f387064 100644 --- a/go.sum +++ b/go.sum @@ -58,8 +58,8 @@ golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= -google.golang.org/api v0.0.0-20181102150758-04bb50b6b83d h1:wDkjCUR876SHY2B6YgBr9kAIUzPm1zb1X3d6sxFjpfo= -google.golang.org/api v0.0.0-20181102150758-04bb50b6b83d/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= +google.golang.org/api v0.0.0-20181229000844-f26a60c56f14 h1:ATP5OSAvJIQ7cxB9BfsnylT+uFZ1FvJRW4NH1T2NKOU= +google.golang.org/api v0.0.0-20181229000844-f26a60c56f14/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= From 89ed30d03ef27589b4c8fed7648d1e16b0d1ab9b Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 30 Dec 2018 10:16:38 -0800 Subject: [PATCH 56/74] Updated readme --- README.md | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 182 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f05d8901a..09763df0b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Repository +Kopia Repository ===== ![Kopia](kopia.svg) @@ -8,13 +8,189 @@ Repository [![Coverage Status](https://coveralls.io/repos/github/kopia/repo/badge.svg?branch=master)](https://coveralls.io/github/kopia/repo?branch=master) [![Go Report Card](https://goreportcard.com/badge/github.com/kopia/repo)](https://goreportcard.com/report/github.com/kopia/repo) -This library implements Content-Addressable Storage Repository used by [Kopia](https://github.com/kopia/kopia) to -store its snapshots. 
+Features
+---
 
-> **NOTICE**:
+Kopia Repository organizes raw blob storage, such as Google Cloud Storage or Amazon S3 buckets, into content-addressable storage with:
+
+* deduplication
+* client-side encryption
+* caching
+* object splitting and merging
+* packaging and indexing (organizing many small objects into larger ones)
+* shared access from multiple computers
+* simple manifest management for storing label-addressable content
+
+All Repository features are implemented client-side, without any need for a custom server, so encryption keys never leave the client.
+
+The primary user of Repository is [Kopia](https://github.com/kopia/kopia), which stores its filesystem snapshots in content-addressable storage, but Repository is designed to be a general-purpose storage system.
+
+Repository implements 4 storage layers:
+
+* [Object Storage](https://godoc.org/github.com/kopia/repo/object) for storing objects of arbitrary size with encryption and deduplication
+* [Manifest Storage](https://godoc.org/github.com/kopia/repo/manifest) for storing small JSON-based manifests indexed by arbitrary labels (`key=value`)
+* [Block Storage](https://godoc.org/github.com/kopia/repo/block) for storing content-addressable, indivisible blocks of relatively small sizes (up to 10-20MB each) with encryption and deduplication
+* [Raw BLOB storage](https://godoc.org/github.com/kopia/repo/storage) for raw access to physical blocks
+
+Usage
+---
+
+Initialize a repository in a given storage (this is done only once).
+
+```golang
+// connect to a Google Cloud Storage bucket.
+st, err := gcs.New(ctx, &gcs.Options{
+	Bucket: "my-bucket",
+})
+if err != nil {
+	log.Fatalf("unable to open storage: %v", err)
+}
+password := "my-super-secret-password"
+if err := repo.Initialize(ctx, st, &repo.NewRepositoryOptions{
+	BlockFormat: block.FormattingOptions{
+		Hash:       "HMAC-SHA256-128",
+		Encryption: "AES-256-CTR",
+	},
+}, password); err != nil {
+	log.Fatalf("unable to initialize repository: %v", err)
+}
+```
+
+Now connect to the repository, which creates a local configuration file that persists all connection details.
+
+```golang
+configFile := "/tmp/my-repo.config"
+if err := repo.Connect(ctx, configFile, st, password, repo.ConnectOptions{
+	CachingOptions: block.CachingOptions{
+		CacheDirectory:    cacheDirectory,
+		MaxCacheSizeBytes: 100000000,
+	},
+}); err != nil {
+	log.Fatalf("unable to connect to repository: %v", err)
+}
+```
+
+To open the repository, use:
+
+```golang
+ctx := context.Background()
+rep, err := repo.Open(ctx, configFile, password, nil)
+if err != nil {
+	log.Fatalf("unable to open the repository: %v", err)
+}
+
+// repository must be closed at the end.
+defer rep.Close(ctx)
+```
+
+Writing objects:
+
+```golang
+
+w := rep.Objects.NewWriter(ctx, object.WriterOptions{})
+defer w.Close()
+
+// w implements io.Writer
+fmt.Fprintf(w, "hello world")
+
+// Object ID is a function of contents written, so every time we write "hello world" we're guaranteed to get exactly the same ID.
+objectID, err := w.Result()
+if err != nil {
+	log.Fatalf("upload failed: %v", err)
+}
+```
+
+Reading objects:
+
+```golang
+rd, err := rep.Objects.Open(ctx, objectID)
+if err != nil {
+	log.Fatalf("open failed: %v", err)
+}
+defer rd.Close()
+
+data, err := ioutil.ReadAll(rd)
+if err != nil {
+	log.Fatalf("read failed: %v", err)
+}
+
+// Outputs "hello world"
+log.Printf("data: %v", string(data))
+```
+
+Saving a manifest with a given set of labels:
+
+```golang
+labels := map[string]string{
+	"type":    "custom-object",
+	"my-kind": "greeting",
+}
+
+payload := map[string]string{
+	"myObjectID": string(objectID),
+}
+
+manifestID, err := rep.Manifests.Put(ctx, labels, payload)
+if err != nil {
+	log.Fatalf("manifest put failed: %v", err)
+}
+
+log.Printf("saved manifest %v", manifestID)
+```
+
+Loading manifests matching labels:
+
+```golang
+manifests, err := rep.Manifests.Find(ctx, labels)
+if err != nil {
+	log.Fatalf("unable to find manifests: %v", err)
+}
+for _, m := range manifests {
+	var val map[string]string
+
+	if err := rep.Manifests.Get(ctx, m.ID, &val); err != nil {
+		log.Fatalf("unable to load manifest %v: %v", m.ID, err)
+	}
+
+	log.Printf("loaded manifest: %v created at %v", val["myObjectID"], m.ModTime)
+}
+```
+
+
+FAQ
+---
+
+1. How stable is it?
+
+This library is still in development and is **not ready for general use**.
+
+The repository data format is still subject to change, including backwards-incompatible changes, which will require data migration, although at some point before v1.0 we will declare the format to be stable and will maintain backward compatibility going forward.
+
+2. How big can a repository get?
+
+There's no inherent size limit, but as a rule of thumb it should be no more than `10 TB` (at least for now, until we test with larger repositories).
+
+The data is efficiently packed into a small number of files and stored, but indexes need to be cached locally and will consume disk space and RAM.
+
+>For example:
 >
-> This library is still in early stages of development and is **not ready for general use**.
-> The repository data format is subject to change, including backwards-incompatible changes. Use at your own risk.
+>One sample repository of `480 GB` of data from home NAS containing typical mix of photos, videos, documents and music files contains:
+> * `1874361` content-addressable blocks/objects
+> * `27485` physical objects (packs) in cloud storage bucket >(typically between 20MB and 30MB each)
+> * `70 MB` of indexes
+
+3. How safe is the data?
+
+Your data can only be as safe as the underlying storage, so it's recommended to use a high-quality cloud storage solution, which nowadays provide very high-durability, high-throughput and low-latency for access to your data at a very reasonable price.
+
+In addition to that, Kopia employs several data protection techniques, such as encryption, checksumming to detect accidental bit flips, redundant storage of indexes, and others.
+
+> Having said that, it's not recommended to trust all your data to Kopia just yet - **always have another backup**.
+
+4. I'd like to contribute
+
+Sure, get started by [filing an Issue](https://github.com/kopia/repo/issues) or sending a pull request.
+
+5. I found a security issue
+
+Please notify us privately at `jaak@jkowalski.net` so we can work on addressing the issue and releasing a patch.
Licensing --- From bf2b07994348b300903358f6b545fcadd4cc6def Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sun, 30 Dec 2018 12:45:56 -0800 Subject: [PATCH 57/74] readme: tweaks --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 09763df0b..0befd8135 100644 --- a/README.md +++ b/README.md @@ -171,18 +171,18 @@ The data is efficiently packed into a small number of files and stored, but inde >For example: > ->One sample repository of `480 GB` of data from home NAS containing typical mix of photos, videos, documents and music files contains: +>One sample repository of `480 GB` of data from home NAS containing a mix of photos, videos, documents and music files contains: > * `1874361` content-addressable blocks/objects -> * `27485` physical objects (packs) in cloud storage bucket >(typically between 20MB and 30MB each) +> * `27485` physical objects (packs) in cloud storage bucket (typically between 20MB and 30MB each) > * `70 MB` of indexes 3. How safe is the data? -Your data can only be as safe as the underlying storage, so it's recommended to use a high-quality cloud storage solution, which nowadays provide very high-durability, high-throughput and low-latency for access to your data at a very reasonable price. +Your data can only be as safe as the underlying storage, so it's recommended to use one of high-quality cloud storage solutions, which nowadays provide very high-durability, high-throughput and low-latency for access to your data at a very reasonable price. In addition to that, Kopia employs several data protection techniques, such as encryption, checksumming to detect accidental bit flips, redundant storage of indexes, and others. -> Having said that, it's not recommended to trust all your data to Kopia just yet - **always have another backup**. +> **WARNING: It's not recommended to trust all your data to Kopia just yet - always have another backup**. 4. I'd like to contribute From b4c80348f8051228df4f40a3f8b6bee75afd4952 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 31 Dec 2018 14:15:58 -0800 Subject: [PATCH 58/74] object: removed dependency on jsonstream for parsing indirect objects --- internal/jsonstream/doc.go | 2 - internal/jsonstream/reader.go | 112 -------------------- internal/jsonstream/stream_test.go | 163 ----------------------------- internal/jsonstream/writer.go | 59 ----------- object/object_manager.go | 43 ++++---- object/object_manager_test.go | 20 +--- object/object_writer.go | 16 ++- 7 files changed, 33 insertions(+), 382 deletions(-) delete mode 100644 internal/jsonstream/doc.go delete mode 100644 internal/jsonstream/reader.go delete mode 100644 internal/jsonstream/stream_test.go delete mode 100644 internal/jsonstream/writer.go diff --git a/internal/jsonstream/doc.go b/internal/jsonstream/doc.go deleted file mode 100644 index 157858468..000000000 --- a/internal/jsonstream/doc.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package jsonstream implements streaming reader and writer for JSON objects. -package jsonstream diff --git a/internal/jsonstream/reader.go b/internal/jsonstream/reader.go deleted file mode 100644 index c1ddb2fbc..000000000 --- a/internal/jsonstream/reader.go +++ /dev/null @@ -1,112 +0,0 @@ -package jsonstream - -import ( - "encoding/json" - "fmt" - "io" -) - -// Reader reads a stream of JSON objects. -type Reader struct { - decoder *json.Decoder - summary interface{} -} - -// Read reads the next JSON objects from the stream, returns io.EOF on the end of stream. 
-func (r *Reader) Read(v interface{}) error { - if r.decoder.More() { - return r.decoder.Decode(v) - } - - if err := ensureDelimiter(r.decoder, json.Delim(']')); err != nil { - return invalidStreamFormatError(err) - } - - tok, err := r.decoder.Token() - if err != nil { - return invalidStreamFormatError(err) - } - - switch tok { - case json.Delim('}'): - // end of stream, all good - return io.EOF - - case "summary": - s := r.summary - if s == nil { - s = map[string]interface{}{} - } - if err := r.decoder.Decode(s); err != nil { - return invalidStreamFormatError(err) - } - } - - if err := ensureDelimiter(r.decoder, json.Delim('}')); err != nil { - return invalidStreamFormatError(err) - } - - return io.EOF -} - -func ensureDelimiter(d *json.Decoder, expected json.Delim) error { - t, err := d.Token() - if err != nil { - return err - } - - if t != expected { - return fmt.Errorf("expected '%v', got %v", expected.String(), t) - } - - return nil -} -func ensureStringToken(d *json.Decoder, expected string) error { - t, err := d.Token() - if err != nil { - return err - } - - if s, ok := t.(string); ok { - if s == expected { - return nil - } - } - - return fmt.Errorf("expected '%v', got '%v'", expected, t) -} - -func invalidStreamFormatError(cause error) error { - return fmt.Errorf("invalid stream format: %v", cause) -} - -// NewReader returns new Reader on top of a given buffered reader. -// The provided header must match the beginning of a stream. -func NewReader(r io.Reader, header string, summary interface{}) (*Reader, error) { - dr := Reader{ - decoder: json.NewDecoder(r), - summary: summary, - } - - if err := ensureDelimiter(dr.decoder, json.Delim('{')); err != nil { - return nil, invalidStreamFormatError(err) - } - - if err := ensureStringToken(dr.decoder, "stream"); err != nil { - return nil, invalidStreamFormatError(err) - } - - if err := ensureStringToken(dr.decoder, header); err != nil { - return nil, invalidStreamFormatError(err) - } - - if err := ensureStringToken(dr.decoder, "entries"); err != nil { - return nil, invalidStreamFormatError(err) - } - - if err := ensureDelimiter(dr.decoder, json.Delim('[')); err != nil { - return nil, invalidStreamFormatError(err) - } - - return &dr, nil -} diff --git a/internal/jsonstream/stream_test.go b/internal/jsonstream/stream_test.go deleted file mode 100644 index 6beff5f0c..000000000 --- a/internal/jsonstream/stream_test.go +++ /dev/null @@ -1,163 +0,0 @@ -package jsonstream - -import ( - "bufio" - "bytes" - "io" - "log" - "strings" - "testing" -) - -type TestObj struct { - Name string `json:"name,omitempty"` -} -type TestSummary struct { - Value int `json:"val"` -} - -var testHeader1 = "01234567" -var testHeader2 = "0123456x" - -func TestStream(t *testing.T) { - var buf bytes.Buffer - - data := []TestObj{ - {Name: "foo"}, - {Name: "bar"}, - {Name: "baz"}, - } - - w := NewWriter(&buf, testHeader1) - for _, d := range data { - if err := w.Write(&d); err != nil { - t.Errorf("write error: %v", err) - } - } - w.Finalize() - log.Printf("wrote: %v", buf.String()) - r, err := NewReader(bufio.NewReader(&buf), testHeader1, nil) - if err != nil { - t.Errorf("err: %v", err) - return - } - for _, d := range data { - v := &TestObj{} - if readerr := r.Read(v); readerr != nil { - t.Errorf("read error: %v", readerr) - } - if v.Name != d.Name { - t.Errorf("invalid value: '%v', expected '%v'", v.Name, d.Name) - } - } - v := &TestObj{} - err = r.Read(v) - if err != io.EOF { - t.Errorf("expected EOF, got %v", err) - } -} - -func TestStreamWithSummary(t *testing.T) { - 
var buf bytes.Buffer - - data := []TestObj{ - {Name: "foo"}, - {Name: "bar"}, - {Name: "baz"}, - } - - w := NewWriter(&buf, testHeader1) - for _, d := range data { - if err := w.Write(&d); err != nil { - t.Errorf("write error: %v", err) - } - } - w.FinalizeWithSummary(TestSummary{Value: 123}) - log.Printf("wrote: %v", buf.String()) - - var summary TestSummary - r, err := NewReader(bufio.NewReader(&buf), testHeader1, &summary) - if err != nil { - t.Errorf("err: %v", err) - return - } - for _, d := range data { - v := &TestObj{} - if readerr := r.Read(v); readerr != nil { - t.Errorf("read error: %v", readerr) - } - if v.Name != d.Name { - t.Errorf("invalid value: '%v', expected '%v'", v.Name, d.Name) - } - } - v := &TestObj{} - err = r.Read(v) - if err != io.EOF { - t.Errorf("expected EOF, got %v", err) - } - if got, want := summary.Value, 123; got != want { - t.Errorf("unexpected summary value: %v, wanted %v", got, want) - } -} - -func TestInvalidHeader(t *testing.T) { - var buf bytes.Buffer - - w := NewWriter(&buf, testHeader1) - if err := w.Write(&TestObj{Name: "foo"}); err != nil { - t.Errorf("write error: %v", err) - } - - _, err := NewReader(bufio.NewReader(&buf), testHeader2, nil) - if err == nil { - t.Errorf("expected error, got none") - } else if !strings.Contains(err.Error(), "invalid stream format") { - t.Errorf("got incorrect error: %v", err) - } -} - -func TestInvalidStream(t *testing.T) { - cases := []string{ - `x`, - `{}`, - `{"not-stream":"hdr"}`, - `{{}}`, - `{"stream":"non-hdr"}`, - `{"stream":"hdr","nonEntries":[]}`, - `{"stream":"hdr","entries":{}}`, - `{"stream":"hdr","entries":[]}`, - `{"stream":"hdr","entries":[`, - `{"stream":"hdr","entries":[}`, - `{"stream":"hdr","entries":[]`, - `{"stream":"hdr","entries":[],"summary"`, - `{"stream":"hdr","entries":[],1.222.33`, - `{"stream":"hdr","entries":[],"sxummary":{"x":"1",`, - } - - for _, tc := range cases { - r, err := NewReader(strings.NewReader(tc), "hdr", nil) - if err != nil { - if !isInvalidStream(err) { - t.Errorf("got invalid error when creating reader: %v", err) - } - continue - } - - for { - v := map[string]interface{}{} - if err := r.Read(v); err != nil { - if err == io.EOF { - break - } - if !isInvalidStream(err) { - t.Errorf("got invalid error when creating reader: %v", err) - } - break - } - } - } -} - -func isInvalidStream(e error) bool { - return e != nil && strings.Contains(e.Error(), "invalid stream format") -} diff --git a/internal/jsonstream/writer.go b/internal/jsonstream/writer.go deleted file mode 100644 index a79049bca..000000000 --- a/internal/jsonstream/writer.go +++ /dev/null @@ -1,59 +0,0 @@ -package jsonstream - -import ( - "encoding/json" - "fmt" - "io" -) - -var commaBytes = []byte(",\n") - -// Writer writes a stream of JSON objects. -type Writer struct { - output io.Writer - header string - separator []byte -} - -// Write JSON object to the output. -func (w *Writer) Write(v interface{}) error { - if _, err := w.output.Write(w.separator); err != nil { - return err - } - j, err := json.Marshal(v) - if err != nil { - return err - } - // log.Printf("*** %v: %v", w.header, string(j)) - if _, err := w.output.Write(j); err != nil { - return err - } - w.separator = commaBytes - - return nil -} - -// FinalizeWithSummary writes the postamble to the JSON stream with a given summary object. 
-func (w *Writer) FinalizeWithSummary(summary interface{}) error { - b, err := json.Marshal(summary) - if err != nil { - return err - } - _, err = fmt.Fprintf(w.output, "\n],\"summary\":%v}", string(b)) - return err -} - -// Finalize writes the postamble to the JSON stream. -func (w *Writer) Finalize() error { - _, err := fmt.Fprintf(w.output, "\n]}") - return err -} - -// NewWriter creates a new Writer on top of a specified writer with a specified optional header. -func NewWriter(w io.Writer, header string) *Writer { - fmt.Fprintf(w, "{\"stream\":\"%v\",\"entries\":[\n", header) //nolint:errcheck - return &Writer{ - header: header, - output: w, - } -} diff --git a/object/object_manager.go b/object/object_manager.go index 0ebe9538e..c205bd076 100644 --- a/object/object_manager.go +++ b/object/object_manager.go @@ -2,14 +2,13 @@ package object import ( - "bufio" "bytes" "context" + "encoding/json" "fmt" "io" "github.com/kopia/repo/block" - "github.com/kopia/repo/internal/jsonstream" ) // Reader allows reading, seeking, getting the length of and closing of a repository object. @@ -184,29 +183,31 @@ func NewObjectManager(ctx context.Context, bm blockManager, f Format, opts Manag return om, nil } +/* + +{"stream":"kopia:indirect","entries":[ +{"l":1698099,"o":"D13ea27f9ad891ad4a2edfa983906863d"}, +{"s":1698099,"l":1302081,"o":"De8ca8327cd3af5f4edbd5ed1009c525e"}, +{"s":3000180,"l":4352499,"o":"D6b6eb48ca5361d06d72fe193813e42e1"}, +{"s":7352679,"l":1170821,"o":"Dd14653f76b63802ed48be64a0e67fea9"}, + +{"s":91094118,"l":1645153,"o":"Daa55df764d881a1daadb5ea9de17abbb"} +]} +*/ + +type indirectObject struct { + StreamID string `json:"stream"` + Entries []indirectObjectEntry `json:"entries"` +} + func (om *Manager) flattenListChunk(rawReader io.Reader) ([]indirectObjectEntry, error) { - pr, err := jsonstream.NewReader(bufio.NewReader(rawReader), indirectStreamType, nil) - if err != nil { - return nil, err - } - var seekTable []indirectObjectEntry + var ind indirectObject - for { - var oe indirectObjectEntry - - err := pr.Read(&oe) - if err == io.EOF { - break - } - - if err != nil { - return nil, fmt.Errorf("failed to read indirect object: %v", err) - } - - seekTable = append(seekTable, oe) + if err := json.NewDecoder(rawReader).Decode(&ind); err != nil { + return nil, fmt.Errorf("invalid indirect object: %v", err) } - return seekTable, nil + return ind.Entries, nil } func (om *Manager) newRawReader(ctx context.Context, objectID ID) (Reader, error) { diff --git a/object/object_manager_test.go b/object/object_manager_test.go index f2b712fb4..1bc65fa86 100644 --- a/object/object_manager_test.go +++ b/object/object_manager_test.go @@ -6,8 +6,8 @@ cryptorand "crypto/rand" "crypto/sha256" "encoding/hex" + "encoding/json" "fmt" - "io" "io/ioutil" "math/rand" "reflect" @@ -16,7 +16,6 @@ "testing" "github.com/kopia/repo/block" - "github.com/kopia/repo/internal/jsonstream" "github.com/kopia/repo/storage" ) @@ -149,20 +148,9 @@ func verifyIndirectBlock(ctx context.Context, t *testing.T, r *Manager, oid ID) } defer rd.Close() - pr, err := jsonstream.NewReader(rd, indirectStreamType, nil) - if err != nil { - t.Errorf("cannot open indirect stream: %v", err) - return - } - for { - v := indirectObjectEntry{} - if err := pr.Read(&v); err != nil { - if err == io.EOF { - break - } - t.Errorf("err: %v", err) - break - } + var ind indirectObject + if err := json.NewDecoder(rd).Decode(&ind); err != nil { + t.Errorf("cannot parse indirect stream: %v", err) } } } diff --git a/object/object_writer.go 
b/object/object_writer.go index 4b40ec4f7..a0e7e8f5b 100644 --- a/object/object_writer.go +++ b/object/object_writer.go @@ -3,11 +3,10 @@ import ( "bytes" "context" + "encoding/json" "fmt" "io" "sync" - - "github.com/kopia/repo/internal/jsonstream" ) // Writer allows writing content to the storage and supports automatic deduplication and encryption @@ -123,14 +122,13 @@ func (w *objectWriter) Result() (ID, error) { prefix: w.prefix, } - jw := jsonstream.NewWriter(iw, indirectStreamType) - for _, e := range w.blockIndex { - if err := jw.Write(&e); err != nil { - return "", fmt.Errorf("unable to write indirect block index: %v", err) - } + ind := indirectObject{ + StreamID: "kopia:indirect", + Entries: w.blockIndex, } - if err := jw.Finalize(); err != nil { - return "", fmt.Errorf("unable to finalize indirect block index: %v", err) + + if err := json.NewEncoder(iw).Encode(ind); err != nil { + return "", fmt.Errorf("unable to write indirect block index: %v", err) } oid, err := iw.Result() if err != nil { From 24bd5bbe1f9fc6527dca7cc2bf17e7ff8c0ce104 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 31 Dec 2018 17:08:41 -0800 Subject: [PATCH 59/74] repo: added Repository.Upgrade() API --- go.mod | 24 +++++++------- go.sum | 49 +++++++++++++++-------------- initialize.go | 8 ++--- internal/repotesting/repotesting.go | 2 +- open.go | 3 ++ repository.go | 3 ++ repository_test.go | 14 +++++++++ upgrade.go | 49 +++++++++++++++++++++++++++++ 8 files changed, 112 insertions(+), 40 deletions(-) create mode 100644 upgrade.go diff --git a/go.mod b/go.mod index b4d74f415..4ac01c433 100644 --- a/go.mod +++ b/go.mod @@ -1,22 +1,22 @@ module github.com/kopia/repo require ( - cloud.google.com/go v0.32.0 + cloud.google.com/go v0.34.0 github.com/efarrer/iothrottler v0.0.0-20141121142253-60e7e547c7fe - github.com/go-ini/ini v1.39.0 // indirect - github.com/googleapis/gax-go v2.0.0+incompatible // indirect - github.com/minio/minio-go v6.0.9+incompatible + github.com/go-ini/ini v1.40.0 // indirect + github.com/googleapis/gax-go v2.0.2+incompatible // indirect + github.com/minio/minio-go v6.0.11+incompatible github.com/mitchellh/go-homedir v1.0.0 // indirect github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6 - github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc + github.com/studio-b12/gowebdav v0.0.0-20181230112802-6c32839dbdfc go.opencensus.io v0.18.0 // indirect - golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16 - golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 - golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc - golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389 - golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc // indirect + golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 + golang.org/x/exp v0.0.0-20181221233300-b68661188fbf + golang.org/x/net v0.0.0-20181220203305-927f97764cc3 // indirect + golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890 + golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb // indirect google.golang.org/api v0.0.0-20181229000844-f26a60c56f14 - google.golang.org/genproto v0.0.0-20181101192439-c830210a61df // indirect - google.golang.org/grpc v1.16.0 // indirect + google.golang.org/genproto v0.0.0-20181221175505-bd9b4fb69e2f // indirect + google.golang.org/grpc v1.17.0 // indirect ) diff --git a/go.sum b/go.sum index c8f387064..b10eecb60 100644 --- a/go.sum +++ b/go.sum @@ -1,27 +1,27 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 
-cloud.google.com/go v0.32.0 h1:DSt59WoyNcfAInilEpfvm2ugq8zvNyaHAm9MkzOwRQ4= -cloud.google.com/go v0.32.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0 h1:eOI3/cP2VTU6uZLDYAoic+eyzzB9YyGmJ7eIjl8rOPg= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/efarrer/iothrottler v0.0.0-20141121142253-60e7e547c7fe h1:WAx1vRufH0I2pTWldQkXPzpc+jndCOi2FH334LFQ1PI= github.com/efarrer/iothrottler v0.0.0-20141121142253-60e7e547c7fe/go.mod h1:zjXkUoNEq44qYz/1TlzBhN2W21rDU3HvDBiJWQAZTq8= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-ini/ini v1.39.0 h1:/CyW/jTlZLjuzy52jc1XnhJm6IUKEuunpJFpecywNeI= -github.com/go-ini/ini v1.39.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/go-ini/ini v1.40.0 h1:/pbZah2UXAjMCtUlVRASCb6nX+0A8aCXjmYouBEXu0c= +github.com/go-ini/ini v1.40.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/googleapis/gax-go v2.0.0+incompatible h1:j0GKcs05QVmm7yesiZq2+9cxHkNK9YM6zKx4D2qucQU= -github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= +github.com/googleapis/gax-go v2.0.2+incompatible h1:silFMLAnr330+NRuag/VjIGF7TLp/LBrV2CJKFLWEww= +github.com/googleapis/gax-go v2.0.2+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/minio/minio-go v6.0.9+incompatible h1:1GBagCy3VtWteFBwjjNyajSf0JJ/iT0hYVlK8xipsds= -github.com/minio/minio-go v6.0.9+incompatible/go.mod h1:7guKYtitv8dktvNUGrhzmNlA5wrAABTQXCoesZdFQO8= +github.com/minio/minio-go v6.0.11+incompatible h1:ue0S9ZVNhy88iS+GM4y99k3oSSeKIF+OKEe6HRMWLRw= +github.com/minio/minio-go v6.0.11+incompatible/go.mod h1:7guKYtitv8dktvNUGrhzmNlA5wrAABTQXCoesZdFQO8= github.com/mitchellh/go-homedir v1.0.0 h1:vKb8ShqSby24Yrqr/yDYkuFz8d0WUjys40rvnGC8aR0= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 h1:lDH9UUVJtmYCjyT0CI4q8xvlXPxeZ0gYCVvWbmPlp88= @@ -33,27 +33,29 @@ github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7q github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6 
h1:31fhvQj+O9qDqMxUgQDOCQA5RV1iIFMzYPhBUyzg2p0= github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6/go.mod h1:jk5gVE20+MCoyJ2TFiiMrbWPyaH4t9T5F3HwVdthB2w= -github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc h1:p1iYuFAxSsQ5JDzBOpBEsqFpjgKRyGrnjQpvvq2AK5A= -github.com/studio-b12/gowebdav v0.0.0-20181024110551-cba565a9dcfc/go.mod h1:gCcfDlA1Y7GqOaeEKw5l9dOGx1VLdc/HuQSlQAaZ30s= +github.com/studio-b12/gowebdav v0.0.0-20181230112802-6c32839dbdfc h1:iuD/gqAYTn1N3KSn8LqhdNqKVOrrCXHETAM/42M6x58= +github.com/studio-b12/gowebdav v0.0.0-20181230112802-6c32839dbdfc/go.mod h1:gCcfDlA1Y7GqOaeEKw5l9dOGx1VLdc/HuQSlQAaZ30s= go.opencensus.io v0.18.0 h1:Mk5rgZcggtbvtAun5aJzAtjKKN/t0R3jJPlWILlv938= go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA= -golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16 h1:y6ce7gCWtnH+m3dCjzQ1PCuwl28DDIc3VNnvY29DlIA= -golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2 h1:lpkPb6P4ObnPRN3VbEzv/6CUtwaEDtx0cvCg4eWQuBk= -golang.org/x/exp v0.0.0-20181022080537-42ba7d4b6eb2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 h1:mKdxBk7AujPs8kU4m80U72y/zjbZ3UcXC7dClwKbUI0= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/exp v0.0.0-20181221233300-b68661188fbf h1:rK+9ETFkUX8OofKNLhik8qEGY/3gnM5x4eVNWJf1z/8= +golang.org/x/exp v0.0.0-20181221233300-b68661188fbf/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc h1:ZMCWScCvS2fUVFw8LOpxyUUW5qiviqr4Dg5NdjLeiLU= -golang.org/x/net v0.0.0-20181102091132-c10e9556a7bc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3 h1:eH6Eip3UpmR+yM/qI9Ijluzb1bNv/cAU/n+6l8tRSis= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389 h1:NSr16yuMknNO4kjJ2yNMJBdS55sdwZiWrXbt3fbM3pI= -golang.org/x/oauth2 v0.0.0-20181102170140-232e45548389/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890 h1:uESlIz09WIHT2I+pasSXcpLYqYK8wHcdCetU3VuMBJE= +golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc 
h1:SdCq5U4J+PpbSDIl9bM0V1e1Ug1jsnBkAFvTs1htn7U= -golang.org/x/sys v0.0.0-20181031143558-9b800f95dbbc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb h1:pf3XwC90UUdNPYWZdFjhGBE7DUFuK3Ct1zWmZ65QN30= +golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -63,11 +65,12 @@ google.golang.org/api v0.0.0-20181229000844-f26a60c56f14/go.mod h1:4mhQ8q/RsB7i+ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20181101192439-c830210a61df h1:Ri2mROsxIxitlzRQ0pYoP8/dsqeLEolHrhh29dltSI4= -google.golang.org/genproto v0.0.0-20181101192439-c830210a61df/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20181221175505-bd9b4fb69e2f h1:eT3B0O2ghdSPzjAOznr3oOLyN1HFeYUncYl7FRwg4VI= +google.golang.org/genproto v0.0.0-20181221175505-bd9b4fb69e2f/go.mod h1:7Ep/1NZk928CDR8SjdVbjWNpdIf6nzjE3BTgJDr2Atg= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= -google.golang.org/grpc v1.16.0 h1:dz5IJGuC2BB7qXR5AyHNwAUBhZscK2xVez7mznh72sY= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= +google.golang.org/grpc v1.17.0 h1:TRJYBgMclJvGYn2rIMjj+h9KtMt5r1Ij7ODVRIZkwhk= +google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/initialize.go b/initialize.go index 2f910d30d..f773d0004 100644 --- a/initialize.go +++ b/initialize.go @@ -26,9 +26,9 @@ // NewRepositoryOptions specifies options that apply to newly created repositories. // All fields are optional, when not provided, reasonable defaults will be used. 
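// Editorial note (sketch, not part of this diff): with these defaults a
// caller can pass a zero-value options struct, e.g.
//
//	opt := &NewRepositoryOptions{}
//
// and only set fields such as FormatEncryptionAlgorithm (forced to "NONE"
// by the test helper below) when a default needs to be overridden.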
type NewRepositoryOptions struct { - UniqueID []byte // force the use of particular unique ID for metadata manager - MetadataEncryptionAlgorithm string // identifier of encryption algorithm - KeyDerivationAlgorithm string // identifier of key derivation algorithm + UniqueID []byte // force the use of particular unique ID + FormatEncryptionAlgorithm string // identifier of encryption algorithm + KeyDerivationAlgorithm string // identifier of key derivation algorithm BlockFormat block.FormattingOptions DisableHMAC bool @@ -75,7 +75,7 @@ func formatBlockFromOptions(opt *NewRepositoryOptions) *formatBlock { KeyDerivationAlgorithm: applyDefaultString(opt.KeyDerivationAlgorithm, DefaultKeyDerivationAlgorithm), UniqueID: applyDefaultRandomBytes(opt.UniqueID, 32), Version: "1", - EncryptionAlgorithm: applyDefaultString(opt.MetadataEncryptionAlgorithm, DefaultEncryptionAlgorithm), + EncryptionAlgorithm: applyDefaultString(opt.FormatEncryptionAlgorithm, DefaultEncryptionAlgorithm), } } diff --git a/internal/repotesting/repotesting.go b/internal/repotesting/repotesting.go index 3be8c431a..02d13bcce 100644 --- a/internal/repotesting/repotesting.go +++ b/internal/repotesting/repotesting.go @@ -52,7 +52,7 @@ func (e *Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOption Splitter: "FIXED", MaxBlockSize: 400, }, - MetadataEncryptionAlgorithm: "NONE", + FormatEncryptionAlgorithm: "NONE", } for _, mod := range opts { diff --git a/open.go b/open.go index eced33ac1..c30723683 100644 --- a/open.go +++ b/open.go @@ -122,6 +122,9 @@ func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, pa Manifests: manifests, CacheDirectory: caching.CacheDirectory, UniqueID: f.UniqueID, + + formatBlock: f, + masterKey: masterKey, }, nil } diff --git a/repository.go b/repository.go index 163b941ba..1e438b89d 100644 --- a/repository.go +++ b/repository.go @@ -21,6 +21,9 @@ type Repository struct { ConfigFile string CacheDirectory string + + formatBlock *formatBlock + masterKey []byte } // Close closes the repository and releases all resources. diff --git a/repository_test.go b/repository_test.go index 79e83d4c8..614d32182 100644 --- a/repository_test.go +++ b/repository_test.go @@ -156,6 +156,20 @@ func TestHMAC(t *testing.T) { } } +func TestUpgrade(t *testing.T) { + var env repotesting.Environment + defer env.Setup(t).Close(t) + ctx := context.Background() + + if err := env.Repository.Upgrade(ctx); err != nil { + t.Errorf("upgrade error: %v", err) + } + + if err := env.Repository.Upgrade(ctx); err != nil { + t.Errorf("2nd upgrade error: %v", err) + } +} + func TestReaderStoredBlockNotFound(t *testing.T) { var env repotesting.Environment defer env.Setup(t).Close(t) diff --git a/upgrade.go b/upgrade.go new file mode 100644 index 000000000..c5043a0e4 --- /dev/null +++ b/upgrade.go @@ -0,0 +1,49 @@ +package repo + +import ( + "context" + "fmt" +) + +// Upgrade upgrades repository data structures to the latest version. 
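+//
+// Upgrade is idempotent: when nothing needs migrating it logs "nothing to
+// do" and returns nil, so calling it repeatedly is safe (see TestUpgrade
+// above).
+//
+// Editorial sketch (not part of the original patch) of a typical call
+// site, assuming an already-open *Repository named rep:
+//
+//	if err := rep.Upgrade(ctx); err != nil {
+//		return fmt.Errorf("repository upgrade failed: %v", err)
+//	}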
+func (r *Repository) Upgrade(ctx context.Context) error {
+	f := r.formatBlock
+
+	log.Debug("decrypting format...")
+	repoConfig, err := f.decryptFormatBytes(r.masterKey)
+	if err != nil {
+		return fmt.Errorf("unable to decrypt repository config: %v", err)
+	}
+
+	var migrated bool
+
+	if repoConfig.FormattingOptions.LegacyBlockFormat != "" {
+		log.Infof("upgrading from legacy block format to explicit hash/encryption spec")
+		switch repoConfig.FormattingOptions.LegacyBlockFormat {
+		case "UNENCRYPTED_HMAC_SHA256":
+			repoConfig.FormattingOptions.Hash = "HMAC-SHA256"
+			repoConfig.FormattingOptions.Encryption = "NONE"
+		case "UNENCRYPTED_HMAC_SHA256_128":
+			repoConfig.FormattingOptions.Hash = "HMAC-SHA256-128"
+			repoConfig.FormattingOptions.Encryption = "NONE"
+		case "ENCRYPTED_HMAC_SHA256_AES256_SIV":
+			repoConfig.FormattingOptions.Hash = "HMAC-SHA256-128"
+			repoConfig.FormattingOptions.Encryption = "AES-256-CTR"
+		}
+		repoConfig.FormattingOptions.LegacyBlockFormat = ""
+		migrated = true
+	}
+
+	if !migrated {
+		log.Infof("nothing to do")
+		return nil
+	}
+
+	log.Debug("encrypting format...")
+	if err := encryptFormatBytes(f, repoConfig, r.masterKey, f.UniqueID); err != nil {
+		return fmt.Errorf("unable to encrypt format bytes: %v", err)
+	}
+
+	log.Infof("writing updated format block...")
+	return writeFormatBlock(ctx, r.Storage, f)
+}

From 840d5ab7490a80259db7a28a13dd99da76e28f9c Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Mon, 31 Dec 2018 19:01:08 -0800
Subject: [PATCH 60/74] removed support for legacy block format

To migrate, sync to the previous commit and run 'kopia repo upgrade'.

---
 block/block_formatting_options.go   | 13 ++++++-------
 block/block_manager.go              | 17 +----------------
 block/block_manager_test.go         | 18 ++++++++++--------
 go.mod                              |  2 +-
 initialize.go                       | 13 ++++++-------
 internal/repotesting/repotesting.go |  5 +++--
 manifest/manifest_manager_test.go   | 10 ++++++----
 repository_test.go                  |  9 +++++----
 tests/stress_test/stress_test.go    | 11 ++++++-----
 upgrade.go                          | 18 +-----------------
 10 files changed, 45 insertions(+), 71 deletions(-)

diff --git a/block/block_formatting_options.go b/block/block_formatting_options.go
index 547353880..33520eaf0 100644
--- a/block/block_formatting_options.go
+++ b/block/block_formatting_options.go
@@ -2,11 +2,10 @@
 
 // FormattingOptions describes the rules for formatting blocks in repository.
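// Editorial note (sketch, not part of this diff): after this change each
// removed legacy name is spelled as an explicit hash/encryption pair, e.g.
// the old "UNENCRYPTED_HMAC_SHA256" becomes
//
//	FormattingOptions{Version: 1, Hash: "HMAC-SHA256", Encryption: "NONE"}
//
// mirroring the migration table in Upgrade() above.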
type FormattingOptions struct { - Version int `json:"version,omitempty"` // version number, must be "1" - LegacyBlockFormat string `json:"objectFormat,omitempty"` // identifier of the block format (legacy) - Hash string `json:"hash,omitempty"` // identifier of the hash algorithm used - Encryption string `json:"encryption,omitempty"` // identifier of the encryption algorithm used - HMACSecret []byte `json:"secret,omitempty"` // HMAC secret used to generate encryption keys - MasterKey []byte `json:"masterKey,omitempty"` // master encryption key (SIV-mode encryption only) - MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object + Version int `json:"version,omitempty"` // version number, must be "1" + Hash string `json:"hash,omitempty"` // identifier of the hash algorithm used + Encryption string `json:"encryption,omitempty"` // identifier of the encryption algorithm used + HMACSecret []byte `json:"secret,omitempty"` // HMAC secret used to generate encryption keys + MasterKey []byte `json:"masterKey,omitempty"` // master encryption key (SIV-mode encryption only) + MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object } diff --git a/block/block_manager.go b/block/block_manager.go index 54075f1a5..4b461028e 100644 --- a/block/block_manager.go +++ b/block/block_manager.go @@ -935,8 +935,6 @@ func newManagerWithOptions(ctx context.Context, st storage.Storage, f Formatting return nil, fmt.Errorf("can't handle repositories created using version %v (min supported %v, max supported %v)", f.Version, minSupportedReadVersion, maxSupportedReadVersion) } - applyLegacyBlockFormat(&f) - hasher, encryptor, err := CreateHashAndEncryptor(f) if err != nil { return nil, err @@ -993,6 +991,7 @@ func CreateHashAndEncryptor(f FormattingOptions) (HashFunc, Encryptor, error) { if err != nil { return nil, nil, fmt.Errorf("unable to create hash: %v", err) } + e, err := createEncryptor(f) if err != nil { return nil, nil, fmt.Errorf("unable to create encryptor: %v", err) @@ -1039,17 +1038,3 @@ func curryEncryptionKey(n func(k []byte) (cipher.Block, error), key []byte) func return n(key) } } - -func applyLegacyBlockFormat(f *FormattingOptions) { - switch f.LegacyBlockFormat { - case "UNENCRYPTED_HMAC_SHA256": - f.Hash = "HMAC-SHA256" - f.Encryption = "NONE" - case "UNENCRYPTED_HMAC_SHA256_128": - f.Hash = "HMAC-SHA256-128" - f.Encryption = "NONE" - case "ENCRYPTED_HMAC_SHA256_AES256_SIV": - f.Hash = "HMAC-SHA256-128" - f.Encryption = "AES-256-CTR" - } -} diff --git a/block/block_manager_test.go b/block/block_manager_test.go index c0854397a..9ec1b6356 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -270,11 +270,12 @@ func TestBlockManagerFailedToWritePack(t *testing.T) { st = faulty bm, err := newManagerWithOptions(context.Background(), st, FormattingOptions{ - Version: 1, - LegacyBlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV", - MaxPackSize: maxPackSize, - HMACSecret: []byte("foo"), - MasterKey: []byte("0123456789abcdef0123456789abcdef"), + Version: 1, + Hash: "HMAC-SHA256-128", + Encryption: "AES-256-CTR", + MaxPackSize: maxPackSize, + HMACSecret: []byte("foo"), + MasterKey: []byte("0123456789abcdef0123456789abcdef"), }, CachingOptions{}, fakeTimeNowFrozen(fakeTime)) if err != nil { t.Fatalf("can't create bm: %v", err) @@ -785,9 +786,10 @@ func newTestBlockManager(data map[string][]byte, keyTime map[string]time.Time, t } st := storagetesting.NewMapStorage(data, keyTime, timeFunc) bm, err := newManagerWithOptions(context.Background(), st, 
FormattingOptions{ - LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256", - HMACSecret: hmacSecret, - MaxPackSize: maxPackSize, + Hash: "HMAC-SHA256", + Encryption: "NONE", + HMACSecret: hmacSecret, + MaxPackSize: maxPackSize, }, CachingOptions{}, timeFunc) if err != nil { panic("can't create block manager: " + err.Error()) diff --git a/go.mod b/go.mod index 4ac01c433..c415ab412 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( go.opencensus.io v0.18.0 // indirect golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 golang.org/x/exp v0.0.0-20181221233300-b68661188fbf - golang.org/x/net v0.0.0-20181220203305-927f97764cc3 // indirect + golang.org/x/net v0.0.0-20181220203305-927f97764cc3 golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890 golang.org/x/sys v0.0.0-20181228144115-9a3f9b0469bb // indirect google.golang.org/api v0.0.0-20181229000844-f26a60c56f14 diff --git a/initialize.go b/initialize.go index f773d0004..4f5e254e0 100644 --- a/initialize.go +++ b/initialize.go @@ -82,13 +82,12 @@ func formatBlockFromOptions(opt *NewRepositoryOptions) *formatBlock { func repositoryObjectFormatFromOptions(opt *NewRepositoryOptions) *repositoryObjectFormat { f := &repositoryObjectFormat{ FormattingOptions: block.FormattingOptions{ - Version: 1, - LegacyBlockFormat: opt.BlockFormat.LegacyBlockFormat, - Hash: applyDefaultString(opt.BlockFormat.Hash, block.DefaultHash), - Encryption: applyDefaultString(opt.BlockFormat.Encryption, block.DefaultEncryption), - HMACSecret: applyDefaultRandomBytes(opt.BlockFormat.HMACSecret, 32), - MasterKey: applyDefaultRandomBytes(opt.BlockFormat.MasterKey, 32), - MaxPackSize: applyDefaultInt(opt.BlockFormat.MaxPackSize, applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20)), // 20 MB + Version: 1, + Hash: applyDefaultString(opt.BlockFormat.Hash, block.DefaultHash), + Encryption: applyDefaultString(opt.BlockFormat.Encryption, block.DefaultEncryption), + HMACSecret: applyDefaultRandomBytes(opt.BlockFormat.HMACSecret, 32), + MasterKey: applyDefaultRandomBytes(opt.BlockFormat.MasterKey, 32), + MaxPackSize: applyDefaultInt(opt.BlockFormat.MaxPackSize, applyDefaultInt(opt.ObjectFormat.MaxBlockSize, 20<<20)), // 20 MB }, Format: object.Format{ Splitter: applyDefaultString(opt.ObjectFormat.Splitter, object.DefaultSplitter), diff --git a/internal/repotesting/repotesting.go b/internal/repotesting/repotesting.go index 02d13bcce..b08994291 100644 --- a/internal/repotesting/repotesting.go +++ b/internal/repotesting/repotesting.go @@ -45,8 +45,9 @@ func (e *Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOption opt := &repo.NewRepositoryOptions{ BlockFormat: block.FormattingOptions{ - HMACSecret: []byte{}, - LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256", + HMACSecret: []byte{}, + Hash: "HMAC-SHA256", + Encryption: "NONE", }, ObjectFormat: object.Format{ Splitter: "FIXED", diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go index 913f5a81f..61a83f27d 100644 --- a/manifest/manifest_manager_test.go +++ b/manifest/manifest_manager_test.go @@ -129,8 +129,9 @@ func TestManifestInitCorruptedBlock(t *testing.T) { st := storagetesting.NewMapStorage(data, nil, nil) f := block.FormattingOptions{ - LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256_128", - MaxPackSize: 100000, + Hash: "HMAC-SHA256-128", + Encryption: "NONE", + MaxPackSize: 100000, } // write some data to storage @@ -266,8 +267,9 @@ func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]b st := storagetesting.NewMapStorage(data, nil, nil) bm, err := 
block.NewManager(ctx, st, block.FormattingOptions{ - LegacyBlockFormat: "UNENCRYPTED_HMAC_SHA256_128", - MaxPackSize: 100000, + Hash: "HMAC-SHA256-128", + Encryption: "NONE", + MaxPackSize: 100000, }, block.CachingOptions{}) if err != nil { return nil, fmt.Errorf("can't create block manager: %v", err) diff --git a/repository_test.go b/repository_test.go index 614d32182..33e957c40 100644 --- a/repository_test.go +++ b/repository_test.go @@ -256,9 +256,10 @@ func verify(ctx context.Context, t *testing.T, rep *repo.Repository, objectID ob func TestFormats(t *testing.T) { ctx := context.Background() - makeFormat := func(blockFormat string) func(*repo.NewRepositoryOptions) { + makeFormat := func(hash, encryption string) func(*repo.NewRepositoryOptions) { return func(n *repo.NewRepositoryOptions) { - n.BlockFormat.LegacyBlockFormat = blockFormat + n.BlockFormat.Hash = hash + n.BlockFormat.Encryption = encryption n.BlockFormat.HMACSecret = []byte("key") n.ObjectFormat.MaxBlockSize = 10000 n.ObjectFormat.Splitter = "FIXED" @@ -279,13 +280,13 @@ func TestFormats(t *testing.T) { }, }, { - format: makeFormat("UNENCRYPTED_HMAC_SHA256"), + format: makeFormat("HMAC-SHA256", "NONE"), oids: map[string]object.ID{ "The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8", }, }, { - format: makeFormat("UNENCRYPTED_HMAC_SHA256_128"), + format: makeFormat("HMAC-SHA256-128", "NONE"), oids: map[string]object.ID{ "The quick brown fox jumps over the lazy dog": "f7bc83f430538424b13298e6aa6fb143", }, diff --git a/tests/stress_test/stress_test.go b/tests/stress_test/stress_test.go index 1641769bf..0bc31b268 100644 --- a/tests/stress_test/stress_test.go +++ b/tests/stress_test/stress_test.go @@ -38,10 +38,11 @@ func stressTestWithStorage(t *testing.T, st storage.Storage, duration time.Durat openMgr := func() (*block.Manager, error) { return block.NewManager(ctx, st, block.FormattingOptions{ - Version: 1, - LegacyBlockFormat: "ENCRYPTED_HMAC_SHA256_AES256_SIV", - MaxPackSize: 20000000, - MasterKey: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + Version: 1, + Hash: "HMAC-SHA256-128", + Encryption: "AES-256-CTR", + MaxPackSize: 20000000, + MasterKey: []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, }, block.CachingOptions{}) } @@ -68,7 +69,7 @@ func stressWorker(ctx context.Context, t *testing.T, deadline time.Time, workerI bm, err := openMgr() if err != nil { - t.Errorf("error opening manager: %v", err) + t.Fatalf("error opening manager: %v", err) } type writtenBlock struct { diff --git a/upgrade.go b/upgrade.go index c5043a0e4..ad081264e 100644 --- a/upgrade.go +++ b/upgrade.go @@ -17,23 +17,7 @@ func (r *Repository) Upgrade(ctx context.Context) error { var migrated bool - if repoConfig.FormattingOptions.LegacyBlockFormat != "" { - log.Infof("upgrading from legacy block format to explicit hash/encryption spec") - switch repoConfig.FormattingOptions.LegacyBlockFormat { - case "UNENCRYPTED_HMAC_SHA256": - repoConfig.FormattingOptions.Hash = "HMAC-SHA256" - repoConfig.FormattingOptions.Encryption = "NONE" - case "UNENCRYPTED_HMAC_SHA256_128": - repoConfig.FormattingOptions.Hash = "HMAC-SHA256-128" - repoConfig.FormattingOptions.Encryption = "NONE" - case "ENCRYPTED_HMAC_SHA256_AES256_SIV": - repoConfig.FormattingOptions.Hash = "HMAC-SHA256-128" - repoConfig.FormattingOptions.Encryption = "AES-256-CTR" - } - 
repoConfig.FormattingOptions.LegacyBlockFormat = "" - migrated = true - } - + // TODO(jkowalski): add migration code here if !migrated { log.Infof("nothing to do") return nil From 766f5749730502749618783df8b3549df5df7625 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 12 Jan 2019 09:14:34 -0800 Subject: [PATCH 61/74] repo: removed controls for setting format block encryption, instead when block encryption is enabled, the format block is automatically encrypted using default encryption algorithm --- block/block_manager.go | 12 ++++++++---- crypto_key_derivation.go | 8 ++------ format_block.go | 2 ++ initialize.go | 28 ++++++++++++---------------- internal/repotesting/repotesting.go | 1 - 5 files changed, 24 insertions(+), 27 deletions(-) diff --git a/block/block_manager.go b/block/block_manager.go index 4b461028e..f1949baa4 100644 --- a/block/block_manager.go +++ b/block/block_manager.go @@ -459,7 +459,7 @@ func (bm *Manager) loadPackIndexesUnlocked(ctx context.Context) ([]IndexInfo, bo } func (bm *Manager) tryLoadPackIndexBlocksUnlocked(ctx context.Context, blocks []IndexInfo) error { - ch, err := bm.unprocessedIndexBlocksUnlocked(blocks) + ch, unprocessedIndexesSize, err := bm.unprocessedIndexBlocksUnlocked(blocks) if err != nil { return err } @@ -467,6 +467,7 @@ func (bm *Manager) tryLoadPackIndexBlocksUnlocked(ctx context.Context, blocks [] return nil } + log.Infof("downloading %v new index blocks (%v bytes)...", len(ch), unprocessedIndexesSize) var wg sync.WaitGroup errors := make(chan error, parallelFetches) @@ -498,26 +499,29 @@ func (bm *Manager) tryLoadPackIndexBlocksUnlocked(ctx context.Context, blocks [] for err := range errors { return err } + log.Infof("Index blocks downloaded.") return nil } // unprocessedIndexBlocksUnlocked returns a closed channel filled with block IDs that are not in committedBlocks cache. -func (bm *Manager) unprocessedIndexBlocksUnlocked(blocks []IndexInfo) (<-chan string, error) { +func (bm *Manager) unprocessedIndexBlocksUnlocked(blocks []IndexInfo) (<-chan string, int64, error) { + var totalSize int64 ch := make(chan string, len(blocks)) for _, block := range blocks { has, err := bm.committedBlocks.cache.hasIndexBlockID(block.FileName) if err != nil { - return nil, err + return nil, 0, err } if has { log.Debugf("index block %q already in cache, skipping", block.FileName) continue } ch <- block.FileName + totalSize += block.Length } close(ch) - return ch, nil + return ch, totalSize, nil } // Close closes the block manager. diff --git a/crypto_key_derivation.go b/crypto_key_derivation.go index 593a82858..704e263a5 100644 --- a/crypto_key_derivation.go +++ b/crypto_key_derivation.go @@ -6,20 +6,16 @@ "io" "golang.org/x/crypto/hkdf" - "golang.org/x/crypto/pbkdf2" "golang.org/x/crypto/scrypt" ) -// DefaultKeyDerivationAlgorithm is the key derivation algorithm for new configurations. -const DefaultKeyDerivationAlgorithm = "scrypt-65536-8-1" +// defaultKeyDerivationAlgorithm is the key derivation algorithm for new configurations. 
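// Editorial note: "scrypt-65536-8-1" encodes the scrypt cost parameters
// N=65536, r=8, p=1, exactly as passed to scrypt.Key below; the alternative
// pbkdf2-sha256-100000 derivation is removed in this hunk.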
+const defaultKeyDerivationAlgorithm = "scrypt-65536-8-1"
 
 func (f formatBlock) deriveMasterKeyFromPassword(password string) ([]byte, error) {
 	const masterKeySize = 32
 
 	switch f.KeyDerivationAlgorithm {
-	case "pbkdf2-sha256-100000":
-		return pbkdf2.Key([]byte(password), f.UniqueID, 100000, masterKeySize, sha256.New), nil
-
 	case "scrypt-65536-8-1":
 		return scrypt.Key([]byte(password), f.UniqueID, 65536, 8, 1, masterKeySize)
 
diff --git a/format_block.go b/format_block.go
index 555e53e41..d4334f53a 100644
--- a/format_block.go
+++ b/format_block.go
@@ -13,6 +13,8 @@
 	"github.com/kopia/repo/storage"
 )
 
+const defaultFormatEncryption = "AES256_GCM"
+
 // FormatBlockID is the identifier of a storage block that describes repository format.
 const FormatBlockID = "kopia.repository"
 
diff --git a/initialize.go b/initialize.go
index 4f5e254e0..281b33e28 100644
--- a/initialize.go
+++ b/initialize.go
@@ -17,22 +17,12 @@
 	BuildVersion = "v0-unofficial"
 )
 
-// DefaultEncryptionAlgorithm is the default algorithm for encrypting format block.
-var DefaultEncryptionAlgorithm = "AES256_GCM"
-
-// SupportedEncryptionAlgorithms lists all supported algorithms for encrypting format block.
-var SupportedEncryptionAlgorithms = []string{DefaultEncryptionAlgorithm, "NONE"}
-
 // NewRepositoryOptions specifies options that apply to newly created repositories.
 // All fields are optional, when not provided, reasonable defaults will be used.
 type NewRepositoryOptions struct {
-	UniqueID                  []byte // force the use of particular unique ID
-	FormatEncryptionAlgorithm string // identifier of encryption algorithm
-	KeyDerivationAlgorithm    string // identifier of key derivation algorithm
-
-	BlockFormat block.FormattingOptions
-	DisableHMAC bool
-
+	UniqueID     []byte // force the use of particular unique ID
+	BlockFormat  block.FormattingOptions
+	DisableHMAC  bool
 	ObjectFormat object.Format // object format
 }
 
@@ -69,14 +59,20 @@ func Initialize(ctx context.Context, st storage.Storage, opt *NewRepositoryOptio
 }
 
 func formatBlockFromOptions(opt *NewRepositoryOptions) *formatBlock {
-	return &formatBlock{
+	f := &formatBlock{
 		Tool:                   "https://github.com/kopia/kopia",
 		BuildInfo:              BuildInfo,
-		KeyDerivationAlgorithm: applyDefaultString(opt.KeyDerivationAlgorithm, DefaultKeyDerivationAlgorithm),
+		KeyDerivationAlgorithm: defaultKeyDerivationAlgorithm,
 		UniqueID:               applyDefaultRandomBytes(opt.UniqueID, 32),
 		Version:                "1",
-		EncryptionAlgorithm:    applyDefaultString(opt.FormatEncryptionAlgorithm, DefaultEncryptionAlgorithm),
+		EncryptionAlgorithm:    defaultFormatEncryption,
 	}
+
+	if opt.BlockFormat.Encryption == "NONE" {
+		f.EncryptionAlgorithm = "NONE"
+	}
+
+	return f
 }
 
 func repositoryObjectFormatFromOptions(opt *NewRepositoryOptions) *repositoryObjectFormat {
diff --git a/internal/repotesting/repotesting.go b/internal/repotesting/repotesting.go
index b08994291..b7554ca8d 100644
--- a/internal/repotesting/repotesting.go
+++ b/internal/repotesting/repotesting.go
@@ -53,7 +52,6 @@ func (e *Environment) Setup(t *testing.T, opts ...func(*repo.NewRepositoryOption
 			Splitter:     "FIXED",
 			MaxBlockSize: 400,
 		},
-		FormatEncryptionAlgorithm: "NONE",
 	}
 
 	for _, mod := range opts {

From c35037601d141bae834958eb8129785bf6ce3af1 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Sat, 12 Jan 2019 09:34:48 -0800
Subject: [PATCH 62/74] repo: embed format block bytes in all pack indexes

This allows the format block to be recovered if any block index survives.
The embedded block is not encrypted, but carries an HMAC checksum followed
by a 2-byte size.
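For illustration, an editorial sketch (not part of the original patch) of the
framing that addFormatBlockChecksumAndLength produces: a 2-byte little-endian
length is written on both sides of payload||checksum, so recovery can probe
either end of a pack or index file, decode the length, and verify the
checksum before trusting the bytes. The helper name frame and the sample
payload are illustrative only; this patch uses plain SHA-256, with the keyed
HMAC variant arriving later in the series:

    package main

    import (
        "crypto/sha256"
        "fmt"
    )

    // frame wraps payload so it can be located from either end of a file:
    // [len lo][len hi] payload||SHA-256(payload) [len lo][len hi]
    func frame(payload []byte) []byte {
        h := sha256.New()
        h.Write(payload)    // checksum covers the payload bytes only
        b := h.Sum(payload) // appends the digest: payload || checksum
        l := len(b)         // the stored length includes the checksum
        out := []byte{byte(l), byte(l >> 8)}
        out = append(out, b...)
        return append(out, byte(l), byte(l>>8))
    }

    func main() {
        fmt.Printf("framed: % x\n", frame([]byte(`{"tool":"kopia"}`)))
    }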
--- block/block_manager.go | 17 ++++-- block/block_manager_test.go | 4 +- format_block.go | 99 +++++++++++++++++++++++++++++++ format_block_test.go | 72 ++++++++++++++++++++++ manifest/manifest_manager_test.go | 6 +- open.go | 37 ++++++++---- tests/stress_test/stress_test.go | 2 +- 7 files changed, 213 insertions(+), 24 deletions(-) create mode 100644 format_block_test.go diff --git a/block/block_manager.go b/block/block_manager.go index f1949baa4..6fd012342 100644 --- a/block/block_manager.go +++ b/block/block_manager.go @@ -23,8 +23,10 @@ "github.com/kopia/repo/storage" ) -var log = repologging.Logger("kopia/block") -var formatLog = repologging.Logger("kopia/block/format") +var ( + log = repologging.Logger("kopia/block") + formatLog = repologging.Logger("kopia/block/format") +) // PackBlockPrefix is the prefix for all pack storage blocks. const PackBlockPrefix = "p" @@ -84,6 +86,8 @@ type Manager struct { maxPreambleLength int paddingUnit int timeNow func() time.Time + + repositoryFormatBytes []byte } // DeleteBlock marks the given blockID as deleted. @@ -271,6 +275,8 @@ func (bm *Manager) flushPackIndexesLocked(ctx context.Context) error { return fmt.Errorf("unable to build pack index: %v", err) } + buf.Write(bm.repositoryFormatBytes) //nolint:errcheck + data := buf.Bytes() dataCopy := append([]byte(nil), data...) @@ -930,11 +936,11 @@ func listIndexBlocksFromStorage(ctx context.Context, st storage.Storage) ([]Inde } // NewManager creates new block manager with given packing options and a formatter. -func NewManager(ctx context.Context, st storage.Storage, f FormattingOptions, caching CachingOptions) (*Manager, error) { - return newManagerWithOptions(ctx, st, f, caching, time.Now) +func NewManager(ctx context.Context, st storage.Storage, f FormattingOptions, caching CachingOptions, repositoryFormatBytes []byte) (*Manager, error) { + return newManagerWithOptions(ctx, st, f, caching, time.Now, repositoryFormatBytes) } -func newManagerWithOptions(ctx context.Context, st storage.Storage, f FormattingOptions, caching CachingOptions, timeNow func() time.Time) (*Manager, error) { +func newManagerWithOptions(ctx context.Context, st storage.Storage, f FormattingOptions, caching CachingOptions, timeNow func() time.Time, repositoryFormatBytes []byte) (*Manager, error) { if f.Version < minSupportedReadVersion || f.Version > currentWriteVersion { return nil, fmt.Errorf("can't handle repositories created using version %v (min supported %v, max supported %v)", f.Version, minSupportedReadVersion, maxSupportedReadVersion) } @@ -975,6 +981,7 @@ func newManagerWithOptions(ctx context.Context, st storage.Storage, f Formatting blockCache: blockCache, listCache: listCache, st: st, + repositoryFormatBytes: repositoryFormatBytes, writeFormatVersion: int32(f.Version), closed: make(chan struct{}), diff --git a/block/block_manager_test.go b/block/block_manager_test.go index 9ec1b6356..e7dc1d825 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -276,7 +276,7 @@ func TestBlockManagerFailedToWritePack(t *testing.T) { MaxPackSize: maxPackSize, HMACSecret: []byte("foo"), MasterKey: []byte("0123456789abcdef0123456789abcdef"), - }, CachingOptions{}, fakeTimeNowFrozen(fakeTime)) + }, CachingOptions{}, fakeTimeNowFrozen(fakeTime), nil) if err != nil { t.Fatalf("can't create bm: %v", err) } @@ -790,7 +790,7 @@ func newTestBlockManager(data map[string][]byte, keyTime map[string]time.Time, t Encryption: "NONE", HMACSecret: hmacSecret, MaxPackSize: maxPackSize, - }, CachingOptions{}, timeFunc) + }, 
CachingOptions{}, timeFunc, nil) if err != nil { panic("can't create block manager: " + err.Error()) } diff --git a/format_block.go b/format_block.go index d4334f53a..ec1926387 100644 --- a/format_block.go +++ b/format_block.go @@ -5,8 +5,11 @@ "context" "crypto/aes" "crypto/cipher" + "crypto/hmac" "crypto/rand" + "crypto/sha256" "encoding/json" + "errors" "fmt" "io" @@ -15,12 +18,19 @@ const defaultFormatEncryption = "AES256_GCM" +const ( + maxChecksummedFormatBytesLength = 65000 + formatBlockChecksumSize = sha256.Size +) + // FormatBlockID is the identifier of a storage block that describes repository format. const FormatBlockID = "kopia.repository" var ( purposeAESKey = []byte("AES") purposeAuthData = []byte("CHECKSUM") + + errFormatBlockNotFound = errors.New("format block not found") ) type formatBlock struct { @@ -52,6 +62,78 @@ func parseFormatBlock(b []byte) (*formatBlock, error) { return f, nil } +// RecoverFormatBlock attempts to recover format block replica from the specified file. +// The format block can be either the prefix or a suffix of the given file. +func RecoverFormatBlock(ctx context.Context, st storage.Storage, filename string) ([]byte, error) { + var foundMetadata storage.BlockMetadata + + if err := st.ListBlocks(ctx, filename, func(bm storage.BlockMetadata) error { + if foundMetadata.BlockID != "" { + return fmt.Errorf("found multiple blocks with a given prefix: %v", filename) + } + foundMetadata = bm + return nil + }); err != nil { + return nil, fmt.Errorf("error: %v", err) + } + + if foundMetadata.BlockID == "" { + return nil, storage.ErrBlockNotFound + } + + return recoverFormatBlockWithLength(ctx, st, foundMetadata.BlockID, foundMetadata.Length) +} + +func recoverFormatBlockWithLength(ctx context.Context, st storage.Storage, filename string, length int64) ([]byte, error) { + chunkLength := int64(65536) + if chunkLength > length { + chunkLength = length + } + + if chunkLength > 4 { + + // try prefix + prefixChunk, err := st.GetBlock(ctx, filename, 0, chunkLength) + if err != nil { + return nil, err + } + if l := int(prefixChunk[0]) + int(prefixChunk[1])<<8; l <= maxChecksummedFormatBytesLength && l+2 < len(prefixChunk) { + if b, ok := verifyFormatBlockChecksum(prefixChunk[2 : 2+l]); ok { + return b, nil + } + } + + // try the suffix + suffixChunk, err := st.GetBlock(ctx, filename, length-chunkLength, chunkLength) + if err != nil { + return nil, err + } + if l := int(suffixChunk[len(suffixChunk)-2]) + int(suffixChunk[len(suffixChunk)-1])<<8; l <= maxChecksummedFormatBytesLength && l+2 < len(suffixChunk) { + if b, ok := verifyFormatBlockChecksum(suffixChunk[len(suffixChunk)-2-l : len(suffixChunk)-2]); ok { + return b, nil + } + } + } + + return nil, errFormatBlockNotFound +} + +func verifyFormatBlockChecksum(b []byte) ([]byte, bool) { + if len(b) < formatBlockChecksumSize { + return nil, false + } + + data, checksum := b[0:len(b)-formatBlockChecksumSize], b[len(b)-formatBlockChecksumSize:] + h := sha256.New() + h.Write(data) //nolint:errcheck + actualChecksum := h.Sum(nil) + if !hmac.Equal(actualChecksum, checksum) { + return nil, false + } + + return data, true +} + func writeFormatBlock(ctx context.Context, st storage.Storage, f *formatBlock) error { var buf bytes.Buffer e := json.NewEncoder(&buf) @@ -152,3 +234,20 @@ func encryptFormatBytes(f *formatBlock, format *repositoryObjectFormat, masterKe return fmt.Errorf("unknown encryption algorithm: '%v'", f.EncryptionAlgorithm) } } + +func addFormatBlockChecksumAndLength(fb []byte) ([]byte, error) { + h := 
sha256.New() + h.Write(fb) //nolint:errcheck + checksummedFormatBytes := h.Sum(fb) + + l := len(checksummedFormatBytes) + if l > maxChecksummedFormatBytesLength { + return nil, fmt.Errorf("format block too big: %v", l) + } + + // return + result := append([]byte(nil), byte(l), byte(l>>8)) + result = append(result, checksummedFormatBytes...) + result = append(result, byte(l), byte(l>>8)) + return result, nil +} diff --git a/format_block_test.go b/format_block_test.go new file mode 100644 index 000000000..ddcd7d9a2 --- /dev/null +++ b/format_block_test.go @@ -0,0 +1,72 @@ +package repo + +import ( + "context" + "crypto/sha256" + "reflect" + "testing" + + "github.com/kopia/repo/internal/storagetesting" + "github.com/kopia/repo/storage" +) + +func TestFormatBlockRecovery(t *testing.T) { + data := map[string][]byte{} + st := storagetesting.NewMapStorage(data, nil, nil) + ctx := context.Background() + + someDataBlock := []byte("aadsdasdas") + checksummed, err := addFormatBlockChecksumAndLength(someDataBlock) + if err != nil { + t.Errorf("error appending checksum: %v", err) + } + if got, want := len(checksummed), 2+2+sha256.Size+len(someDataBlock); got != want { + t.Errorf("unexpected checksummed length: %v, want %v", got, want) + } + + st.PutBlock(ctx, "some-block-by-itself", checksummed) + st.PutBlock(ctx, "some-block-suffix", append(append([]byte(nil), 1, 2, 3), checksummed...)) + st.PutBlock(ctx, "some-block-prefix", append(append([]byte(nil), checksummed...), 1, 2, 3)) + + // mess up checksum + checksummed[len(checksummed)-3] ^= 1 + st.PutBlock(ctx, "bad-checksum", checksummed) + st.PutBlock(ctx, "zero-len", []byte{}) + st.PutBlock(ctx, "one-len", []byte{1}) + st.PutBlock(ctx, "two-len", []byte{1, 2}) + st.PutBlock(ctx, "three-len", []byte{1, 2, 3}) + st.PutBlock(ctx, "four-len", []byte{1, 2, 3, 4}) + st.PutBlock(ctx, "five-len", []byte{1, 2, 3, 4, 5}) + + cases := []struct { + block string + err error + }{ + {"some-block-by-itself", nil}, + {"some-block-suffix", nil}, + {"some-block-prefix", nil}, + {"bad-checksum", errFormatBlockNotFound}, + {"no-such-block", storage.ErrBlockNotFound}, + {"zero-len", errFormatBlockNotFound}, + {"one-len", errFormatBlockNotFound}, + {"two-len", errFormatBlockNotFound}, + {"three-len", errFormatBlockNotFound}, + {"four-len", errFormatBlockNotFound}, + {"five-len", errFormatBlockNotFound}, + } + + for _, tc := range cases { + t.Run(tc.block, func(t *testing.T) { + v, err := RecoverFormatBlock(ctx, st, tc.block) + if tc.err == nil { + if !reflect.DeepEqual(v, someDataBlock) || err != nil { + t.Errorf("unexpected result or error: v=%v err=%v, expected success", v, err) + } + } else { + if v != nil || err != tc.err { + t.Errorf("unexpected result or error: v=%v err=%v, expected %v", v, err, tc.err) + } + } + }) + } +} diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go index 61a83f27d..685ec0c85 100644 --- a/manifest/manifest_manager_test.go +++ b/manifest/manifest_manager_test.go @@ -135,7 +135,7 @@ func TestManifestInitCorruptedBlock(t *testing.T) { } // write some data to storage - bm, err := block.NewManager(ctx, st, f, block.CachingOptions{}) + bm, err := block.NewManager(ctx, st, f, block.CachingOptions{}, nil) if err != nil { t.Fatalf("err: %v", err) } @@ -159,7 +159,7 @@ func TestManifestInitCorruptedBlock(t *testing.T) { } // make a new block manager based on corrupted data. 
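// Editorial note: block.NewManager gains a trailing repositoryFormatBytes
// parameter in this patch (nil throughout the tests); it carries the
// checksummed format block that is embedded into packs for later recovery.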
- bm, err = block.NewManager(ctx, st, f, block.CachingOptions{}) + bm, err = block.NewManager(ctx, st, f, block.CachingOptions{}, nil) if err != nil { t.Fatalf("err: %v", err) } @@ -270,7 +270,7 @@ func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]b Hash: "HMAC-SHA256-128", Encryption: "NONE", MaxPackSize: 100000, - }, block.CachingOptions{}) + }, block.CachingOptions{}, nil) if err != nil { return nil, fmt.Errorf("can't create block manager: %v", err) } diff --git a/open.go b/open.go index c30723683..ce9080ecf 100644 --- a/open.go +++ b/open.go @@ -15,7 +15,9 @@ "github.com/kopia/repo/storage/logging" ) -var log = repologging.Logger("kopia/repo") +var ( + log = repologging.Logger("kopia/repo") +) // Options provides configuration parameters for connection to a repository. type Options struct { @@ -75,11 +77,21 @@ func Open(ctx context.Context, configFile string, password string, options *Opti func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, password string, options *Options, caching block.CachingOptions) (*Repository, error) { log.Debugf("reading encrypted format block") // Read cache block, potentially from cache. - f, err := readAndCacheFormatBlock(ctx, st, caching.CacheDirectory) + fb, err := readAndCacheFormatBlockBytes(ctx, st, caching.CacheDirectory) if err != nil { return nil, fmt.Errorf("unable to read format block: %v", err) } + f, err := parseFormatBlock(fb) + if err != nil { + return nil, fmt.Errorf("can't parse format block: %v", err) + } + + fb, err = addFormatBlockChecksumAndLength(fb) + if err != nil { + return nil, fmt.Errorf("unable to add checksum") + } + masterKey, err := f.deriveMasterKeyFromPassword(password) if err != nil { return nil, err @@ -98,7 +110,7 @@ func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, pa } log.Debugf("initializing block manager") - bm, err := block.NewManager(ctx, st, fo, caching) + bm, err := block.NewManager(ctx, st, fo, caching, fb) if err != nil { return nil, fmt.Errorf("unable to open block manager: %v", err) } @@ -145,11 +157,16 @@ func SetCachingConfig(ctx context.Context, configFile string, opt block.CachingO return fmt.Errorf("cannot open storage: %v", err) } - f, err := readAndCacheFormatBlock(ctx, st, "") + fb, err := readAndCacheFormatBlockBytes(ctx, st, "") if err != nil { return fmt.Errorf("can't read format block: %v", err) } + f, err := parseFormatBlock(fb) + if err != nil { + return fmt.Errorf("can't parse format block: %v", err) + } + if err = setupCaching(configFile, lc, opt, f.UniqueID); err != nil { return fmt.Errorf("unable to set up caching: %v", err) } @@ -166,13 +183,13 @@ func SetCachingConfig(ctx context.Context, configFile string, opt block.CachingO return nil } -func readAndCacheFormatBlock(ctx context.Context, st storage.Storage, cacheDirectory string) (*formatBlock, error) { +func readAndCacheFormatBlockBytes(ctx context.Context, st storage.Storage, cacheDirectory string) ([]byte, error) { cachedFile := filepath.Join(cacheDirectory, "kopia.repository") if cacheDirectory != "" { b, err := ioutil.ReadFile(cachedFile) if err == nil { // read from cache. - return parseFormatBlock(b) + return b, nil } } @@ -181,17 +198,11 @@ func readAndCacheFormatBlock(ctx context.Context, st storage.Storage, cacheDirec return nil, err } - // block successfully read from storage. 
-	f, err := parseFormatBlock(b)
-	if err != nil {
-		return nil, err
-	}
-
 	if cacheDirectory != "" {
 		if err := ioutil.WriteFile(cachedFile, b, 0600); err != nil {
 			log.Warningf("warning: unable to write cache: %v", err)
 		}
 	}
 
-	return f, nil
+	return b, nil
 }
diff --git a/tests/stress_test/stress_test.go b/tests/stress_test/stress_test.go
index 0bc31b268..0b8a1b399 100644
--- a/tests/stress_test/stress_test.go
+++ b/tests/stress_test/stress_test.go
@@ -43,7 +43,7 @@ func stressTestWithStorage(t *testing.T, st storage.Storage, duration time.Durat
 			Encryption:  "AES-256-CTR",
 			MaxPackSize: 20000000,
 			MasterKey:   []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-		}, block.CachingOptions{})
+		}, block.CachingOptions{}, nil)
 	}
 
 	seed0 := time.Now().Nanosecond()

From a01b0359dcb5edd38bd2e0707988b248c1410be5 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Mon, 14 Jan 2019 06:55:49 -0800
Subject: [PATCH 63/74] added hmac to format block embedded in pack indexes

---
 format_block.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/format_block.go b/format_block.go
index ec1926387..4f5b6381a 100644
--- a/format_block.go
+++ b/format_block.go
@@ -23,6 +23,11 @@
 	formatBlockChecksumSize = sha256.Size
 )
 
+// formatBlockChecksumSecret is a HMAC secret used for checksumming the format block.
+// It's not really a secret, but will provide positive identification of blocks that
+// are repository format blocks.
+var formatBlockChecksumSecret = []byte("kopia-repository")
+
 // FormatBlockID is the identifier of a storage block that describes repository format.
 const FormatBlockID = "kopia.repository"
 
@@ -124,7 +129,7 @@ func verifyFormatBlockChecksum(b []byte) ([]byte, bool) {
 	}
 
 	data, checksum := b[0:len(b)-formatBlockChecksumSize], b[len(b)-formatBlockChecksumSize:]
-	h := sha256.New()
+	h := hmac.New(sha256.New, formatBlockChecksumSecret)
 	h.Write(data) //nolint:errcheck
 	actualChecksum := h.Sum(nil)
 	if !hmac.Equal(actualChecksum, checksum) {
@@ -236,7 +241,7 @@ func encryptFormatBytes(f *formatBlock, format *repositoryObjectFormat, masterKe
 }
 
 func addFormatBlockChecksumAndLength(fb []byte) ([]byte, error) {
-	h := sha256.New()
+	h := hmac.New(sha256.New, formatBlockChecksumSecret)
 	h.Write(fb) //nolint:errcheck
 	checksummedFormatBytes := h.Sum(fb)
 

From 3377e403a5e59d1c04450af80d752ba381191f92 Mon Sep 17 00:00:00 2001
From: Jarek Kowalski
Date: Sat, 19 Jan 2019 05:27:19 -0500
Subject: [PATCH 64/74] repo: moved recovery block from index blocks to pack
 blocks

With encrypted repositories, index blocks are themselves encrypted, which
defeats the purpose of recovery blocks, whereas pack blocks contain
individually-encrypted sub-blocks.

---
 block/block_manager.go | 4 +---
 format_block.go        | 7 ++++++-
 format_block_test.go   | 2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/block/block_manager.go b/block/block_manager.go
index 6fd012342..bc417fb9e 100644
--- a/block/block_manager.go
+++ b/block/block_manager.go
@@ -275,8 +275,6 @@ func (bm *Manager) flushPackIndexesLocked(ctx context.Context) error {
 			return fmt.Errorf("unable to build pack index: %v", err)
 		}
 
-		buf.Write(bm.repositoryFormatBytes) //nolint:errcheck
-
 		data := buf.Bytes()
 		dataCopy := append([]byte(nil), data...)
@@ -345,7 +343,7 @@ func (bm *Manager) writePackBlockLocked(ctx context.Context) error { func (bm *Manager) preparePackDataBlock(packFile string) ([]byte, packIndexBuilder, error) { formatLog.Debugf("preparing block data with %v items", len(bm.currentPackItems)) - blockData, err := appendRandomBytes(nil, rand.Intn(bm.maxPreambleLength-bm.minPreambleLength+1)+bm.minPreambleLength) + blockData, err := appendRandomBytes(append([]byte(nil), bm.repositoryFormatBytes...), rand.Intn(bm.maxPreambleLength-bm.minPreambleLength+1)+bm.minPreambleLength) if err != nil { return nil, nil, fmt.Errorf("unable to prepare block preamble: %v", err) } diff --git a/format_block.go b/format_block.go index 4f5b6381a..7900df737 100644 --- a/format_block.go +++ b/format_block.go @@ -69,7 +69,12 @@ func parseFormatBlock(b []byte) (*formatBlock, error) { // RecoverFormatBlock attempts to recover format block replica from the specified file. // The format block can be either the prefix or a suffix of the given file. -func RecoverFormatBlock(ctx context.Context, st storage.Storage, filename string) ([]byte, error) { +// optionally the length can be provided (if known) to speed up recovery. +func RecoverFormatBlock(ctx context.Context, st storage.Storage, filename string, optionalLength int64) ([]byte, error) { + if optionalLength > 0 { + return recoverFormatBlockWithLength(ctx, st, filename, optionalLength) + } + var foundMetadata storage.BlockMetadata if err := st.ListBlocks(ctx, filename, func(bm storage.BlockMetadata) error { diff --git a/format_block_test.go b/format_block_test.go index ddcd7d9a2..e6b09711d 100644 --- a/format_block_test.go +++ b/format_block_test.go @@ -57,7 +57,7 @@ func TestFormatBlockRecovery(t *testing.T) { for _, tc := range cases { t.Run(tc.block, func(t *testing.T) { - v, err := RecoverFormatBlock(ctx, st, tc.block) + v, err := RecoverFormatBlock(ctx, st, tc.block, -1) if tc.err == nil { if !reflect.DeepEqual(v, someDataBlock) || err != nil { t.Errorf("unexpected result or error: v=%v err=%v, expected success", v, err) From 1803057657e909403c17b1fc2c423edc7e4ec754 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 21 Jan 2019 13:16:17 -0800 Subject: [PATCH 65/74] manifest: removed legacy uncompressed JSON support --- manifest/manifest_manager.go | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/manifest/manifest_manager.go b/manifest/manifest_manager.go index ef89bf57b..10634f621 100644 --- a/manifest/manifest_manager.go +++ b/manifest/manifest_manager.go @@ -392,19 +392,13 @@ func (m *Manager) loadManifestBlock(ctx context.Context, blockID string) (manife return man, err } - if len(blk) > 2 && blk[0] == '{' { - if err := json.Unmarshal(blk, &man); err != nil { - return man, fmt.Errorf("unable to parse block %q: %v", blockID, err) - } - } else { - gz, err := gzip.NewReader(bytes.NewReader(blk)) - if err != nil { - return man, fmt.Errorf("unable to unpack block %q: %v", blockID, err) - } + gz, err := gzip.NewReader(bytes.NewReader(blk)) + if err != nil { + return man, fmt.Errorf("unable to unpack block %q: %v", blockID, err) + } - if err := json.NewDecoder(gz).Decode(&man); err != nil { - return man, fmt.Errorf("unable to parse block %q: %v", blockID, err) - } + if err := json.NewDecoder(gz).Decode(&man); err != nil { + return man, fmt.Errorf("unable to parse block %q: %v", blockID, err) } return man, nil From 5f2de971aef1a411e61ec53be7e94eab8409e651 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 21 Jan 2019 13:30:12 -0800 Subject: [PATCH 
66/74] block: switched default hash and encryption to BLAKE2B-256-128 and SALSA20 respectively, as those are significantly faster than SHA2 and AES256 --- block/block_formatter.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/block/block_formatter.go b/block/block_formatter.go index 23a46fe41..27bd89b1c 100644 --- a/block/block_formatter.go +++ b/block/block_formatter.go @@ -13,8 +13,6 @@ "golang.org/x/crypto/blake2s" "golang.org/x/crypto/salsa20" "golang.org/x/crypto/sha3" - - "golang.org/x/crypto/ripemd160" ) // HashFunc computes hash of block of data using a cryptographic hash function, possibly with HMAC and/or truncation. @@ -171,16 +169,15 @@ func RegisterEncryption(name string, newEncryptor EncryptorFactory) { } // DefaultHash is the name of the default hash algorithm. -const DefaultHash = "HMAC-SHA256-128" +const DefaultHash = "BLAKE2B-256-128" // DefaultEncryption is the name of the default encryption algorithm. -const DefaultEncryption = "AES-256-CTR" +const DefaultEncryption = "SALSA20" func init() { RegisterHash("HMAC-SHA256", truncatedHMACHashFuncFactory(sha256.New, 32)) RegisterHash("HMAC-SHA256-128", truncatedHMACHashFuncFactory(sha256.New, 16)) RegisterHash("HMAC-SHA224", truncatedHMACHashFuncFactory(sha256.New224, 28)) - RegisterHash("HMAC-RIPEMD-160", truncatedHMACHashFuncFactory(ripemd160.New, 20)) RegisterHash("HMAC-SHA3-224", truncatedHMACHashFuncFactory(sha3.New224, 28)) RegisterHash("HMAC-SHA3-256", truncatedHMACHashFuncFactory(sha3.New256, 32)) From 6d597ce941e838c9cdb67ffc3c9bdb00c0ac3d11 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 20 Mar 2019 08:10:02 -0700 Subject: [PATCH 67/74] travis: updated to go 1.12 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ada3a2350..34abfcbcb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: go go: -- '1.11.x' +- '1.12.x' os: - linux before_install: From bdafe117d904fc98642ad476f4f293c06b57eb9a Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 1 Apr 2019 19:04:06 -0700 Subject: [PATCH 68/74] Makefile: switched linter to golangci-lint and updated goveralls setup fixed lint errors & removed .gometalinter config --- .gitignore | 1 + .gometalinter.json | 7 --- Makefile | 31 ++++++++++--- block/block_cache_test.go | 45 ++++++++++++------- block/block_index_recovery_test.go | 4 +- block/block_manager.go | 18 ++------ block/block_manager_test.go | 22 ++++----- block/merged_test.go | 4 +- block/packindex_test.go | 15 ++++--- connect.go | 4 +- format_block_test.go | 27 ++++++----- internal/retry/retry_test.go | 3 +- manifest/manifest_manager_test.go | 2 +- object/indirect.go | 2 - object/object_manager_test.go | 22 +++++---- object/object_writer.go | 3 +- object/objectid_test.go | 2 - repository_test.go | 16 ++++--- storage/filesystem/filesystem_storage_test.go | 21 ++++++--- storage/s3/s3_storage_test.go | 7 +-- storage/storage_test.go | 8 ++-- storage/webdav/webdav_storage_test.go | 2 +- .../repository_stress_test.go | 13 +++--- 23 files changed, 157 insertions(+), 122 deletions(-) delete mode 100644 .gometalinter.json diff --git a/.gitignore b/.gitignore index 3ff849040..6e6b5a506 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ *.cov *service_account.json +.tools/ diff --git a/.gometalinter.json b/.gometalinter.json deleted file mode 100644 index 5c27fe988..000000000 --- a/.gometalinter.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "Disable": ["maligned","gas","gosec"], - "Exclude": [ - ".+_test\\.go" - 
], - "Deadline": "120s" -} \ No newline at end of file diff --git a/Makefile b/Makefile index a0982eeb8..d06fd3717 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +LINTER_TOOL=.tools/bin/golangci-lint +GOVERALLS_TOOL=.tools/bin/goveralls + all: test lint travis: build-all test upload-coverage @@ -10,24 +13,40 @@ setup: travis-setup: GO111MODULE=off go get github.com/mattn/goveralls -lint: - gometalinter.v2 ./... +lint: $(LINTER_TOOL) + $(LINTER_TOOL) run -build-all: - # this downloads all dependencies for all OS/architectures and updates go.mod - # TODO(jkowalski): parallelize this once we're on 1.12 +$(LINTER_TOOL): + mkdir -p .tools + curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b .tools/bin/ v1.16.0 + +$(GOVERALLS_TOOL): + mkdir -p .tools + GO111MODULE=off GOPATH=$(CURDIR)/.tools go get github.com/mattn/goveralls + +build-linux-amd64: CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=linux go build ./... + +build-windows-amd64: CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=windows go build ./... + +build-darwin-amd64: CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=darwin go build ./... + +build-linux-arm: CGO_ENABLED=0 GO111MODULE=on GOARCH=arm GOOS=linux go build ./... + +build-linux-arm64: CGO_ENABLED=0 GO111MODULE=on GOARCH=arm64 GOOS=linux go build ./... +build-all: build-linux-amd64 build-windows-amd64 build-darwin-amd64 build-linux-arm build-linux-arm64 + test: GO111MODULE=on go test -tags test -count=1 -coverprofile=raw.cov --coverpkg ./... -timeout 90s ./... grep -v testing/ raw.cov > tmp.cov upload-coverage: - goveralls -service=travis-ci -coverprofile=tmp.cov + $(GOVERALLS_TOOL) -service=travis-ci -coverprofile=tmp.cov coverage-html: go tool cover -html=tmp.cov diff --git a/block/block_cache_test.go b/block/block_cache_test.go index ce9a54677..485b24e4c 100644 --- a/block/block_cache_test.go +++ b/block/block_cache_test.go @@ -17,12 +17,12 @@ "github.com/kopia/repo/storage" ) -func newUnderlyingStorageForBlockCacheTesting() storage.Storage { +func newUnderlyingStorageForBlockCacheTesting(t *testing.T) storage.Storage { ctx := context.Background() data := map[string][]byte{} st := storagetesting.NewMapStorage(data, nil, nil) - st.PutBlock(ctx, "block-1", []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - st.PutBlock(ctx, "block-4k", bytes.Repeat([]byte{1, 2, 3, 4}, 1000)) // 4000 bytes + assertNoError(t, st.PutBlock(ctx, "block-1", []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})) + assertNoError(t, st.PutBlock(ctx, "block-4k", bytes.Repeat([]byte{1, 2, 3, 4}, 1000))) // 4000 bytes return st } @@ -30,7 +30,7 @@ func TestCacheExpiration(t *testing.T) { cacheData := map[string][]byte{} cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil) - underlyingStorage := newUnderlyingStorageForBlockCacheTesting() + underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t) cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, cacheStorage, CachingOptions{ MaxCacheSizeBytes: 10000, @@ -41,10 +41,14 @@ func TestCacheExpiration(t *testing.T) { defer cache.close() ctx := context.Background() - cache.getContentBlock(ctx, "00000a", "block-4k", 0, -1) // 4k - cache.getContentBlock(ctx, "00000b", "block-4k", 0, -1) // 4k - cache.getContentBlock(ctx, "00000c", "block-4k", 0, -1) // 4k - cache.getContentBlock(ctx, "00000d", "block-4k", 0, -1) // 4k + _, err = cache.getContentBlock(ctx, "00000a", "block-4k", 0, -1) // 4k + assertNoError(t, err) + _, err = cache.getContentBlock(ctx, "00000b", "block-4k", 0, -1) // 4k + 
assertNoError(t, err) + _, err = cache.getContentBlock(ctx, "00000c", "block-4k", 0, -1) // 4k + assertNoError(t, err) + _, err = cache.getContentBlock(ctx, "00000d", "block-4k", 0, -1) // 4k + assertNoError(t, err) // wait for a sweep time.Sleep(2 * time.Second) @@ -52,7 +56,7 @@ func TestCacheExpiration(t *testing.T) { // 00000a and 00000b will be removed from cache because it's the oldest. // to verify, let's remove block-4k from the underlying storage and make sure we can still read // 00000c and 00000d from the cache but not 00000a nor 00000b - underlyingStorage.DeleteBlock(ctx, "block-4k") + assertNoError(t, underlyingStorage.DeleteBlock(ctx, "block-4k")) cases := []struct { block string @@ -83,7 +87,7 @@ func TestDiskBlockCache(t *testing.T) { } defer os.RemoveAll(tmpDir) - cache, err := newBlockCache(ctx, newUnderlyingStorageForBlockCacheTesting(), CachingOptions{ + cache, err := newBlockCache(ctx, newUnderlyingStorageForBlockCacheTesting(t), CachingOptions{ MaxCacheSizeBytes: 10000, CacheDirectory: tmpDir, }) @@ -161,7 +165,7 @@ func TestCacheFailureToOpen(t *testing.T) { cacheData := map[string][]byte{} cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil) - underlyingStorage := newUnderlyingStorageForBlockCacheTesting() + underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t) faultyCache := &storagetesting.FaultyStorage{ Base: cacheStorage, Faults: map[string][]*storagetesting.Fault{ @@ -172,7 +176,7 @@ func TestCacheFailureToOpen(t *testing.T) { } // Will fail because of ListBlocks failure. - cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{ + _, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{ MaxCacheSizeBytes: 10000, }, 0, 5*time.Hour) if err == nil || !strings.Contains(err.Error(), someError.Error()) { @@ -180,7 +184,7 @@ func TestCacheFailureToOpen(t *testing.T) { } // ListBlocks fails only once, next time it succeeds. 
-	cache, err = newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
+	cache, err := newBlockCacheWithCacheStorage(context.Background(), underlyingStorage, faultyCache, CachingOptions{
 		MaxCacheSizeBytes: 10000,
 	}, 0, 100*time.Millisecond)
 	if err != nil {
@@ -195,7 +199,7 @@ func TestCacheFailureToWrite(t *testing.T) {
 	cacheData := map[string][]byte{}
 	cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
-	underlyingStorage := newUnderlyingStorageForBlockCacheTesting()
+	underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
 	faultyCache := &storagetesting.FaultyStorage{
 		Base: cacheStorage,
 	}
@@ -239,7 +243,7 @@ func TestCacheFailureToRead(t *testing.T) {
 	cacheData := map[string][]byte{}
 	cacheStorage := storagetesting.NewMapStorage(cacheData, nil, nil)
-	underlyingStorage := newUnderlyingStorageForBlockCacheTesting()
+	underlyingStorage := newUnderlyingStorageForBlockCacheTesting(t)
 	faultyCache := &storagetesting.FaultyStorage{
 		Base: cacheStorage,
 	}
@@ -275,13 +279,20 @@ func TestCacheFailureToRead(t *testing.T)
 func verifyStorageBlockList(t *testing.T, st storage.Storage, expectedBlocks ...string) {
 	t.Helper()
 	var foundBlocks []string
-	st.ListBlocks(context.Background(), "", func(bm storage.BlockMetadata) error {
+	assertNoError(t, st.ListBlocks(context.Background(), "", func(bm storage.BlockMetadata) error {
 		foundBlocks = append(foundBlocks, bm.BlockID)
 		return nil
-	})
+	}))
 	sort.Strings(foundBlocks)
 	if !reflect.DeepEqual(foundBlocks, expectedBlocks) {
 		t.Errorf("unexpected block list: %v, wanted %v", foundBlocks, expectedBlocks)
 	}
 }
+
+func assertNoError(t *testing.T, err error) {
+	t.Helper()
+	if err != nil {
+		t.Errorf("err: %v", err)
+	}
+}
diff --git a/block/block_index_recovery_test.go b/block/block_index_recovery_test.go
index d42cdc672..287d81af1 100644
--- a/block/block_index_recovery_test.go
+++ b/block/block_index_recovery_test.go
@@ -22,10 +22,10 @@ func TestBlockIndexRecovery(t *testing.T) {
 	}
 
 	// delete all index blocks
-	bm.st.ListBlocks(ctx, newIndexBlockPrefix, func(bi storage.BlockMetadata) error {
+	assertNoError(t, bm.st.ListBlocks(ctx, newIndexBlockPrefix, func(bi storage.BlockMetadata) error {
 		log.Debugf("deleting %v", bi.BlockID)
 		return bm.st.DeleteBlock(ctx, bi.BlockID)
-	})
+	}))
 
 	// now with index blocks gone, all blocks appear to not be found
 	bm = newTestBlockManager(data, keyTime, nil)
diff --git a/block/block_manager.go b/block/block_manager.go
index bc417fb9e..1290959ae 100644
--- a/block/block_manager.go
+++ b/block/block_manager.go
@@ -5,7 +5,6 @@
 	"bytes"
 	"context"
 	"crypto/aes"
-	"crypto/cipher"
 	cryptorand "crypto/rand"
 	"encoding/hex"
 	"fmt"
@@ -731,17 +730,14 @@ func (bm *Manager) getBlockInfo(blockID string) (Info, error)
 func (bm *Manager) BlockInfo(ctx context.Context, blockID string) (Info, error) {
 	bi, err := bm.getBlockInfo(blockID)
 	if err != nil {
+		log.Debugf("BlockInfo(%q) - error %v", blockID, err)
 		return Info{}, err
 	}
 
-	if err == nil {
-		if bi.Deleted {
-			log.Debugf("BlockInfo(%q) - deleted", blockID)
-		} else {
-			log.Debugf("BlockInfo(%q) - exists in %v", blockID, bi.PackFile)
-		}
+	if bi.Deleted {
+		log.Debugf("BlockInfo(%q) - deleted", blockID)
 	} else {
-		log.Debugf("BlockInfo(%q) - error %v", err)
+		log.Debugf("BlockInfo(%q) - exists in %v", blockID, bi.PackFile)
 	}
 
 	return bi, err
@@ -1041,9 +1037,3 @@ func createEncryptor(f FormattingOptions) (Encryptor, error) {
 
 	return e(f)
 }
-
-func curryEncryptionKey(n func(k []byte) (cipher.Block, error), key []byte) func() (cipher.Block, error)
{ - return func() (cipher.Block, error) { - return n(key) - } -} diff --git a/block/block_manager_test.go b/block/block_manager_test.go index e7dc1d825..ecf71de6d 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -438,7 +438,7 @@ func TestRewriteNonDeleted(t *testing.T) { block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100)) applyStep(action1) - bm.RewriteBlock(ctx, block1) + assertNoError(t, bm.RewriteBlock(ctx, block1)) applyStep(action2) verifyBlock(ctx, t, bm, block1, seededRandomData(10, 100)) dumpBlockManagerData(t, data) @@ -500,9 +500,9 @@ func TestRewriteDeleted(t *testing.T) { block1 := writeBlockAndVerify(ctx, t, bm, seededRandomData(10, 100)) applyStep(action1) - bm.DeleteBlock(block1) + assertNoError(t, bm.DeleteBlock(block1)) applyStep(action2) - bm.RewriteBlock(ctx, block1) + assertNoError(t, bm.RewriteBlock(ctx, block1)) applyStep(action3) verifyBlockNotFound(ctx, t, bm, block1) dumpBlockManagerData(t, data) @@ -537,12 +537,12 @@ func TestDeleteAndRecreate(t *testing.T) { // delete but at given timestamp but don't commit yet. bm0 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(tc.deletionTime, 1*time.Second)) - bm0.DeleteBlock(block1) + assertNoError(t, bm0.DeleteBlock(block1)) // delete it at t0+10 bm1 := newTestBlockManager(data, keyTime, fakeTimeNowWithAutoAdvance(fakeTime.Add(10*time.Second), 1*time.Second)) verifyBlock(ctx, t, bm1, block1, seededRandomData(10, 100)) - bm1.DeleteBlock(block1) + assertNoError(t, bm1.DeleteBlock(block1)) bm1.Flush(ctx) // recreate at t0+20 @@ -591,12 +591,12 @@ func TestFindUnreferencedStorageFiles(t *testing.T) { // block still present in first pack verifyUnreferencedStorageFilesCount(ctx, t, bm, 0) - bm.RewriteBlock(ctx, blockID) + assertNoError(t, bm.RewriteBlock(ctx, blockID)) if err := bm.Flush(ctx); err != nil { t.Errorf("flush error: %v", err) } verifyUnreferencedStorageFilesCount(ctx, t, bm, 1) - bm.RewriteBlock(ctx, blockID) + assertNoError(t, bm.RewriteBlock(ctx, blockID)) if err := bm.Flush(ctx); err != nil { t.Errorf("flush error: %v", err) } @@ -732,7 +732,7 @@ func verifyVersionCompat(t *testing.T, writeVersion int) { cnt := 0 for blockID := range dataSet { t.Logf("deleting %v", blockID) - mgr.DeleteBlock(blockID) + assertNoError(t, mgr.DeleteBlock(blockID)) delete(dataSet, blockID) cnt++ if cnt >= 3 { @@ -883,7 +883,7 @@ func seededRandomData(seed int, length int) []byte { func hashValue(b []byte) string { h := hmac.New(sha256.New, hmacSecret) - h.Write(b) + h.Write(b) //nolint:errcheck return hex.EncodeToString(h.Sum(nil)) } @@ -894,10 +894,10 @@ func dumpBlockManagerData(t *testing.T, data map[string][]byte) { ndx, err := openPackIndex(bytes.NewReader(v)) if err == nil { t.Logf("index %v (%v bytes)", k, len(v)) - ndx.Iterate("", func(i Info) error { + assertNoError(t, ndx.Iterate("", func(i Info) error { t.Logf(" %+v\n", i) return nil - }) + })) } } else { diff --git a/block/merged_test.go b/block/merged_test.go index 0c7127ce3..58da5a2c0 100644 --- a/block/merged_test.go +++ b/block/merged_test.go @@ -45,7 +45,7 @@ func TestMerged(t *testing.T) { } var inOrder []string - m.Iterate("", func(i Info) error { + assertNoError(t, m.Iterate("", func(i Info) error { inOrder = append(inOrder, i.BlockID) if i.BlockID == "de1e1e" { if i.Deleted { @@ -53,7 +53,7 @@ func TestMerged(t *testing.T) { } } return nil - }) + })) if i, err := m.GetInfo("de1e1e"); err != nil { t.Errorf("error getting deleted block info: %v", err) diff --git a/block/packindex_test.go 
b/block/packindex_test.go index 88fd5e3fe..3a4d5c658 100644 --- a/block/packindex_test.go +++ b/block/packindex_test.go @@ -139,14 +139,14 @@ func TestPackIndex(t *testing.T) { } cnt := 0 - ndx.Iterate("", func(info2 Info) error { + assertNoError(t, ndx.Iterate("", func(info2 Info) error { info := infoMap[info2.BlockID] if !reflect.DeepEqual(info, info2) { t.Errorf("invalid value retrieved: %+v, wanted %+v", info2, info) } cnt++ return nil - }) + })) if cnt != len(infoMap) { t.Errorf("invalid number of iterations: %v, wanted %v", cnt, len(infoMap)) } @@ -166,13 +166,13 @@ func TestPackIndex(t *testing.T) { for _, prefix := range prefixes { cnt2 := 0 - ndx.Iterate(string(prefix), func(info2 Info) error { + assertNoError(t, ndx.Iterate(string(prefix), func(info2 Info) error { cnt2++ if !strings.HasPrefix(string(info2.BlockID), string(prefix)) { t.Errorf("unexpected item %v when iterating prefix %v", info2.BlockID, prefix) } return nil - }) + })) t.Logf("found %v elements with prefix %q", cnt2, prefix) } } @@ -188,13 +188,14 @@ func fuzzTestIndexOpen(t *testing.T, originalData []byte) { } defer ndx.Close() cnt := 0 - ndx.Iterate("", func(cb Info) error { + assertNoError(t, ndx.Iterate("", func(cb Info) error { if cnt < 10 { - ndx.GetInfo(cb.BlockID) + _, err := ndx.GetInfo(cb.BlockID) + assertNoError(t, err) } cnt++ return nil - }) + })) }) } diff --git a/connect.go b/connect.go index 16859e181..002319ade 100644 --- a/connect.go +++ b/connect.go @@ -73,8 +73,8 @@ func setupCaching(configPath string, lc *LocalConfig, opt block.CachingOptions, } h := sha256.New() - h.Write(uniqueID) - h.Write([]byte(configPath)) + h.Write(uniqueID) //nolint:errcheck + h.Write([]byte(configPath)) //nolint:errcheck lc.Caching.CacheDirectory = filepath.Join(cacheDir, "kopia", hex.EncodeToString(h.Sum(nil))[0:16]) } else { absCacheDir, err := filepath.Abs(opt.CacheDirectory) diff --git a/format_block_test.go b/format_block_test.go index e6b09711d..e1bd302aa 100644 --- a/format_block_test.go +++ b/format_block_test.go @@ -24,19 +24,19 @@ func TestFormatBlockRecovery(t *testing.T) { t.Errorf("unexpected checksummed length: %v, want %v", got, want) } - st.PutBlock(ctx, "some-block-by-itself", checksummed) - st.PutBlock(ctx, "some-block-suffix", append(append([]byte(nil), 1, 2, 3), checksummed...)) - st.PutBlock(ctx, "some-block-prefix", append(append([]byte(nil), checksummed...), 1, 2, 3)) + assertNoError(t, st.PutBlock(ctx, "some-block-by-itself", checksummed)) + assertNoError(t, st.PutBlock(ctx, "some-block-suffix", append(append([]byte(nil), 1, 2, 3), checksummed...))) + assertNoError(t, st.PutBlock(ctx, "some-block-prefix", append(append([]byte(nil), checksummed...), 1, 2, 3))) // mess up checksum checksummed[len(checksummed)-3] ^= 1 - st.PutBlock(ctx, "bad-checksum", checksummed) - st.PutBlock(ctx, "zero-len", []byte{}) - st.PutBlock(ctx, "one-len", []byte{1}) - st.PutBlock(ctx, "two-len", []byte{1, 2}) - st.PutBlock(ctx, "three-len", []byte{1, 2, 3}) - st.PutBlock(ctx, "four-len", []byte{1, 2, 3, 4}) - st.PutBlock(ctx, "five-len", []byte{1, 2, 3, 4, 5}) + assertNoError(t, st.PutBlock(ctx, "bad-checksum", checksummed)) + assertNoError(t, st.PutBlock(ctx, "zero-len", []byte{})) + assertNoError(t, st.PutBlock(ctx, "one-len", []byte{1})) + assertNoError(t, st.PutBlock(ctx, "two-len", []byte{1, 2})) + assertNoError(t, st.PutBlock(ctx, "three-len", []byte{1, 2, 3})) + assertNoError(t, st.PutBlock(ctx, "four-len", []byte{1, 2, 3, 4})) + assertNoError(t, st.PutBlock(ctx, "five-len", []byte{1, 2, 3, 4, 5})) cases := 
[]struct { block string @@ -70,3 +70,10 @@ func TestFormatBlockRecovery(t *testing.T) { }) } } + +func assertNoError(t *testing.T, err error) { + t.Helper() + if err != nil { + t.Errorf("err: %v", err) + } +} diff --git a/internal/retry/retry_test.go b/internal/retry/retry_test.go index ae7af310a..18b2e7e5d 100644 --- a/internal/retry/retry_test.go +++ b/internal/retry/retry_test.go @@ -9,8 +9,7 @@ ) var ( - errRetriable = errors.New("retriable") - errNonRetriable = errors.New("non-retriable") + errRetriable = errors.New("retriable") ) func isRetriable(e error) bool { diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go index 685ec0c85..c261d8795 100644 --- a/manifest/manifest_manager_test.go +++ b/manifest/manifest_manager_test.go @@ -145,7 +145,7 @@ func TestManifestInitCorruptedBlock(t *testing.T) { t.Fatalf("err: %v", err) } - mgr.Put(ctx, map[string]string{"type": "foo"}, map[string]string{"some": "value"}) + mgr.Put(ctx, map[string]string{"type": "foo"}, map[string]string{"some": "value"}) //nolint:errcheck mgr.Flush(ctx) bm.Flush(ctx) diff --git a/object/indirect.go b/object/indirect.go index 2f217e536..7fdadfd4e 100644 --- a/object/indirect.go +++ b/object/indirect.go @@ -1,7 +1,5 @@ package object -var indirectStreamType = "kopia:indirect" - // indirectObjectEntry represents an entry in indirect object stream. type indirectObjectEntry struct { Start int64 `json:"s,omitempty"` diff --git a/object/object_manager_test.go b/object/object_manager_test.go index 1bc65fa86..4fb177811 100644 --- a/object/object_manager_test.go +++ b/object/object_manager_test.go @@ -37,7 +37,7 @@ func (f *fakeBlockManager) GetBlock(ctx context.Context, blockID string) ([]byte func (f *fakeBlockManager) WriteBlock(ctx context.Context, data []byte, prefix string) (string, error) { h := sha256.New() - h.Write(data) + h.Write(data) //nolint:errcheck blockID := prefix + string(hex.EncodeToString(h.Sum(nil))) f.mu.Lock() @@ -96,7 +96,9 @@ func TestWriters(t *testing.T) { writer := om.NewWriter(ctx, WriterOptions{}) - writer.Write(c.data) + if _, err := writer.Write(c.data); err != nil { + t.Errorf("write error: %v", err) + } result, err := writer.Result() if err != nil { @@ -131,8 +133,8 @@ func TestWriterCompleteChunkInTwoWrites(t *testing.T) { bytes := make([]byte, 100) writer := om.NewWriter(ctx, WriterOptions{}) - writer.Write(bytes[0:50]) - writer.Write(bytes[0:50]) + writer.Write(bytes[0:50]) //nolint:errcheck + writer.Write(bytes[0:50]) //nolint:errcheck result, err := writer.Result() if !objectIDsEqual(result, "cd00e292c5970d3c5e2f0ffa5171e555bc46bfc4faddfb4a418b6840b86e79a3") { t.Errorf("unexpected result: %v err: %v", result, err) @@ -176,7 +178,9 @@ func TestIndirection(t *testing.T) { contentBytes := make([]byte, c.dataLength) writer := om.NewWriter(ctx, WriterOptions{}) - writer.Write(contentBytes) + if _, err := writer.Write(contentBytes); err != nil { + t.Errorf("write error: %v", err) + } result, err := writer.Result() if err != nil { t.Errorf("error getting writer results: %v", err) @@ -223,7 +227,7 @@ func TestHMAC(t *testing.T) { _, om := setupTest(t) w := om.NewWriter(ctx, WriterOptions{}) - w.Write(content) + w.Write(content) //nolint:errcheck result, err := w.Result() if result.String() != "cad29ff89951a3c085c86cb7ed22b82b51f7bdfda24f932c7f9601f51d5975ba" { t.Errorf("unexpected result: %v err: %v", result.String(), err) @@ -290,10 +294,12 @@ func TestEndToEndReadAndSeek(t *testing.T) { for _, size := range []int{1, 199, 200, 201, 9999, 512434} { // Create some 
random data sample of the specified size. randomData := make([]byte, size) - cryptorand.Read(randomData) + cryptorand.Read(randomData) //nolint:errcheck writer := om.NewWriter(ctx, WriterOptions{}) - writer.Write(randomData) + if _, err := writer.Write(randomData); err != nil { + t.Errorf("write error: %v", err) + } objectID, err := writer.Result() writer.Close() if err != nil { diff --git a/object/object_writer.go b/object/object_writer.go index a0e7e8f5b..cf79754f8 100644 --- a/object/object_writer.go +++ b/object/object_writer.go @@ -56,8 +56,7 @@ type objectWriter struct { description string - splitter objectSplitter - pendingBlocksWG sync.WaitGroup + splitter objectSplitter } func (w *objectWriter) Close() error { diff --git a/object/objectid_test.go b/object/objectid_test.go index 1aa29484b..022bebf4a 100644 --- a/object/objectid_test.go +++ b/object/objectid_test.go @@ -4,8 +4,6 @@ "testing" ) -type rawObjectID ID - func TestParseObjectID(t *testing.T) { cases := []struct { text string diff --git a/repository_test.go b/repository_test.go index 33e957c40..822ffe6e0 100644 --- a/repository_test.go +++ b/repository_test.go @@ -37,7 +37,9 @@ func TestWriters(t *testing.T) { defer env.Setup(t).Close(t) writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) - writer.Write(c.data) + if _, err := writer.Write(c.data); err != nil { + t.Fatalf("write error: %v", err) + } result, err := writer.Result() if err != nil { @@ -64,8 +66,8 @@ func TestWriterCompleteChunkInTwoWrites(t *testing.T) { bytes := make([]byte, 100) writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) - writer.Write(bytes[0:50]) - writer.Write(bytes[0:50]) + writer.Write(bytes[0:50]) //nolint:errcheck + writer.Write(bytes[0:50]) //nolint:errcheck result, err := writer.Result() if result != "1d804f1f69df08f3f59070bf962de69433e3d61ac18522a805a84d8c92741340" { t.Errorf("unexpected result: %v err: %v", result, err) @@ -149,7 +151,7 @@ func TestHMAC(t *testing.T) { content := bytes.Repeat([]byte{0xcd}, 50) w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) - w.Write(content) + w.Write(content) //nolint:errcheck result, err := w.Result() if result.String() != "367352007ee6ca9fa755ce8352347d092c17a24077fd33c62f655574a8cf906d" { t.Errorf("unexpected result: %v err: %v", result.String(), err) @@ -193,10 +195,10 @@ func TestEndToEndReadAndSeek(t *testing.T) { for _, size := range []int{1, 199, 200, 201, 9999, 512434} { // Create some random data sample of the specified size. 
randomData := make([]byte, size) - cryptorand.Read(randomData) + cryptorand.Read(randomData) //nolint:errcheck writer := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) - writer.Write(randomData) + writer.Write(randomData) //nolint:errcheck objectID, err := writer.Result() writer.Close() if err != nil { @@ -300,7 +302,7 @@ func TestFormats(t *testing.T) { for k, v := range c.oids { bytesToWrite := []byte(k) w := env.Repository.Objects.NewWriter(ctx, object.WriterOptions{}) - w.Write(bytesToWrite) + w.Write(bytesToWrite) //nolint:errcheck oid, err := w.Result() if err != nil { t.Errorf("error: %v", err) diff --git a/storage/filesystem/filesystem_storage_test.go b/storage/filesystem/filesystem_storage_test.go index e0a9e7765..1812f51ef 100644 --- a/storage/filesystem/filesystem_storage_test.go +++ b/storage/filesystem/filesystem_storage_test.go @@ -68,26 +68,26 @@ func TestFileStorageTouch(t *testing.T) { } fs := r.(*fsStorage) - fs.PutBlock(ctx, t1, []byte{1}) + assertNoError(t, fs.PutBlock(ctx, t1, []byte{1})) time.Sleep(1 * time.Second) // sleep a bit to accommodate Apple filesystems with low timestamp resolution - fs.PutBlock(ctx, t2, []byte{1}) + assertNoError(t, fs.PutBlock(ctx, t2, []byte{1})) time.Sleep(1 * time.Second) - fs.PutBlock(ctx, t3, []byte{1}) + assertNoError(t, fs.PutBlock(ctx, t3, []byte{1})) verifyBlockTimestampOrder(t, fs, t1, t2, t3) - fs.TouchBlock(ctx, t2, 1*time.Hour) // has no effect, all timestamps are very new + assertNoError(t, fs.TouchBlock(ctx, t2, 1*time.Hour)) // has no effect, all timestamps are very new verifyBlockTimestampOrder(t, fs, t1, t2, t3) - fs.TouchBlock(ctx, t1, 0) // moves t1 to the top of the pile + assertNoError(t, fs.TouchBlock(ctx, t1, 0)) // moves t1 to the top of the pile verifyBlockTimestampOrder(t, fs, t2, t3, t1) time.Sleep(1 * time.Second) - fs.TouchBlock(ctx, t2, 0) // moves t2 to the top of the pile + assertNoError(t, fs.TouchBlock(ctx, t2, 0)) // moves t2 to the top of the pile verifyBlockTimestampOrder(t, fs, t3, t1, t2) time.Sleep(1 * time.Second) - fs.TouchBlock(ctx, t1, 0) // moves t1 to the top of the pile + assertNoError(t, fs.TouchBlock(ctx, t1, 0)) // moves t1 to the top of the pile verifyBlockTimestampOrder(t, fs, t3, t2, t1) } @@ -111,3 +111,10 @@ func verifyBlockTimestampOrder(t *testing.T, st storage.Storage, want ...string) t.Errorf("incorrect block order: %v, wanted %v", blocks, want) } } + +func assertNoError(t *testing.T, err error) { + t.Helper() + if err != nil { + t.Errorf("err: %v", err) + } +} diff --git a/storage/s3/s3_storage_test.go b/storage/s3/s3_storage_test.go index 8658c780d..1c2c794ef 100644 --- a/storage/s3/s3_storage_test.go +++ b/storage/s3/s3_storage_test.go @@ -61,7 +61,7 @@ func TestS3Storage(t *testing.T) { cleanupOldData(ctx, t) data := make([]byte, 8) - rand.Read(data) + rand.Read(data) //nolint:errcheck st, err := New(context.Background(), &Options{ AccessKeyID: accessKeyID, @@ -86,7 +86,8 @@ func createBucket(t *testing.T) { if err != nil { t.Fatalf("can't initialize minio client: %v", err) } - minioClient.MakeBucket(bucketName, "us-east-1") + // ignore error + _ = minioClient.MakeBucket(bucketName, "us-east-1") } func cleanupOldData(ctx context.Context, t *testing.T) { @@ -101,7 +102,7 @@ func cleanupOldData(ctx context.Context, t *testing.T) { t.Fatalf("err: %v", err) } - st.ListBlocks(ctx, "", func(it storage.BlockMetadata) error { + _ = st.ListBlocks(ctx, "", func(it storage.BlockMetadata) error { age := time.Since(it.Timestamp) if age > cleanupAge { if err := 
st.DeleteBlock(ctx, it.BlockID); err != nil { diff --git a/storage/storage_test.go b/storage/storage_test.go index a94639694..8f41e4796 100644 --- a/storage/storage_test.go +++ b/storage/storage_test.go @@ -13,9 +13,9 @@ func TestListAllBlocksConsistent(t *testing.T) { ctx := context.Background() data := map[string][]byte{} st := storagetesting.NewMapStorage(data, nil, time.Now) - st.PutBlock(ctx, "foo1", []byte{1, 2, 3}) - st.PutBlock(ctx, "foo2", []byte{1, 2, 3}) - st.PutBlock(ctx, "foo3", []byte{1, 2, 3}) + st.PutBlock(ctx, "foo1", []byte{1, 2, 3}) //nolint:errcheck + st.PutBlock(ctx, "foo2", []byte{1, 2, 3}) //nolint:errcheck + st.PutBlock(ctx, "foo3", []byte{1, 2, 3}) //nolint:errcheck // set up faulty storage that will add a block while a scan is in progress. f := &storagetesting.FaultyStorage{ @@ -23,7 +23,7 @@ func TestListAllBlocksConsistent(t *testing.T) { Faults: map[string][]*storagetesting.Fault{ "ListBlocksItem": { {ErrCallback: func() error { - st.PutBlock(ctx, "foo0", []byte{1, 2, 3}) + st.PutBlock(ctx, "foo0", []byte{1, 2, 3}) //nolint:errcheck return nil }}, }, diff --git a/storage/webdav/webdav_storage_test.go b/storage/webdav/webdav_storage_test.go index 1315d8bfa..ddfb24a22 100644 --- a/storage/webdav/webdav_storage_test.go +++ b/storage/webdav/webdav_storage_test.go @@ -44,7 +44,7 @@ func TestWebDAVStorage(t *testing.T) { if err := os.RemoveAll(tmpDir); err != nil { t.Errorf("can't remove all: %q", tmpDir) } - os.MkdirAll(tmpDir, 0700) + os.MkdirAll(tmpDir, 0700) //nolint:errcheck r, err := New(context.Background(), &Options{ URL: server.URL, diff --git a/tests/repository_stress_test/repository_stress_test.go b/tests/repository_stress_test/repository_stress_test.go index 91c73d88d..785686ae8 100644 --- a/tests/repository_stress_test/repository_stress_test.go +++ b/tests/repository_stress_test/repository_stress_test.go @@ -22,10 +22,6 @@ const masterPassword = "foo-bar-baz-1234" -type testContext struct { - r *repo.Repository -} - var ( knownBlocks []string knownBlocksMutex sync.Mutex @@ -54,7 +50,7 @@ func TestStressRepository(t *testing.T) { configFile1 := filepath.Join(tmpPath, "kopia1.config") configFile2 := filepath.Join(tmpPath, "kopia2.config") - os.MkdirAll(storagePath, 0700) + assertNoError(t, os.MkdirAll(storagePath, 0700)) st, err := filesystem.New(ctx, &filesystem.Options{ Path: storagePath, }) @@ -314,3 +310,10 @@ func writeRandomManifest(ctx context.Context, t *testing.T, r *repo.Repository) }) return err } + +func assertNoError(t *testing.T, err error) { + t.Helper() + if err != nil { + t.Errorf("err: %v", err) + } +} From 731d6abebc163b6ad2afaef3b77e717f8c7b1de2 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 1 Apr 2019 21:53:42 -0700 Subject: [PATCH 69/74] fixed build broken by overly aggressive linter fixes --- .travis.yml | 1 - Makefile | 3 --- block/block_manager_test.go | 4 +++- block/packindex_test.go | 7 +++---- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 34abfcbcb..5a567f575 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,5 +6,4 @@ os: before_install: - openssl aes-256-cbc -K $encrypted_0098ef8519ef_key -iv $encrypted_0098ef8519ef_iv -in test_service_account.json.enc -out storage/gcs/test_service_account.json -d -- make travis-setup script: make travis diff --git a/Makefile b/Makefile index d06fd3717..9dfde8361 100644 --- a/Makefile +++ b/Makefile @@ -10,9 +10,6 @@ setup: GO111MODULE=off go get -u gopkg.in/alecthomas/gometalinter.v2 GO111MODULE=off gometalinter.v2 --install 
-travis-setup: - GO111MODULE=off go get github.com/mattn/goveralls - lint: $(LINTER_TOOL) $(LINTER_TOOL) run diff --git a/block/block_manager_test.go b/block/block_manager_test.go index ecf71de6d..354786528 100644 --- a/block/block_manager_test.go +++ b/block/block_manager_test.go @@ -502,7 +502,9 @@ func TestRewriteDeleted(t *testing.T) { applyStep(action1) assertNoError(t, bm.DeleteBlock(block1)) applyStep(action2) - assertNoError(t, bm.RewriteBlock(ctx, block1)) + if got, want := bm.RewriteBlock(ctx, block1), storage.ErrBlockNotFound; got != want && got != nil { + t.Errorf("unexpected error %v, wanted %v", got, want) + } applyStep(action3) verifyBlockNotFound(ctx, t, bm, block1) dumpBlockManagerData(t, data) diff --git a/block/packindex_test.go b/block/packindex_test.go index 3a4d5c658..c771ec5a7 100644 --- a/block/packindex_test.go +++ b/block/packindex_test.go @@ -188,14 +188,13 @@ func fuzzTestIndexOpen(t *testing.T, originalData []byte) { } defer ndx.Close() cnt := 0 - assertNoError(t, ndx.Iterate("", func(cb Info) error { + _ = ndx.Iterate("", func(cb Info) error { if cnt < 10 { - _, err := ndx.GetInfo(cb.BlockID) - assertNoError(t, err) + _, _ = ndx.GetInfo(cb.BlockID) } cnt++ return nil - })) + }) }) } From 38102efc73bb211e0da0037b5153334d39b6aae4 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Mon, 1 Apr 2019 22:00:23 -0700 Subject: [PATCH 70/74] Makefile: missing dependency --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9dfde8361..3c0c3df91 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ test: GO111MODULE=on go test -tags test -count=1 -coverprofile=raw.cov --coverpkg ./... -timeout 90s ./... grep -v testing/ raw.cov > tmp.cov -upload-coverage: +upload-coverage: $(GOVERALLS_TOOL) $(GOVERALLS_TOOL) -service=travis-ci -coverprofile=tmp.cov coverage-html: From 0ca85ee16928149ce9cd958906e0d0f77c3bc330 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 3 Apr 2019 18:11:09 -0700 Subject: [PATCH 71/74] travis: switched GO111MODULE=on globally --- .travis.yml | 2 ++ Makefile | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5a567f575..5f2a371c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,3 +7,5 @@ before_install: - openssl aes-256-cbc -K $encrypted_0098ef8519ef_key -iv $encrypted_0098ef8519ef_iv -in test_service_account.json.enc -out storage/gcs/test_service_account.json -d script: make travis +env: + - GO111MODULE=on diff --git a/Makefile b/Makefile index 3c0c3df91..29e39000d 100644 --- a/Makefile +++ b/Makefile @@ -22,24 +22,24 @@ $(GOVERALLS_TOOL): GO111MODULE=off GOPATH=$(CURDIR)/.tools go get github.com/mattn/goveralls build-linux-amd64: - CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=linux go build ./... + CGO_ENABLED=0 GOARCH=amd64 GOOS=linux go build ./... build-windows-amd64: - CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=windows go build ./... + CGO_ENABLED=0 GOARCH=amd64 GOOS=windows go build ./... build-darwin-amd64: - CGO_ENABLED=0 GO111MODULE=on GOARCH=amd64 GOOS=darwin go build ./... + CGO_ENABLED=0 GOARCH=amd64 GOOS=darwin go build ./... build-linux-arm: - CGO_ENABLED=0 GO111MODULE=on GOARCH=arm GOOS=linux go build ./... + CGO_ENABLED=0 GOARCH=arm GOOS=linux go build ./... build-linux-arm64: - CGO_ENABLED=0 GO111MODULE=on GOARCH=arm64 GOOS=linux go build ./... + CGO_ENABLED=0 GOARCH=arm64 GOOS=linux go build ./... 
build-all: build-linux-amd64 build-windows-amd64 build-darwin-amd64 build-linux-arm build-linux-arm64 test: - GO111MODULE=on go test -tags test -count=1 -coverprofile=raw.cov --coverpkg ./... -timeout 90s ./... + go test -tags test -count=1 -coverprofile=raw.cov --coverpkg ./... -timeout 90s ./... grep -v testing/ raw.cov > tmp.cov upload-coverage: $(GOVERALLS_TOOL) From beede7c71fb0bdd1d18050936f7b700f47de451c Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Wed, 3 Apr 2019 18:15:18 -0700 Subject: [PATCH 72/74] travis: pre-download modules as part of travis setup --- .travis.yml | 1 + Makefile | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5f2a371c2..f3fec3d3f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ os: before_install: - openssl aes-256-cbc -K $encrypted_0098ef8519ef_key -iv $encrypted_0098ef8519ef_iv -in test_service_account.json.enc -out storage/gcs/test_service_account.json -d +install: make travis-setup script: make travis env: - GO111MODULE=on diff --git a/Makefile b/Makefile index 29e39000d..f64561b61 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,9 @@ GOVERALLS_TOOL=.tools/bin/goveralls all: test lint +travis-setup: + go mod download + travis: build-all test upload-coverage setup: From e2554df46d420491b21929a7b08192b1e68d1b18 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Sat, 11 May 2019 09:15:14 -0700 Subject: [PATCH 73/74] travis: ignore decryption errors for KOPIA_GCS_CREDENTIALS_FILE and skip tests, decryption does not work for pull requests --- .travis.yml | 2 +- storage/gcs/gcs_storage_test.go | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f3fec3d3f..f8303f07e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,7 @@ os: - linux before_install: - openssl aes-256-cbc -K $encrypted_0098ef8519ef_key -iv $encrypted_0098ef8519ef_iv - -in test_service_account.json.enc -out storage/gcs/test_service_account.json -d + -in test_service_account.json.enc -out storage/gcs/test_service_account.json -d || echo OK install: make travis-setup script: make travis env: diff --git a/storage/gcs/gcs_storage_test.go b/storage/gcs/gcs_storage_test.go index 5805bbb1c..b76c40673 100644 --- a/storage/gcs/gcs_storage_test.go +++ b/storage/gcs/gcs_storage_test.go @@ -17,10 +17,15 @@ func TestGCSStorage(t *testing.T) { t.Skip("KOPIA_GCS_TEST_BUCKET not provided") } + credsFile := os.Getenv("KOPIA_GCS_CREDENTIALS_FILE") + if _, err := os.Stat(credsFile); err != nil { + t.Skip("skipping test because GCS credentials file can't be opened") + } + ctx := context.Background() st, err := gcs.New(ctx, &gcs.Options{ BucketName: bucket, - ServiceAccountCredentials: os.Getenv("KOPIA_GCS_CREDENTIALS_FILE"), + ServiceAccountCredentials: credsFile, }) if err != nil { From bac8adcaecf64fd816a7100c6a94b29e2be5d940 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 10 May 2019 19:24:45 -0700 Subject: [PATCH 74/74] switched wrapping errors from fmt.Errorf() to errors.Wrap() --- block/block_manager_compaction.go | 8 +++++--- block/index.go | 14 ++++++++------ block/merged_test.go | 5 +++-- connect.go | 12 ++++++------ format_block.go | 22 +++++++++++----------- go.mod | 1 + go.sum | 2 ++ initialize.go | 7 ++++--- manifest/manifest_manager.go | 14 +++++++------- manifest/manifest_manager_test.go | 4 ++-- object/object_manager.go | 5 +++-- object/object_writer.go | 4 +++- open.go | 23 ++++++++++++----------- repository.go | 12 ++++++------ upgrade.go | 4 +++- 15 files changed, 76 insertions(+), 
61 deletions(-) diff --git a/block/block_manager_compaction.go b/block/block_manager_compaction.go index 01638b708..33e6c41a5 100644 --- a/block/block_manager_compaction.go +++ b/block/block_manager_compaction.go @@ -5,6 +5,8 @@ "context" "fmt" "time" + + "github.com/pkg/errors" ) var autoCompactionOptions = CompactOptions{ @@ -29,7 +31,7 @@ func (bm *Manager) CompactIndexes(ctx context.Context, opt CompactOptions) error indexBlocks, _, err := bm.loadPackIndexesUnlocked(ctx) if err != nil { - return fmt.Errorf("error loading indexes: %v", err) + return errors.Wrap(err, "error loading indexes") } blocksToCompact := bm.getBlocksToCompact(indexBlocks, opt) @@ -98,12 +100,12 @@ func (bm *Manager) compactAndDeleteIndexBlocks(ctx context.Context, indexBlocks var buf bytes.Buffer if err := bld.Build(&buf); err != nil { - return fmt.Errorf("unable to build an index: %v", err) + return errors.Wrap(err, "unable to build an index") } compactedIndexBlock, err := bm.writePackIndexesNew(ctx, buf.Bytes()) if err != nil { - return fmt.Errorf("unable to write compacted indexes: %v", err) + return errors.Wrap(err, "unable to write compacted indexes") } formatLog.Debugf("wrote compacted index (%v bytes) in %v", compactedIndexBlock, time.Since(t0)) diff --git a/block/index.go b/block/index.go index a4f4a4c1d..89d49e61a 100644 --- a/block/index.go +++ b/block/index.go @@ -7,6 +7,8 @@ "io" "sort" "strings" + + "github.com/pkg/errors" ) // packIndex is a read-only index of packed blocks. @@ -32,7 +34,7 @@ func readHeader(readerAt io.ReaderAt) (headerInfo, error) { var header [8]byte if n, err := readerAt.ReadAt(header[:], 0); err != nil || n != 8 { - return headerInfo{}, fmt.Errorf("invalid header: %v", err) + return headerInfo{}, errors.Wrap(err, "invalid header") } if header[0] != 1 { @@ -58,14 +60,14 @@ func readHeader(readerAt io.ReaderAt) (headerInfo, error) { func (b *index) Iterate(prefix string, cb func(Info) error) error { startPos, err := b.findEntryPosition(prefix) if err != nil { - return fmt.Errorf("could not find starting position: %v", err) + return errors.Wrap(err, "could not find starting position") } stride := b.hdr.keySize + b.hdr.valueSize entry := make([]byte, stride) for i := startPos; i < b.hdr.entryCount; i++ { n, err := b.readerAt.ReadAt(entry, int64(8+stride*i)) if err != nil || n != len(entry) { - return fmt.Errorf("unable to read from index: %v", err) + return errors.Wrap(err, "unable to read from index") } key := entry[0:b.hdr.keySize] @@ -73,7 +75,7 @@ func (b *index) Iterate(prefix string, cb func(Info) error) error { i, err := b.entryToInfo(bytesToContentID(key), value) if err != nil { - return fmt.Errorf("invalid index data: %v", err) + return errors.Wrap(err, "invalid index data") } if !strings.HasPrefix(i.BlockID, prefix) { break @@ -163,7 +165,7 @@ func (b *index) entryToInfo(blockID string, entryData []byte) (Info, error) { packFile := make([]byte, e.PackFileLength()) n, err := b.readerAt.ReadAt(packFile, int64(e.PackFileOffset())) if err != nil || n != int(e.PackFileLength()) { - return Info{}, fmt.Errorf("can't read pack block ID: %v", err) + return Info{}, errors.Wrap(err, "can't read pack block ID") } return Info{ @@ -190,7 +192,7 @@ func (b *index) Close() error { func openPackIndex(readerAt io.ReaderAt) (packIndex, error) { h, err := readHeader(readerAt) if err != nil { - return nil, fmt.Errorf("invalid header: %v", err) + return nil, errors.Wrap(err, "invalid header") } return &index{hdr: h, readerAt: readerAt}, nil } diff --git a/block/merged_test.go 
b/block/merged_test.go index 58da5a2c0..cea9c0ac4 100644 --- a/block/merged_test.go +++ b/block/merged_test.go @@ -2,9 +2,10 @@ import ( "bytes" - "fmt" "reflect" "testing" + + "github.com/pkg/errors" ) func TestMerged(t *testing.T) { @@ -86,7 +87,7 @@ func indexWithItems(items ...Info) (packIndex, error) { } var buf bytes.Buffer if err := b.Build(&buf); err != nil { - return nil, fmt.Errorf("build error: %v", err) + return nil, errors.Wrap(err, "build error") } return openPackIndex(bytes.NewReader(buf.Bytes())) } diff --git a/connect.go b/connect.go index 002319ade..d6675cf29 100644 --- a/connect.go +++ b/connect.go @@ -5,13 +5,13 @@ "crypto/sha256" "encoding/hex" "encoding/json" - "fmt" "io/ioutil" "os" "path/filepath" "github.com/kopia/repo/block" "github.com/kopia/repo/storage" + "github.com/pkg/errors" ) // ConnectOptions specifies options when persisting configuration to connect to a repository. @@ -23,7 +23,7 @@ type ConnectOptions struct { func Connect(ctx context.Context, configFile string, st storage.Storage, password string, opt ConnectOptions) error { formatBytes, err := st.GetBlock(ctx, FormatBlockID, 0, -1) if err != nil { - return fmt.Errorf("unable to read format block: %v", err) + return errors.Wrap(err, "unable to read format block") } f, err := parseFormatBlock(formatBytes) @@ -35,7 +35,7 @@ func Connect(ctx context.Context, configFile string, st storage.Storage, passwor lc.Storage = st.ConnectionInfo() if err = setupCaching(configFile, &lc, opt.CachingOptions, f.UniqueID); err != nil { - return fmt.Errorf("unable to set up caching: %v", err) + return errors.Wrap(err, "unable to set up caching") } d, err := json.MarshalIndent(&lc, "", " ") @@ -44,11 +44,11 @@ func Connect(ctx context.Context, configFile string, st storage.Storage, passwor } if err = os.MkdirAll(filepath.Dir(configFile), 0700); err != nil { - return fmt.Errorf("unable to create config directory: %v", err) + return errors.Wrap(err, "unable to create config directory") } if err = ioutil.WriteFile(configFile, d, 0600); err != nil { - return fmt.Errorf("unable to write config file: %v", err) + return errors.Wrap(err, "unable to write config file") } // now verify that the repository can be opened with the provided config file. 
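The mechanical change throughout this patch - fmt.Errorf("...: %v", err) becoming errors.Wrap(err, "...") - renders the same message text but keeps the original error reachable as a cause (and records a stack trace). A small sketch of the difference, using the github.com/pkg/errors dependency added to go.mod later in this patch:

package main

import (
	"fmt"

	"github.com/pkg/errors"
)

var errConnRefused = errors.New("connection refused")

func openStorage() error {
	// Same rendered message as fmt.Errorf("cannot open storage: %v", err),
	// but the cause is preserved rather than flattened into a string.
	return errors.Wrap(errConnRefused, "cannot open storage")
}

func main() {
	err := openStorage()
	fmt.Println(err)                                 // cannot open storage: connection refused
	fmt.Println(errors.Cause(err) == errConnRefused) // true
	fmt.Printf("%+v\n", err)                         // %+v additionally prints the recorded stack
}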
@@ -69,7 +69,7 @@ func setupCaching(configPath string, lc *LocalConfig, opt block.CachingOptions, if opt.CacheDirectory == "" { cacheDir, err := os.UserCacheDir() if err != nil { - return fmt.Errorf("unable to determine cache directory: %v", err) + return errors.Wrap(err, "unable to determine cache directory") } h := sha256.New() diff --git a/format_block.go b/format_block.go index 7900df737..fffda5070 100644 --- a/format_block.go +++ b/format_block.go @@ -9,11 +9,11 @@ "crypto/rand" "crypto/sha256" "encoding/json" - "errors" "fmt" "io" "github.com/kopia/repo/storage" + "github.com/pkg/errors" ) const defaultFormatEncryption = "AES256_GCM" @@ -61,7 +61,7 @@ func parseFormatBlock(b []byte) (*formatBlock, error) { f := &formatBlock{} if err := json.Unmarshal(b, &f); err != nil { - return nil, fmt.Errorf("invalid format block: %v", err) + return nil, errors.Wrap(err, "invalid format block") } return f, nil @@ -84,7 +84,7 @@ func RecoverFormatBlock(ctx context.Context, st storage.Storage, filename string foundMetadata = bm return nil }); err != nil { - return nil, fmt.Errorf("error: %v", err) + return nil, errors.Wrap(err, "error") } if foundMetadata.BlockID == "" { @@ -149,11 +149,11 @@ func writeFormatBlock(ctx context.Context, st storage.Storage, f *formatBlock) e e := json.NewEncoder(&buf) e.SetIndent("", " ") if err := e.Encode(f); err != nil { - return fmt.Errorf("unable to marshal format block: %v", err) + return errors.Wrap(err, "unable to marshal format block") } if err := st.PutBlock(ctx, FormatBlockID, buf.Bytes()); err != nil { - return fmt.Errorf("unable to write format block: %v", err) + return errors.Wrap(err, "unable to write format block") } return nil @@ -167,7 +167,7 @@ func (f *formatBlock) decryptFormatBytes(masterKey []byte) (*repositoryObjectFor case "AES256_GCM": aead, authData, err := initCrypto(masterKey, f.UniqueID) if err != nil { - return nil, fmt.Errorf("cannot initialize cipher: %v", err) + return nil, errors.Wrap(err, "cannot initialize cipher") } content := append([]byte(nil), f.EncryptedFormatBytes...) 
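For context on the format_block.go hunks nearby: with AES256_GCM format encryption, initCrypto wraps an AES-256 block cipher in GCM, the repository's unique ID feeds in as additional authenticated data, and the nonce is stored as a prefix of EncryptedFormatBytes. A hedged sketch of that layout using only the standard library (the key and auth data below are placeholders, not kopia's actual derivation):

package main

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"fmt"
)

func main() {
	key := make([]byte, 32)              // placeholder; kopia derives this from the master key
	authData := []byte("unique-repo-id") // placeholder for the repository's unique ID
	rand.Read(key)                       //nolint:errcheck

	blk, err := aes.NewCipher(key)
	if err != nil {
		panic(err)
	}
	aead, err := cipher.NewGCM(blk)
	if err != nil {
		panic(err)
	}

	nonce := make([]byte, aead.NonceSize())
	rand.Read(nonce) //nolint:errcheck

	// Seal appends ciphertext+tag after the nonce, yielding nonce||ciphertext,
	// the same prefix layout decryptFormatBytes expects.
	sealed := aead.Seal(nonce, nonce, []byte(`{"version":"1"}`), authData)

	// Open authenticates both the ciphertext and authData before decrypting.
	plain, err := aead.Open(nil, sealed[:aead.NonceSize()], sealed[aead.NonceSize():], authData)
	fmt.Println(string(plain), err)
}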
@@ -184,7 +184,7 @@ func (f *formatBlock) decryptFormatBytes(masterKey []byte) (*repositoryObjectFor var erc encryptedRepositoryConfig if err := json.Unmarshal(plainText, &erc); err != nil { - return nil, fmt.Errorf("invalid repository format: %v", err) + return nil, errors.Wrap(err, "invalid repository format") } return &erc.Format, nil @@ -200,11 +200,11 @@ func initCrypto(masterKey, repositoryID []byte) (cipher.AEAD, []byte, error) { blk, err := aes.NewCipher(aesKey) if err != nil { - return nil, nil, fmt.Errorf("cannot create cipher: %v", err) + return nil, nil, errors.Wrap(err, "cannot create cipher") } aead, err := cipher.NewGCM(blk) if err != nil { - return nil, nil, fmt.Errorf("cannot create cipher: %v", err) + return nil, nil, errors.Wrap(err, "cannot create cipher") } return aead, authData, nil @@ -219,11 +219,11 @@ func encryptFormatBytes(f *formatBlock, format *repositoryObjectFormat, masterKe case "AES256_GCM": content, err := json.Marshal(&encryptedRepositoryConfig{Format: *format}) if err != nil { - return fmt.Errorf("can't marshal format to JSON: %v", err) + return errors.Wrap(err, "can't marshal format to JSON") } aead, authData, err := initCrypto(masterKey, repositoryID) if err != nil { - return fmt.Errorf("unable to initialize crypto: %v", err) + return errors.Wrap(err, "unable to initialize crypto") } nonceLength := aead.NonceSize() noncePlusContentLength := nonceLength + len(content) diff --git a/go.mod b/go.mod index c415ab412..7c1e82c5b 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/minio/minio-go v6.0.11+incompatible github.com/mitchellh/go-homedir v1.0.0 // indirect github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 + github.com/pkg/errors v0.8.1 github.com/silvasur/buzhash v0.0.0-20160816060738-9bdec3dec7c6 github.com/studio-b12/gowebdav v0.0.0-20181230112802-6c32839dbdfc go.opencensus.io v0.18.0 // indirect diff --git a/go.sum b/go.sum index b10eecb60..0829aa8da 100644 --- a/go.sum +++ b/go.sum @@ -27,6 +27,8 @@ github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk github.com/op/go-logging v0.0.0-20160315200505-970db520ece7 h1:lDH9UUVJtmYCjyT0CI4q8xvlXPxeZ0gYCVvWbmPlp88= github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= +github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= diff --git a/initialize.go b/initialize.go index 281b33e28..a654168c9 100644 --- a/initialize.go +++ b/initialize.go @@ -9,6 +9,7 @@ "github.com/kopia/repo/block" "github.com/kopia/repo/object" "github.com/kopia/repo/storage" + "github.com/pkg/errors" ) // BuildInfo is the build information of Kopia. 
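Initialize, in the hunk below, is the write-side counterpart of Connect and Open touched earlier in this patch: derive a master key from the password, encrypt the format structure, and write it as the kopia.repository block. A sketch of how a caller might string the three together (the paths are made up, and passing nil for the option structs is assumed here to select defaults):

package main

import (
	"context"
	"log"

	"github.com/kopia/repo"
	"github.com/kopia/repo/storage/filesystem"
)

func main() {
	ctx := context.Background()

	// Filesystem-backed storage, as used by the stress tests.
	st, err := filesystem.New(ctx, &filesystem.Options{Path: "/tmp/demo-repo"})
	if err != nil {
		log.Fatal(err)
	}

	// Writes the encrypted format block (kopia.repository) to storage.
	if err := repo.Initialize(ctx, st, nil, "demo-password"); err != nil {
		log.Fatal(err)
	}

	// Persists a local config file pointing at the storage.
	if err := repo.Connect(ctx, "/tmp/demo.config", st, "demo-password", repo.ConnectOptions{}); err != nil {
		log.Fatal(err)
	}

	r, err := repo.Open(ctx, "/tmp/demo.config", "demo-password", nil)
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close(ctx) //nolint:errcheck
}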
@@ -44,15 +45,15 @@ func Initialize(ctx context.Context, st storage.Storage, opt *NewRepositoryOptio format := formatBlockFromOptions(opt) masterKey, err := format.deriveMasterKeyFromPassword(password) if err != nil { - return fmt.Errorf("unable to derive master key: %v", err) + return errors.Wrap(err, "unable to derive master key") } if err := encryptFormatBytes(format, repositoryObjectFormatFromOptions(opt), masterKey, format.UniqueID); err != nil { - return fmt.Errorf("unable to encrypt format bytes: %v", err) + return errors.Wrap(err, "unable to encrypt format bytes") } if err := writeFormatBlock(ctx, st, format); err != nil { - return fmt.Errorf("unable to write format block: %v", err) + return errors.Wrap(err, "unable to write format block") } return nil diff --git a/manifest/manifest_manager.go b/manifest/manifest_manager.go index 10634f621..24454c3e1 100644 --- a/manifest/manifest_manager.go +++ b/manifest/manifest_manager.go @@ -8,7 +8,6 @@ "crypto/rand" "encoding/hex" "encoding/json" - "errors" "fmt" "sort" "sync" @@ -16,6 +15,7 @@ "github.com/kopia/repo/internal/repologging" "github.com/kopia/repo/storage" + "github.com/pkg/errors" ) var log = repologging.Logger("kopia/manifest") @@ -62,12 +62,12 @@ func (m *Manager) Put(ctx context.Context, labels map[string]string, payload int random := make([]byte, 16) if _, err := rand.Read(random); err != nil { - return "", fmt.Errorf("can't initialize randomness: %v", err) + return "", errors.Wrap(err, "can't initialize randomness") } b, err := json.Marshal(payload) if err != nil { - return "", fmt.Errorf("marshal error: %v", err) + return "", errors.Wrap(err, "marshal error") } e := &manifestEntry{ @@ -277,7 +277,7 @@ func (m *Manager) loadCommittedBlocksLocked(ctx context.Context) error { for { blocks, err := m.b.ListBlocks(manifestBlockPrefix) if err != nil { - return fmt.Errorf("unable to list manifest blocks: %v", err) + return errors.Wrap(err, "unable to list manifest blocks") } m.committedEntries = map[string]*manifestEntry{} @@ -293,7 +293,7 @@ func (m *Manager) loadCommittedBlocksLocked(ctx context.Context) error { // try again, lost a race with another manifest manager which just did compaction continue } - return fmt.Errorf("unable to load manifest blocks: %v", err) + return errors.Wrap(err, "unable to load manifest blocks") } if err := m.maybeCompactLocked(ctx); err != nil { @@ -419,11 +419,11 @@ func (m *Manager) maybeCompactLocked(ctx context.Context) error { log.Debugf("performing automatic compaction of %v blocks", len(m.committedBlockIDs)) if err := m.compactLocked(ctx); err != nil { - return fmt.Errorf("unable to compact manifest blocks: %v", err) + return errors.Wrap(err, "unable to compact manifest blocks") } if err := m.b.Flush(ctx); err != nil { - return fmt.Errorf("unable to flush blocks after auto-compaction: %v", err) + return errors.Wrap(err, "unable to flush blocks after auto-compaction") } return nil diff --git a/manifest/manifest_manager_test.go b/manifest/manifest_manager_test.go index c261d8795..d2a74ac18 100644 --- a/manifest/manifest_manager_test.go +++ b/manifest/manifest_manager_test.go @@ -2,7 +2,6 @@ import ( "context" - "fmt" "reflect" "sort" "strings" @@ -11,6 +10,7 @@ "github.com/kopia/repo/block" "github.com/kopia/repo/internal/storagetesting" + "github.com/pkg/errors" ) func TestManifest(t *testing.T) { @@ -272,7 +272,7 @@ func newManagerForTesting(ctx context.Context, t *testing.T, data map[string][]b MaxPackSize: 100000, }, block.CachingOptions{}, nil) if err != nil { - return nil, 
fmt.Errorf("can't create block manager: %v", err) + return nil, errors.Wrap(err, "can't create block manager") } return NewManager(ctx, bm) diff --git a/object/object_manager.go b/object/object_manager.go index c205bd076..e4621ce40 100644 --- a/object/object_manager.go +++ b/object/object_manager.go @@ -9,6 +9,7 @@ "io" "github.com/kopia/repo/block" + "github.com/pkg/errors" ) // Reader allows reading, seeking, getting the length of and closing of a repository object. @@ -98,7 +99,7 @@ func (om *Manager) VerifyObject(ctx context.Context, oid ID) (int64, []string, e func (om *Manager) verifyIndirectObjectInternal(ctx context.Context, indexObjectID ID, blocks *blockTracker) (int64, error) { if _, err := om.verifyObjectInternal(ctx, indexObjectID, blocks); err != nil { - return 0, fmt.Errorf("unable to read index: %v", err) + return 0, errors.Wrap(err, "unable to read index") } rd, err := om.Open(ctx, indexObjectID) if err != nil { @@ -204,7 +205,7 @@ func (om *Manager) flattenListChunk(rawReader io.Reader) ([]indirectObjectEntry, var ind indirectObject if err := json.NewDecoder(rawReader).Decode(&ind); err != nil { - return nil, fmt.Errorf("invalid indirect object: %v", err) + return nil, errors.Wrap(err, "invalid indirect object") } return ind.Entries, nil diff --git a/object/object_writer.go b/object/object_writer.go index cf79754f8..ef0b35e04 100644 --- a/object/object_writer.go +++ b/object/object_writer.go @@ -7,6 +7,8 @@ "fmt" "io" "sync" + + "github.com/pkg/errors" ) // Writer allows writing content to the storage and supports automatic deduplication and encryption @@ -127,7 +129,7 @@ func (w *objectWriter) Result() (ID, error) { } if err := json.NewEncoder(iw).Encode(ind); err != nil { - return "", fmt.Errorf("unable to write indirect block index: %v", err) + return "", errors.Wrap(err, "unable to write indirect block index") } oid, err := iw.Result() if err != nil { diff --git a/open.go b/open.go index ce9080ecf..7c2734695 100644 --- a/open.go +++ b/open.go @@ -13,6 +13,7 @@ "github.com/kopia/repo/object" "github.com/kopia/repo/storage" "github.com/kopia/repo/storage/logging" + "github.com/pkg/errors" ) var ( @@ -55,7 +56,7 @@ func Open(ctx context.Context, configFile string, password string, options *Opti st, err := storage.NewStorage(ctx, lc.Storage) if err != nil { - return nil, fmt.Errorf("cannot open storage: %v", err) + return nil, errors.Wrap(err, "cannot open storage") } if options.TraceStorage != nil { @@ -79,12 +80,12 @@ func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, pa // Read cache block, potentially from cache. 
	fb, err := readAndCacheFormatBlockBytes(ctx, st, caching.CacheDirectory)
 	if err != nil {
-		return nil, fmt.Errorf("unable to read format block: %v", err)
+		return nil, errors.Wrap(err, "unable to read format block")
 	}
 
 	f, err := parseFormatBlock(fb)
 	if err != nil {
-		return nil, fmt.Errorf("can't parse format block: %v", err)
+		return nil, errors.Wrap(err, "can't parse format block")
 	}
 
 	fb, err = addFormatBlockChecksumAndLength(fb)
@@ -99,7 +100,7 @@ func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, pa
 
 	repoConfig, err := f.decryptFormatBytes(masterKey)
 	if err != nil {
-		return nil, fmt.Errorf("unable to decrypt repository config: %v", err)
+		return nil, errors.Wrap(err, "unable to decrypt repository config")
 	}
 
 	caching.HMACSecret = deriveKeyFromMasterKey(masterKey, f.UniqueID, []byte("local-cache-integrity"), 16)
@@ -112,19 +113,19 @@ func OpenWithConfig(ctx context.Context, st storage.Storage, lc *LocalConfig, pa
 	log.Debugf("initializing block manager")
 	bm, err := block.NewManager(ctx, st, fo, caching, fb)
 	if err != nil {
-		return nil, fmt.Errorf("unable to open block manager: %v", err)
+		return nil, errors.Wrap(err, "unable to open block manager")
 	}
 
 	log.Debugf("initializing object manager")
 	om, err := object.NewObjectManager(ctx, bm, repoConfig.Format, options.ObjectManagerOptions)
 	if err != nil {
-		return nil, fmt.Errorf("unable to open object manager: %v", err)
+		return nil, errors.Wrap(err, "unable to open object manager")
 	}
 
 	log.Debugf("initializing manifest manager")
 	manifests, err := manifest.NewManager(ctx, bm)
 	if err != nil {
-		return nil, fmt.Errorf("unable to open manifests: %v", err)
+		return nil, errors.Wrap(err, "unable to open manifests")
 	}
 
 	return &Repository{
@@ -154,21 +155,21 @@ func SetCachingConfig(ctx context.Context, configFile string, opt block.CachingO
 
 	st, err := storage.NewStorage(ctx, lc.Storage)
 	if err != nil {
-		return fmt.Errorf("cannot open storage: %v", err)
+		return errors.Wrap(err, "cannot open storage")
 	}
 
 	fb, err := readAndCacheFormatBlockBytes(ctx, st, "")
 	if err != nil {
-		return fmt.Errorf("can't read format block: %v", err)
+		return errors.Wrap(err, "can't read format block")
 	}
 
 	f, err := parseFormatBlock(fb)
 	if err != nil {
-		return fmt.Errorf("can't parse format block: %v", err)
+		return errors.Wrap(err, "can't parse format block")
 	}
 
 	if err = setupCaching(configFile, lc, opt, f.UniqueID); err != nil {
-		return fmt.Errorf("unable to set up caching: %v", err)
+		return errors.Wrap(err, "unable to set up caching")
 	}
 
 	d, err := json.MarshalIndent(&lc, "", "  ")
diff --git a/repository.go b/repository.go
index 1e438b89d..01f5dff1e 100644
--- a/repository.go
+++ b/repository.go
@@ -2,13 +2,13 @@
 
 import (
 	"context"
-	"fmt"
 	"time"
 
 	"github.com/kopia/repo/block"
 	"github.com/kopia/repo/manifest"
 	"github.com/kopia/repo/object"
 	"github.com/kopia/repo/storage"
+	"github.com/pkg/errors"
 )
 
 // Repository represents storage where both content-addressable and user-addressable data is kept.
@@ -29,13 +29,13 @@ type Repository struct {
 
 // Close closes the repository and releases all resources.
func (r *Repository) Close(ctx context.Context) error { if err := r.Manifests.Flush(ctx); err != nil { - return fmt.Errorf("error flushing manifests: %v", err) + return errors.Wrap(err, "error flushing manifests") } if err := r.Blocks.Flush(ctx); err != nil { - return fmt.Errorf("error closing blocks: %v", err) + return errors.Wrap(err, "error closing blocks") } if err := r.Storage.Close(ctx); err != nil { - return fmt.Errorf("error closing storage: %v", err) + return errors.Wrap(err, "error closing storage") } return nil } @@ -53,7 +53,7 @@ func (r *Repository) Flush(ctx context.Context) error { func (r *Repository) Refresh(ctx context.Context) error { updated, err := r.Blocks.Refresh(ctx) if err != nil { - return fmt.Errorf("error refreshing block index: %v", err) + return errors.Wrap(err, "error refreshing block index") } if !updated { @@ -63,7 +63,7 @@ func (r *Repository) Refresh(ctx context.Context) error { log.Debugf("block index refreshed") if err := r.Manifests.Refresh(ctx); err != nil { - return fmt.Errorf("error reloading manifests: %v", err) + return errors.Wrap(err, "error reloading manifests") } log.Debugf("manifests refreshed") diff --git a/upgrade.go b/upgrade.go index ad081264e..0c2487292 100644 --- a/upgrade.go +++ b/upgrade.go @@ -3,6 +3,8 @@ import ( "context" "fmt" + + "github.com/pkg/errors" ) // Upgrade upgrades repository data structures to the latest version. @@ -12,7 +14,7 @@ func (r *Repository) Upgrade(ctx context.Context) error { log.Debug("decrypting format...") repoConfig, err := f.decryptFormatBytes(r.masterKey) if err != nil { - return fmt.Errorf("unable to decrypt repository config: %v", err) + return errors.Wrap(err, "unable to decrypt repository config") } var migrated bool
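
The hunks in this patch apply one mechanical rewrite throughout: fmt.Errorf("<message>: %v", err) becomes errors.Wrap(err, "<message>") from github.com/pkg/errors. A minimal sketch of the behavioral difference, outside the repository (failingStep and openRepository below are illustrative stand-ins, not kopia code):

	package main

	import (
		"fmt"

		"github.com/pkg/errors"
	)

	// failingStep is a hypothetical stand-in for any call wrapped in this
	// patch (storage.NewStorage, parseFormatBlock, block.NewManager, ...).
	func failingStep() error {
		return errors.New("permission denied")
	}

	func openRepository() error {
		if err := failingStep(); err != nil {
			// errors.Wrap annotates err with a message and records a stack
			// trace at the wrap site; it returns nil when err is nil.
			return errors.Wrap(err, "cannot open storage")
		}
		return nil
	}

	func main() {
		err := openRepository()
		fmt.Println(err)               // cannot open storage: permission denied
		fmt.Println(errors.Cause(err)) // permission denied
		fmt.Printf("%+v\n", err)       // message plus the recorded stack trace
	}

The message rendered by %v is identical to the fmt.Errorf form, but where fmt.Errorf flattens the cause into an opaque string, the wrapped error keeps the original value reachable through errors.Cause and carries a stack trace printable with %+v.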