diff --git a/internal/completeset/complete_set.go b/internal/completeset/complete_set.go new file mode 100644 index 000000000..0d0ede9e7 --- /dev/null +++ b/internal/completeset/complete_set.go @@ -0,0 +1,72 @@ +// Package completeset manages complete set of blob metadata. +package completeset + +import ( + "strconv" + "strings" + + "github.com/kopia/kopia/repo/blob" +) + +// FindFirst looks for a first complete set of blobs IDs following a naming convention: +// '-s-c' +// where: +// 'prefix' is arbitrary string not containing a dash ('-') +// 'set' is a random string shared by all indexes in the same set +// 'count' is a number that specifies how many items must be in the set to make it complete. +// +// The algorithm returns IDs of blobs that form the first complete set. +func FindFirst(bms []blob.Metadata) []blob.Metadata { + sets := FindAll(bms) + if len(sets) == 0 { + return nil + } + + return sets[0] +} + +// ExcludeIncomplete removes from the provided slice any blobs that are part of incomplete sets. +func ExcludeIncomplete(bms []blob.Metadata) []blob.Metadata { + var result []blob.Metadata + + for _, set := range FindAll(bms) { + result = append(result, set...) + } + + return result +} + +// FindAll returns a list of complete sets in the provided slice, grouped by set ID. +// Blobs that are not in any set are also returned. +func FindAll(bms []blob.Metadata) [][]blob.Metadata { + sets := map[string][]blob.Metadata{} + + var completeSets [][]blob.Metadata + + for _, bm := range bms { + id := bm.BlobID + parts := strings.Split(string(id), "-") + + if len(parts) < 3 || !strings.HasPrefix(parts[1], "s") || !strings.HasPrefix(parts[2], "c") { + // treat blobs with malformed names as a single-item sets of their own. + completeSets = append(completeSets, []blob.Metadata{bm}) + continue + } + + count, err := strconv.Atoi(parts[2][1:]) + if err != nil { + // treat blobs with malformed names as a single-item sets of their own. + completeSets = append(completeSets, []blob.Metadata{bm}) + continue + } + + setID := parts[1] + sets[setID] = append(sets[setID], bm) + + if len(sets[setID]) == count { + completeSets = append(completeSets, sets[setID]) + } + } + + return completeSets +} diff --git a/internal/completeset/complete_set_test.go b/internal/completeset/complete_set_test.go new file mode 100644 index 000000000..9a8ab8e6f --- /dev/null +++ b/internal/completeset/complete_set_test.go @@ -0,0 +1,175 @@ +package completeset_test + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/kopia/kopia/internal/completeset" + "github.com/kopia/kopia/repo/blob" +) + +func TestFindFirstAndAll(t *testing.T) { + cases := []struct { + input []blob.ID + wantFirst []blob.ID + wantAll [][]blob.ID + wantExcludeIncomplete []blob.ID + }{ + { + input: []blob.ID{}, + wantFirst: []blob.ID{}, + wantAll: [][]blob.ID(nil), + wantExcludeIncomplete: []blob.ID{}, + }, + + // one complete session of size 2 + { + input: []blob.ID{ + "a-s0-c2", + "b-s0-c2", + }, + wantFirst: []blob.ID{"a-s0-c2", "b-s0-c2"}, + wantAll: [][]blob.ID{ + {"a-s0-c2", "b-s0-c2"}, + }, + wantExcludeIncomplete: []blob.ID{"a-s0-c2", "b-s0-c2"}, + }, + // one complete session with some malformed name, which by itself forms a complete session. + { + input: []blob.ID{ + "a-s0-c2", + "malformed", + "b-s0-c2", + }, + wantFirst: []blob.ID{ + "malformed", + }, + wantAll: [][]blob.ID{ + {"malformed"}, + {"a-s0-c2", "b-s0-c2"}, + }, + wantExcludeIncomplete: []blob.ID{ + "malformed", + "a-s0-c2", "b-s0-c2", + }, + }, + // one complete session with some malformed blob ID + { + input: []blob.ID{ + "a-s0-c2", + "malformed-s0-x2", + "b-s0-c2", + }, + wantFirst: []blob.ID{ + "malformed-s0-x2", + }, + wantAll: [][]blob.ID{ + {"malformed-s0-x2"}, + {"a-s0-c2", "b-s0-c2"}, + }, + wantExcludeIncomplete: []blob.ID{"malformed-s0-x2", "a-s0-c2", "b-s0-c2"}, + }, + // one complete session with some malformed count + { + input: []blob.ID{ + "a-s0-c2", + "malformed-s0-cNAN", + "b-s0-c2", + }, + wantFirst: []blob.ID{ + "malformed-s0-cNAN", + }, + wantAll: [][]blob.ID{ + {"malformed-s0-cNAN"}, + {"a-s0-c2", "b-s0-c2"}, + }, + wantExcludeIncomplete: []blob.ID{ + "malformed-s0-cNAN", + "a-s0-c2", "b-s0-c2", + }, + }, + // two complete sessions, we pick 's0' as it's the first one to become complete. + { + input: []blob.ID{ + "foo-s0-c2", + "aaa-s1-c2", + "bar-s0-c2", + "bbb-s1-c2", + }, + wantFirst: []blob.ID{ + "foo-s0-c2", "bar-s0-c2", + }, + wantAll: [][]blob.ID{ + {"foo-s0-c2", "bar-s0-c2"}, + {"aaa-s1-c2", "bbb-s1-c2"}, + }, + wantExcludeIncomplete: []blob.ID{ + "foo-s0-c2", "bar-s0-c2", + "aaa-s1-c2", "bbb-s1-c2", + }, + }, + // two incomplete sessions + { + input: []blob.ID{ + "foo-s0-c3", + "aaa-s1-c3", + "bar-s0-c3", + "bbb-s1-c3", + }, + wantFirst: []blob.ID{}, + wantAll: nil, + wantExcludeIncomplete: []blob.ID{}, + }, + // two complete, two incomplete sessions + { + input: []blob.ID{ + "foo-s0-c2", + "aaa-s1-c3", + "bar-s0-c2", + "bbb-s1-c3", + "foo-s2-c2", + "aaa-s3-c3", + "bar-s2-c2", + "bbb-s3-c3", + }, + wantFirst: []blob.ID{ + "foo-s0-c2", + "bar-s0-c2", + }, + wantAll: [][]blob.ID{ + {"foo-s0-c2", "bar-s0-c2"}, + {"foo-s2-c2", "bar-s2-c2"}, + }, + wantExcludeIncomplete: []blob.ID{ + "foo-s0-c2", "bar-s0-c2", "foo-s2-c2", "bar-s2-c2", + }, + }, + } + + for _, tc := range cases { + require.Equal(t, tc.wantFirst, blob.IDsFromMetadata(completeset.FindFirst(dummyMetadataForIDs(tc.input))), "invalid result for FindFirst(%v)", tc.input) + require.Equal(t, tc.wantAll, idsFromMetadataSets(completeset.FindAll(dummyMetadataForIDs(tc.input))), "invalid result for FindAll(%v)", tc.input) + require.Equal(t, tc.wantExcludeIncomplete, blob.IDsFromMetadata(completeset.ExcludeIncomplete(dummyMetadataForIDs(tc.input))), "invalid result for ExcludeIncomplete(%v)", tc.input) + } +} + +func idsFromMetadataSets(sets [][]blob.Metadata) [][]blob.ID { + var result [][]blob.ID + + for _, s := range sets { + result = append(result, blob.IDsFromMetadata(s)) + } + + return result +} + +func dummyMetadataForIDs(ids []blob.ID) []blob.Metadata { + var result []blob.Metadata + + for _, id := range ids { + result = append(result, blob.Metadata{BlobID: id}) + } + + return result +} diff --git a/internal/epoch/complete_set.go b/internal/epoch/complete_set.go deleted file mode 100644 index db66fed7f..000000000 --- a/internal/epoch/complete_set.go +++ /dev/null @@ -1,45 +0,0 @@ -package epoch - -import ( - "strconv" - "strings" - - "github.com/kopia/kopia/repo/blob" -) - -// findCompleteSetOfBlobs looks for a complete set of blobs IDs following a naming convention: -// '-s-c' -// where: -// 'prefix' is arbitrary string not containing a dash ('-') -// 'set' is a random string shared by all indexes in the same set -// 'count' is a number that specifies how many items must be in the set to make it complete. -// -// The algorithm returns IDs of blobs that form the first complete set. -func findCompleteSetOfBlobs(bms []blob.Metadata) []blob.Metadata { - sets := map[string][]blob.Metadata{} - - for _, bm := range bms { - id := bm.BlobID - parts := strings.Split(string(id), "-") - - if len(parts) < 3 || !strings.HasPrefix(parts[1], "s") || !strings.HasPrefix(parts[2], "c") { - // malformed ID, ignore - continue - } - - count, err := strconv.Atoi(parts[2][1:]) - if err != nil { - // malformed ID, ignore - continue - } - - setID := parts[1] - sets[setID] = append(sets[setID], bm) - - if len(sets[setID]) == count { - return sets[setID] - } - } - - return nil -} diff --git a/internal/epoch/complete_set_test.go b/internal/epoch/complete_set_test.go deleted file mode 100644 index 8409e44c9..000000000 --- a/internal/epoch/complete_set_test.go +++ /dev/null @@ -1,96 +0,0 @@ -package epoch - -import ( - "testing" - - "github.com/stretchr/testify/require" - - "github.com/kopia/kopia/repo/blob" -) - -func TestFindCompleteSetOfBlobs(t *testing.T) { - cases := []struct { - input []blob.ID - want []blob.ID - }{ - { - input: []blob.ID{}, - want: []blob.ID{}, - }, - - // one complete session of size 2 - { - input: []blob.ID{ - "a-s0-c2", - "b-s0-c2", - }, - want: []blob.ID{ - "a-s0-c2", - "b-s0-c2", - }, - }, - // one complete session with some malformed name - { - input: []blob.ID{ - "a-s0-c2", - "malformed", - "b-s0-c2", - }, - want: []blob.ID{ - "a-s0-c2", - "b-s0-c2", - }, - }, - // one complete session with some malformed blob ID - { - input: []blob.ID{ - "a-s0-c2", - "malformed-s0-x2", - "b-s0-c2", - }, - want: []blob.ID{ - "a-s0-c2", - "b-s0-c2", - }, - }, - // one complete session with some malformed count - { - input: []blob.ID{ - "a-s0-c2", - "malformed-s0-cNAN", - "b-s0-c2", - }, - want: []blob.ID{ - "a-s0-c2", - "b-s0-c2", - }, - }, - // two complete sessions, we pick 's0' as it's the first one to become complete. - { - input: []blob.ID{ - "foo-s0-c2", - "aaa-s1-c2", - "bar-s0-c2", - "bbb-s1-c2", - }, - want: []blob.ID{ - "foo-s0-c2", - "bar-s0-c2", - }, - }, - } - - for _, tc := range cases { - require.Equal(t, tc.want, blob.IDsFromMetadata(findCompleteSetOfBlobs(dummyMetadataForIDs(tc.input))), "invalid result for %v", tc.input) - } -} - -func dummyMetadataForIDs(ids []blob.ID) []blob.Metadata { - var result []blob.Metadata - - for _, id := range ids { - result = append(result, blob.Metadata{BlobID: id}) - } - - return result -} diff --git a/internal/epoch/epoch_manager.go b/internal/epoch/epoch_manager.go index b0b73c90f..f4ac0a358 100644 --- a/internal/epoch/epoch_manager.go +++ b/internal/epoch/epoch_manager.go @@ -14,6 +14,7 @@ "golang.org/x/sync/errgroup" "github.com/kopia/kopia/internal/clock" + "github.com/kopia/kopia/internal/completeset" "github.com/kopia/kopia/internal/gather" "github.com/kopia/kopia/repo/blob" "github.com/kopia/kopia/repo/logging" @@ -286,7 +287,7 @@ func (e *Manager) loadRangeCheckpoints(ctx context.Context, cs *CurrentSnapshot) for epoch1, m := range groupByEpochRanges(blobs) { for epoch2, bms := range m { - if comp := findCompleteSetOfBlobs(bms); comp != nil { + if comp := completeset.FindFirst(bms); comp != nil { erm := &RangeMetadata{ MinEpoch: epoch1, MaxEpoch: epoch2, @@ -310,7 +311,7 @@ func (e *Manager) loadSingleEpochCompactions(ctx context.Context, cs *CurrentSna } for epoch, bms := range groupByEpochNumber(blobs) { - if comp := findCompleteSetOfBlobs(bms); comp != nil { + if comp := completeset.FindFirst(bms); comp != nil { cs.SingleEpochCompactionSets[epoch] = comp } }