mirror of
https://github.com/kopia/kopia.git
synced 2026-05-07 22:32:45 -04:00
refactor: extracted complete blob set functions to separate package (#1175)
* refactor: extracted complete blob set functions to separate package * completeset: added more functions Also treat malformed blob IDs as their own sets for backwards compat.
This commit is contained in:
72
internal/completeset/complete_set.go
Normal file
72
internal/completeset/complete_set.go
Normal file
@@ -0,0 +1,72 @@
|
||||
// Package completeset manages complete set of blob metadata.
|
||||
package completeset
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/kopia/kopia/repo/blob"
|
||||
)
|
||||
|
||||
// FindFirst looks for a first complete set of blobs IDs following a naming convention:
|
||||
// '<any>-s<set>-c<count>'
|
||||
// where:
|
||||
// 'prefix' is arbitrary string not containing a dash ('-')
|
||||
// 'set' is a random string shared by all indexes in the same set
|
||||
// 'count' is a number that specifies how many items must be in the set to make it complete.
|
||||
//
|
||||
// The algorithm returns IDs of blobs that form the first complete set.
|
||||
func FindFirst(bms []blob.Metadata) []blob.Metadata {
|
||||
sets := FindAll(bms)
|
||||
if len(sets) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return sets[0]
|
||||
}
|
||||
|
||||
// ExcludeIncomplete removes from the provided slice any blobs that are part of incomplete sets.
|
||||
func ExcludeIncomplete(bms []blob.Metadata) []blob.Metadata {
|
||||
var result []blob.Metadata
|
||||
|
||||
for _, set := range FindAll(bms) {
|
||||
result = append(result, set...)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// FindAll returns a list of complete sets in the provided slice, grouped by set ID.
|
||||
// Blobs that are not in any set are also returned.
|
||||
func FindAll(bms []blob.Metadata) [][]blob.Metadata {
|
||||
sets := map[string][]blob.Metadata{}
|
||||
|
||||
var completeSets [][]blob.Metadata
|
||||
|
||||
for _, bm := range bms {
|
||||
id := bm.BlobID
|
||||
parts := strings.Split(string(id), "-")
|
||||
|
||||
if len(parts) < 3 || !strings.HasPrefix(parts[1], "s") || !strings.HasPrefix(parts[2], "c") {
|
||||
// treat blobs with malformed names as a single-item sets of their own.
|
||||
completeSets = append(completeSets, []blob.Metadata{bm})
|
||||
continue
|
||||
}
|
||||
|
||||
count, err := strconv.Atoi(parts[2][1:])
|
||||
if err != nil {
|
||||
// treat blobs with malformed names as a single-item sets of their own.
|
||||
completeSets = append(completeSets, []blob.Metadata{bm})
|
||||
continue
|
||||
}
|
||||
|
||||
setID := parts[1]
|
||||
sets[setID] = append(sets[setID], bm)
|
||||
|
||||
if len(sets[setID]) == count {
|
||||
completeSets = append(completeSets, sets[setID])
|
||||
}
|
||||
}
|
||||
|
||||
return completeSets
|
||||
}
|
||||
175
internal/completeset/complete_set_test.go
Normal file
175
internal/completeset/complete_set_test.go
Normal file
@@ -0,0 +1,175 @@
|
||||
package completeset_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/kopia/kopia/internal/completeset"
|
||||
"github.com/kopia/kopia/repo/blob"
|
||||
)
|
||||
|
||||
func TestFindFirstAndAll(t *testing.T) {
|
||||
cases := []struct {
|
||||
input []blob.ID
|
||||
wantFirst []blob.ID
|
||||
wantAll [][]blob.ID
|
||||
wantExcludeIncomplete []blob.ID
|
||||
}{
|
||||
{
|
||||
input: []blob.ID{},
|
||||
wantFirst: []blob.ID{},
|
||||
wantAll: [][]blob.ID(nil),
|
||||
wantExcludeIncomplete: []blob.ID{},
|
||||
},
|
||||
|
||||
// one complete session of size 2
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
wantFirst: []blob.ID{"a-s0-c2", "b-s0-c2"},
|
||||
wantAll: [][]blob.ID{
|
||||
{"a-s0-c2", "b-s0-c2"},
|
||||
},
|
||||
wantExcludeIncomplete: []blob.ID{"a-s0-c2", "b-s0-c2"},
|
||||
},
|
||||
// one complete session with some malformed name, which by itself forms a complete session.
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"malformed",
|
||||
"b-s0-c2",
|
||||
},
|
||||
wantFirst: []blob.ID{
|
||||
"malformed",
|
||||
},
|
||||
wantAll: [][]blob.ID{
|
||||
{"malformed"},
|
||||
{"a-s0-c2", "b-s0-c2"},
|
||||
},
|
||||
wantExcludeIncomplete: []blob.ID{
|
||||
"malformed",
|
||||
"a-s0-c2", "b-s0-c2",
|
||||
},
|
||||
},
|
||||
// one complete session with some malformed blob ID
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"malformed-s0-x2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
wantFirst: []blob.ID{
|
||||
"malformed-s0-x2",
|
||||
},
|
||||
wantAll: [][]blob.ID{
|
||||
{"malformed-s0-x2"},
|
||||
{"a-s0-c2", "b-s0-c2"},
|
||||
},
|
||||
wantExcludeIncomplete: []blob.ID{"malformed-s0-x2", "a-s0-c2", "b-s0-c2"},
|
||||
},
|
||||
// one complete session with some malformed count
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"malformed-s0-cNAN",
|
||||
"b-s0-c2",
|
||||
},
|
||||
wantFirst: []blob.ID{
|
||||
"malformed-s0-cNAN",
|
||||
},
|
||||
wantAll: [][]blob.ID{
|
||||
{"malformed-s0-cNAN"},
|
||||
{"a-s0-c2", "b-s0-c2"},
|
||||
},
|
||||
wantExcludeIncomplete: []blob.ID{
|
||||
"malformed-s0-cNAN",
|
||||
"a-s0-c2", "b-s0-c2",
|
||||
},
|
||||
},
|
||||
// two complete sessions, we pick 's0' as it's the first one to become complete.
|
||||
{
|
||||
input: []blob.ID{
|
||||
"foo-s0-c2",
|
||||
"aaa-s1-c2",
|
||||
"bar-s0-c2",
|
||||
"bbb-s1-c2",
|
||||
},
|
||||
wantFirst: []blob.ID{
|
||||
"foo-s0-c2", "bar-s0-c2",
|
||||
},
|
||||
wantAll: [][]blob.ID{
|
||||
{"foo-s0-c2", "bar-s0-c2"},
|
||||
{"aaa-s1-c2", "bbb-s1-c2"},
|
||||
},
|
||||
wantExcludeIncomplete: []blob.ID{
|
||||
"foo-s0-c2", "bar-s0-c2",
|
||||
"aaa-s1-c2", "bbb-s1-c2",
|
||||
},
|
||||
},
|
||||
// two incomplete sessions
|
||||
{
|
||||
input: []blob.ID{
|
||||
"foo-s0-c3",
|
||||
"aaa-s1-c3",
|
||||
"bar-s0-c3",
|
||||
"bbb-s1-c3",
|
||||
},
|
||||
wantFirst: []blob.ID{},
|
||||
wantAll: nil,
|
||||
wantExcludeIncomplete: []blob.ID{},
|
||||
},
|
||||
// two complete, two incomplete sessions
|
||||
{
|
||||
input: []blob.ID{
|
||||
"foo-s0-c2",
|
||||
"aaa-s1-c3",
|
||||
"bar-s0-c2",
|
||||
"bbb-s1-c3",
|
||||
"foo-s2-c2",
|
||||
"aaa-s3-c3",
|
||||
"bar-s2-c2",
|
||||
"bbb-s3-c3",
|
||||
},
|
||||
wantFirst: []blob.ID{
|
||||
"foo-s0-c2",
|
||||
"bar-s0-c2",
|
||||
},
|
||||
wantAll: [][]blob.ID{
|
||||
{"foo-s0-c2", "bar-s0-c2"},
|
||||
{"foo-s2-c2", "bar-s2-c2"},
|
||||
},
|
||||
wantExcludeIncomplete: []blob.ID{
|
||||
"foo-s0-c2", "bar-s0-c2", "foo-s2-c2", "bar-s2-c2",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
require.Equal(t, tc.wantFirst, blob.IDsFromMetadata(completeset.FindFirst(dummyMetadataForIDs(tc.input))), "invalid result for FindFirst(%v)", tc.input)
|
||||
require.Equal(t, tc.wantAll, idsFromMetadataSets(completeset.FindAll(dummyMetadataForIDs(tc.input))), "invalid result for FindAll(%v)", tc.input)
|
||||
require.Equal(t, tc.wantExcludeIncomplete, blob.IDsFromMetadata(completeset.ExcludeIncomplete(dummyMetadataForIDs(tc.input))), "invalid result for ExcludeIncomplete(%v)", tc.input)
|
||||
}
|
||||
}
|
||||
|
||||
func idsFromMetadataSets(sets [][]blob.Metadata) [][]blob.ID {
|
||||
var result [][]blob.ID
|
||||
|
||||
for _, s := range sets {
|
||||
result = append(result, blob.IDsFromMetadata(s))
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func dummyMetadataForIDs(ids []blob.ID) []blob.Metadata {
|
||||
var result []blob.Metadata
|
||||
|
||||
for _, id := range ids {
|
||||
result = append(result, blob.Metadata{BlobID: id})
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
package epoch
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/kopia/kopia/repo/blob"
|
||||
)
|
||||
|
||||
// findCompleteSetOfBlobs looks for a complete set of blobs IDs following a naming convention:
|
||||
// '<any>-s<set>-c<count>'
|
||||
// where:
|
||||
// 'prefix' is arbitrary string not containing a dash ('-')
|
||||
// 'set' is a random string shared by all indexes in the same set
|
||||
// 'count' is a number that specifies how many items must be in the set to make it complete.
|
||||
//
|
||||
// The algorithm returns IDs of blobs that form the first complete set.
|
||||
func findCompleteSetOfBlobs(bms []blob.Metadata) []blob.Metadata {
|
||||
sets := map[string][]blob.Metadata{}
|
||||
|
||||
for _, bm := range bms {
|
||||
id := bm.BlobID
|
||||
parts := strings.Split(string(id), "-")
|
||||
|
||||
if len(parts) < 3 || !strings.HasPrefix(parts[1], "s") || !strings.HasPrefix(parts[2], "c") {
|
||||
// malformed ID, ignore
|
||||
continue
|
||||
}
|
||||
|
||||
count, err := strconv.Atoi(parts[2][1:])
|
||||
if err != nil {
|
||||
// malformed ID, ignore
|
||||
continue
|
||||
}
|
||||
|
||||
setID := parts[1]
|
||||
sets[setID] = append(sets[setID], bm)
|
||||
|
||||
if len(sets[setID]) == count {
|
||||
return sets[setID]
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
package epoch
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/kopia/kopia/repo/blob"
|
||||
)
|
||||
|
||||
func TestFindCompleteSetOfBlobs(t *testing.T) {
|
||||
cases := []struct {
|
||||
input []blob.ID
|
||||
want []blob.ID
|
||||
}{
|
||||
{
|
||||
input: []blob.ID{},
|
||||
want: []blob.ID{},
|
||||
},
|
||||
|
||||
// one complete session of size 2
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
want: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
},
|
||||
// one complete session with some malformed name
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"malformed",
|
||||
"b-s0-c2",
|
||||
},
|
||||
want: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
},
|
||||
// one complete session with some malformed blob ID
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"malformed-s0-x2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
want: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
},
|
||||
// one complete session with some malformed count
|
||||
{
|
||||
input: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"malformed-s0-cNAN",
|
||||
"b-s0-c2",
|
||||
},
|
||||
want: []blob.ID{
|
||||
"a-s0-c2",
|
||||
"b-s0-c2",
|
||||
},
|
||||
},
|
||||
// two complete sessions, we pick 's0' as it's the first one to become complete.
|
||||
{
|
||||
input: []blob.ID{
|
||||
"foo-s0-c2",
|
||||
"aaa-s1-c2",
|
||||
"bar-s0-c2",
|
||||
"bbb-s1-c2",
|
||||
},
|
||||
want: []blob.ID{
|
||||
"foo-s0-c2",
|
||||
"bar-s0-c2",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
require.Equal(t, tc.want, blob.IDsFromMetadata(findCompleteSetOfBlobs(dummyMetadataForIDs(tc.input))), "invalid result for %v", tc.input)
|
||||
}
|
||||
}
|
||||
|
||||
func dummyMetadataForIDs(ids []blob.ID) []blob.Metadata {
|
||||
var result []blob.Metadata
|
||||
|
||||
for _, id := range ids {
|
||||
result = append(result, blob.Metadata{BlobID: id})
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
@@ -14,6 +14,7 @@
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/kopia/kopia/internal/clock"
|
||||
"github.com/kopia/kopia/internal/completeset"
|
||||
"github.com/kopia/kopia/internal/gather"
|
||||
"github.com/kopia/kopia/repo/blob"
|
||||
"github.com/kopia/kopia/repo/logging"
|
||||
@@ -286,7 +287,7 @@ func (e *Manager) loadRangeCheckpoints(ctx context.Context, cs *CurrentSnapshot)
|
||||
|
||||
for epoch1, m := range groupByEpochRanges(blobs) {
|
||||
for epoch2, bms := range m {
|
||||
if comp := findCompleteSetOfBlobs(bms); comp != nil {
|
||||
if comp := completeset.FindFirst(bms); comp != nil {
|
||||
erm := &RangeMetadata{
|
||||
MinEpoch: epoch1,
|
||||
MaxEpoch: epoch2,
|
||||
@@ -310,7 +311,7 @@ func (e *Manager) loadSingleEpochCompactions(ctx context.Context, cs *CurrentSna
|
||||
}
|
||||
|
||||
for epoch, bms := range groupByEpochNumber(blobs) {
|
||||
if comp := findCompleteSetOfBlobs(bms); comp != nil {
|
||||
if comp := completeset.FindFirst(bms); comp != nil {
|
||||
cs.SingleEpochCompactionSets[epoch] = comp
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user