mirror of
https://github.com/kopia/kopia.git
synced 2026-03-22 16:11:56 -04:00
added configurable splitters to repo.Format
This commit is contained in:
@@ -18,9 +18,12 @@ type Format struct {
|
||||
ObjectFormat string `json:"objectFormat,omitempty"` // identifier of object format
|
||||
Secret []byte `json:"secret,omitempty"` // HMAC secret used to generate encryption keys
|
||||
MaxInlineContentLength int32 `json:"maxInlineContentLength,omitempty"` // maximum size of object to be considered for inline storage within ObjectID
|
||||
MaxBlockSize int32 `json:"maxBlockSize,omitempty"` // maximum size of storage block
|
||||
MasterKey []byte `json:"masterKey,omitempty"` // master encryption key (SIV-mode encryption only)
|
||||
ApproxBlockSize int32 `json:"approxBlockSize,omitempty"` // approximate size of storage block (used with rolling hash)
|
||||
Splitter string `json:"splitter,omitempty"` // splitter used to break objects into storage blocks
|
||||
|
||||
MinBlockSize int32 `json:"minBlockSize,omitempty"` // minimum block size used with dynamic splitter
|
||||
ApproxBlockSize int32 `json:"approxBlockSize,omitempty"` // approximate size of storage block (used with dynamic splitter)
|
||||
MaxBlockSize int32 `json:"maxBlockSize,omitempty"` // maximum size of storage block
|
||||
}
|
||||
|
||||
// Validate checks the validity of a Format and returns an error if invalid.
|
||||
|
||||
@@ -44,17 +44,33 @@ type rollingHashSplitter struct {
|
||||
rh rollinghash.Hash32
|
||||
mask uint32
|
||||
allOnes uint32
|
||||
|
||||
currentBlockSize int32
|
||||
minBlockSize int32
|
||||
maxBlockSize int32
|
||||
}
|
||||
|
||||
func (rs *rollingHashSplitter) add(b byte) bool {
|
||||
rs.rh.Roll(b)
|
||||
return rs.rh.Sum32()&rs.mask == rs.allOnes
|
||||
rs.currentBlockSize++
|
||||
if rs.currentBlockSize < rs.minBlockSize {
|
||||
return false
|
||||
}
|
||||
if rs.currentBlockSize >= rs.maxBlockSize {
|
||||
rs.currentBlockSize = 0
|
||||
return true
|
||||
}
|
||||
if rs.rh.Sum32()&rs.mask == rs.allOnes {
|
||||
rs.currentBlockSize = 0
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func newRollingHashSplitter(rh rollinghash.Hash32, approxBlockSize int32) objectSplitter {
|
||||
func newRollingHashSplitter(rh rollinghash.Hash32, minBlockSize int32, approxBlockSize int32, maxBlockSize int32) objectSplitter {
|
||||
bits := rollingHashBits(approxBlockSize)
|
||||
mask := ^(^uint32(0) << bits)
|
||||
return &rollingHashSplitter{rh, mask, (uint32(0)) ^ mask}
|
||||
return &rollingHashSplitter{rh, mask, (uint32(0)) ^ mask, 0, minBlockSize, maxBlockSize}
|
||||
}
|
||||
|
||||
func rollingHashBits(n int32) uint {
|
||||
@@ -71,7 +87,7 @@ func rollingHashBits(n int32) uint {
|
||||
"FIXED": func(f *Format) objectSplitter {
|
||||
return newFixedSplitter(int(f.MaxBlockSize))
|
||||
},
|
||||
"ROLLING": func(f *Format) objectSplitter {
|
||||
return newRollingHashSplitter(buzhash32.New(), f.MaxBlockSize)
|
||||
"DYNAMIC": func(f *Format) objectSplitter {
|
||||
return newRollingHashSplitter(buzhash32.New(), f.MinBlockSize, f.ApproxBlockSize, f.MaxBlockSize)
|
||||
},
|
||||
}
|
||||
|
||||
@@ -15,8 +15,8 @@ func TestSplitters(t *testing.T) {
|
||||
desc string
|
||||
newSplitter func() objectSplitter
|
||||
}{
|
||||
{"rolling buzhash32 with 3 bits", func() objectSplitter { return newRollingHashSplitter(buzhash32.New(), 3) }},
|
||||
{"rolling adler32 with 5 bits", func() objectSplitter { return newRollingHashSplitter(adler32.New(), 5) }},
|
||||
{"rolling buzhash32 with 3 bits", func() objectSplitter { return newRollingHashSplitter(buzhash32.New(), 0, 8, 20) }},
|
||||
{"rolling adler32 with 5 bits", func() objectSplitter { return newRollingHashSplitter(adler32.New(), 0, 32, 100) }},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
@@ -53,17 +53,24 @@ func TestSplitterStability(t *testing.T) {
|
||||
|
||||
{newNeverSplitter(), 0, 0, math.MaxInt32, 0},
|
||||
|
||||
{newRollingHashSplitter(buzhash32.New(), 32), 156283, 31, 1, 427},
|
||||
{newRollingHashSplitter(buzhash32.New(), 1024), 4794, 1042, 1, 10001},
|
||||
{newRollingHashSplitter(buzhash32.New(), 2048), 2404, 2079, 1, 19312},
|
||||
{newRollingHashSplitter(buzhash32.New(), 32768), 143, 34965, 1, 233567},
|
||||
{newRollingHashSplitter(buzhash32.New(), 65536), 72, 69444, 1, 430586},
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 32, math.MaxInt32), 156283, 31, 1, 427},
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 1024, math.MaxInt32), 4794, 1042, 1, 10001},
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 2048, math.MaxInt32), 2404, 2079, 1, 19312},
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 32768, math.MaxInt32), 143, 34965, 1, 233567},
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 65536, math.MaxInt32), 72, 69444, 1, 430586},
|
||||
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 32), 156303, 31, 1, 425},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 1024), 4985, 1003, 1, 9572},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 2048), 2497, 2002, 1, 15173},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 32768), 151, 33112, 790, 164382},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 65536), 76, 65789, 1124, 295680},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 0, 32, math.MaxInt32), 156303, 31, 1, 425},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 0, 1024, math.MaxInt32), 4985, 1003, 1, 9572},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 0, 2048, math.MaxInt32), 2497, 2002, 1, 15173},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 0, 32768, math.MaxInt32), 151, 33112, 790, 164382},
|
||||
{newRollingHashSplitter(rabinkarp32.New(), 0, 65536, math.MaxInt32), 76, 65789, 1124, 295680},
|
||||
|
||||
// min and max
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 32, 64), 179920, 27, 1, 64},
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 1024, 10000), 4795, 1042, 1, 10000},
|
||||
{newRollingHashSplitter(buzhash32.New(), 0, 2048, 10000), 2432, 2055, 1, 10000},
|
||||
{newRollingHashSplitter(buzhash32.New(), 500, 32768, 100000), 147, 34013, 762, 100000},
|
||||
{newRollingHashSplitter(buzhash32.New(), 500, 65536, 100000), 90, 55555, 762, 100000},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
|
||||
@@ -186,9 +186,20 @@ func New(s blob.Storage, f *Format, options ...RepositoryOption) (*Repository, e
|
||||
r := &Repository{
|
||||
Storage: s,
|
||||
format: *f,
|
||||
newSplitter: func() objectSplitter {
|
||||
return newFixedSplitter(int(f.MaxBlockSize))
|
||||
},
|
||||
}
|
||||
|
||||
sp := f.Splitter
|
||||
if sp == "" {
|
||||
sp = "FIXED"
|
||||
}
|
||||
|
||||
os := SupportedSplitters[sp]
|
||||
if os == nil {
|
||||
return nil, fmt.Errorf("unsupported splitter %q", sp)
|
||||
}
|
||||
|
||||
r.newSplitter = func() objectSplitter {
|
||||
return os(f)
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
Reference in New Issue
Block a user