Files
kopia/object/object_splitter.go

115 lines
2.5 KiB
Go

package object
import (
"math"
"sort"
"github.com/kopia/kopia/internal/config"
"github.com/silvasur/buzhash"
)
// objectSplitter decides where a stream of bytes is cut into blocks.
// Implementations in this file are stateful: they count the bytes seen since
// the last split they reported.
type objectSplitter interface {
// add consumes the next byte of the stream and returns true when the
// implementation decides that the current block ends at this byte.
add(b byte) bool
}
// SupportedSplitters is a list of names of supported object splitters. It is
// filled in by init from the keys of splitterFactories and sorted in increasing
// string order. The supported names are:
//
// NEVER - prevents objects from ever splitting
// FIXED - always splits large objects exactly at the maximum block size boundary
// DYNAMIC - dynamically splits large objects based on rolling hash of contents.
var SupportedSplitters []string
// splitterFactories maps every supported splitter name to a constructor that
// builds a new splitter from the repository's object format settings.
var splitterFactories = map[string]func(*config.RepositoryObjectFormat) objectSplitter{
	// Content-defined splitting driven by a buzhash rolling hash, bounded by
	// the configured minimum and maximum block sizes.
	"DYNAMIC": func(format *config.RepositoryObjectFormat) objectSplitter {
		hasher := buzhash.NewBuzHash(32)
		return newRollingHashSplitter(hasher, format.MinBlockSize, format.AvgBlockSize, format.MaxBlockSize)
	},
	// Fixed-size splitting at the configured maximum block size.
	"FIXED": func(format *config.RepositoryObjectFormat) objectSplitter {
		return newFixedSplitter(format.MaxBlockSize)
	},
	// No splitting at all; the format settings are not consulted.
	"NEVER": func(_ *config.RepositoryObjectFormat) objectSplitter {
		return newNeverSplitter()
	},
}
func init() {
for k := range splitterFactories {
SupportedSplitters = append(SupportedSplitters, k)
}
sort.Strings(SupportedSplitters)
}
// DefaultSplitter is the name of the splitter used by default for new repositories.
// "DYNAMIC" is the splitterFactories entry that builds the rolling-hash based splitter.
const DefaultSplitter = "DYNAMIC"
// neverSplitter is an objectSplitter that never reports a split point, so an
// object fed through it stays in a single block.
type neverSplitter struct{}

// add ignores the byte and always returns false.
func (*neverSplitter) add(byte) bool {
	return false
}

// newNeverSplitter returns a splitter that never splits.
func newNeverSplitter() objectSplitter {
	return new(neverSplitter)
}
// fixedSplitter is an objectSplitter that reports a split after every
// chunkLength bytes, regardless of content.
type fixedSplitter struct {
	cur         int // bytes seen since the last reported split
	chunkLength int // number of bytes per chunk
}

// add counts the byte and returns true once chunkLength bytes have been seen
// since the previous split, resetting the counter for the next chunk.
func (s *fixedSplitter) add(b byte) bool {
	s.cur++
	if s.cur < s.chunkLength {
		return false
	}

	s.cur = 0
	return true
}

// newFixedSplitter returns a splitter that splits every chunkLength bytes.
func newFixedSplitter(chunkLength int) objectSplitter {
	fs := fixedSplitter{chunkLength: chunkLength}
	return &fs
}
// rollingHash is the hashing behavior required by rollingHashSplitter.
type rollingHash interface {
// HashByte consumes one byte and returns a 32-bit value that the splitter
// treats as the current hash sum.
HashByte(b byte) uint32
}
// rollingHashSplitter is an objectSplitter that picks split points from the
// content itself: a block ends where the masked rolling hash is zero, subject
// to minimum and maximum block sizes.
type rollingHashSplitter struct {
	rh               rollingHash // hash fed with every byte passed to add
	mask             uint32      // hash bits that must all be zero at a split point
	currentBlockSize int         // bytes seen since the last reported split
	minBlockSize     int         // hash-based splits are suppressed below this size
	maxBlockSize     int         // a split is forced once this size is reached
}

// add feeds b into the rolling hash and returns true when the current block
// ends at this byte, resetting the block size counter.
//
// A split is reported when either
//   - the block has reached maxBlockSize, or
//   - the block has reached minBlockSize and the masked hash is zero.
//
// The maximum is evaluated independently of the minimum, so it remains a hard
// cap even if the splitter was configured with minBlockSize > maxBlockSize.
func (rs *rollingHashSplitter) add(b byte) bool {
	// The hash must see every byte, including those below the minimum size.
	sum := rs.rh.HashByte(b)
	rs.currentBlockSize++

	atMax := rs.currentBlockSize >= rs.maxBlockSize
	atHashBoundary := rs.currentBlockSize >= rs.minBlockSize && sum&rs.mask == 0
	if !atMax && !atHashBoundary {
		return false
	}

	rs.currentBlockSize = 0
	return true
}
// newRollingHashSplitter returns a splitter that cuts blocks where the low
// bits of rh's hash are all zero, never before minBlockSize bytes and never
// later than maxBlockSize bytes.
//
// approxBlockSize selects how many low hash bits are tested (see
// rollingHashBits); assuming evenly distributed hash values, more bits means
// fewer matches and therefore larger blocks on average.
func newRollingHashSplitter(rh rollingHash, minBlockSize int, approxBlockSize int, maxBlockSize int) objectSplitter {
	// Build a mask with the lowest rollingHashBits(approxBlockSize) bits set.
	allOnes := ^uint32(0)
	hashMask := ^(allOnes << rollingHashBits(approxBlockSize))

	return &rollingHashSplitter{
		rh:               rh,
		mask:             hashMask,
		currentBlockSize: 0,
		minBlockSize:     minBlockSize,
		maxBlockSize:     maxBlockSize,
	}
}
// rollingHashBits returns log2(n) rounded to the nearest integer, i.e. the
// exponent of the power of two closest to n on a logarithmic scale.
//
// For n <= 1 it returns 0. Without this guard, n <= 0 would produce -Inf or
// NaN from math.Log2, and converting those to uint is implementation-dependent
// in Go, which would make the resulting hash mask vary between platforms.
func rollingHashBits(n int) uint {
	if n <= 1 {
		return 0
	}

	e := math.Log2(float64(n))
	return uint(math.Floor(e + 0.5))
}