mirror of
https://github.com/kopia/kopia.git
synced 2026-01-25 23:08:01 -05:00
115 lines
2.5 KiB
Go
115 lines
2.5 KiB
Go
package object
|
|
|
|
import (
|
|
"math"
|
|
|
|
"sort"
|
|
|
|
"github.com/kopia/kopia/internal/config"
|
|
"github.com/silvasur/buzhash"
|
|
)
|
|
|
|
type objectSplitter interface {
|
|
add(b byte) bool
|
|
}
|
|
|
|
// SupportedSplitters is a list of supported object splitters including:
|
|
//
|
|
// NEVER - prevents objects from ever splitting
|
|
// FIXED - always splits large objects exactly at the maximum block size boundary
|
|
// DYNAMIC - dynamicaly splits large objects based on rolling hash of contents.
|
|
var SupportedSplitters []string
|
|
|
|
var splitterFactories = map[string]func(*config.RepositoryObjectFormat) objectSplitter{
|
|
"NEVER": func(f *config.RepositoryObjectFormat) objectSplitter {
|
|
return newNeverSplitter()
|
|
},
|
|
"FIXED": func(f *config.RepositoryObjectFormat) objectSplitter {
|
|
return newFixedSplitter(int(f.MaxBlockSize))
|
|
},
|
|
"DYNAMIC": func(f *config.RepositoryObjectFormat) objectSplitter {
|
|
return newRollingHashSplitter(buzhash.NewBuzHash(32), f.MinBlockSize, f.AvgBlockSize, f.MaxBlockSize)
|
|
},
|
|
}
|
|
|
|
func init() {
|
|
for k := range splitterFactories {
|
|
SupportedSplitters = append(SupportedSplitters, k)
|
|
}
|
|
sort.Strings(SupportedSplitters)
|
|
}
|
|
|
|
// DefaultSplitter is the name of the splitter used by default for new repositories.
|
|
const DefaultSplitter = "DYNAMIC"
|
|
|
|
type neverSplitter struct{}
|
|
|
|
func (s *neverSplitter) add(b byte) bool {
|
|
return false
|
|
}
|
|
|
|
func newNeverSplitter() objectSplitter {
|
|
return &neverSplitter{}
|
|
}
|
|
|
|
type fixedSplitter struct {
|
|
cur int
|
|
chunkLength int
|
|
}
|
|
|
|
func (s *fixedSplitter) add(b byte) bool {
|
|
s.cur++
|
|
if s.cur >= s.chunkLength {
|
|
s.cur = 0
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func newFixedSplitter(chunkLength int) objectSplitter {
|
|
return &fixedSplitter{chunkLength: chunkLength}
|
|
}
|
|
|
|
type rollingHash interface {
|
|
HashByte(b byte) uint32
|
|
}
|
|
|
|
type rollingHashSplitter struct {
|
|
rh rollingHash
|
|
mask uint32
|
|
|
|
currentBlockSize int
|
|
minBlockSize int
|
|
maxBlockSize int
|
|
}
|
|
|
|
func (rs *rollingHashSplitter) add(b byte) bool {
|
|
sum := rs.rh.HashByte(b)
|
|
rs.currentBlockSize++
|
|
if rs.currentBlockSize < rs.minBlockSize {
|
|
return false
|
|
}
|
|
if rs.currentBlockSize >= rs.maxBlockSize {
|
|
rs.currentBlockSize = 0
|
|
return true
|
|
}
|
|
if sum&rs.mask == 0 {
|
|
rs.currentBlockSize = 0
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func newRollingHashSplitter(rh rollingHash, minBlockSize int, approxBlockSize int, maxBlockSize int) objectSplitter {
|
|
bits := rollingHashBits(approxBlockSize)
|
|
mask := ^(^uint32(0) << bits)
|
|
return &rollingHashSplitter{rh, mask, 0, minBlockSize, maxBlockSize}
|
|
}
|
|
|
|
func rollingHashBits(n int) uint {
|
|
e := math.Log2(float64(n))
|
|
exp := math.Floor(e + 0.5)
|
|
return uint(exp)
|
|
}
|