Files
kopia/cli/command_benchmark_splitters.go
Jarek Kowalski 03339c18af [breaking change] deprecated DYNAMIC splitter due to license issue
The splitter in question was depending on
github.com/silvasur/buzhash which is not licensed according to FOSSA bot

Switched to new faster implementation of buzhash, which is
unfortunately incompatible and will split the objects in different
places.

This change is semi-breaking - old repositories can be read, but
when uploading large objects they will be re-uploaded where previously
they would be de-duped.

Also added 'benchmark splitters' subcommand and moved 'block cryptobenchmark'
subcommand to 'benchmark crypto'.
2019-05-30 22:20:45 -07:00

111 lines
2.8 KiB
Go

package cli
import (
"math/rand"
"sort"
"time"
"github.com/kopia/kopia/repo/object"
kingpin "gopkg.in/alecthomas/kingpin.v2"
)
// Command-line definition of the "splitter" subcommand, registered under
// benchmarkCommands, together with the flags that control its input data.
var (
benchmarkSplitterCommand = benchmarkCommands.Command("splitter", "Run splitter benchmarks")
// Seed for the pseudo-random generator that produces the benchmark data (default 42).
benchmarkSplitterRandSeed = benchmarkSplitterCommand.Flag("rand-seed", "Random seed").Default("42").Int64()
// Size in bytes of each generated data block (default 32MB).
// NOTE: the flag is spelled "data-size" even though the variable is named BlockSize.
benchmarkSplitterBlockSize = benchmarkSplitterCommand.Flag("data-size", "Size of a data to split").Default("32MB").Bytes()
// Number of data blocks generated and fed to each splitter (default 16).
benchmarkSplitterBlockCount = benchmarkSplitterCommand.Flag("block-count", "Number of data blocks to split").Default("16").Int()
)
// runBenchmarkSplitterAction benchmarks every splitter listed in
// object.SupportedSplitters against the same pseudo-random input and prints,
// for each one, the elapsed time and the distribution of the segment lengths
// it produced.
//
// The input is *benchmarkSplitterBlockCount buffers of
// *benchmarkSplitterBlockSize bytes each, filled from a generator seeded with
// *benchmarkSplitterRandSeed. One line is printed as soon as each splitter
// finishes; afterwards all results are printed again below a separator line,
// ordered from fastest to slowest.
//
// If a splitter produces no segments at all (for example when the block count
// or the data size is zero), its statistics are reported as zeros instead of
// indexing into an empty slice.
//
// The ctx argument is not used. The function always returns nil.
func runBenchmarkSplitterAction(ctx *kingpin.ParseContext) error {
	// Statistics columns shared by the per-splitter line and the final ranking.
	const statsFormat = "%-25v %6v ms count:%v min:%v 10th:%v 25th:%v 50th:%v 75th:%v 90th:%v max:%v\n"

	type benchResult struct {
		splitter     string
		duration     time.Duration
		segmentCount int
		min          int
		p10          int
		p25          int
		p50          int
		p75          int
		p90          int
		max          int
	}

	var results []benchResult

	// generate data blocks
	var dataBlocks [][]byte

	rnd := rand.New(rand.NewSource(*benchmarkSplitterRandSeed))

	for i := 0; i < *benchmarkSplitterBlockCount; i++ {
		b := make([]byte, *benchmarkSplitterBlockSize)
		// (*rand.Rand).Read is documented to always return a nil error,
		// so the result is intentionally not checked.
		rnd.Read(b)
		dataBlocks = append(dataBlocks, b)
	}

	log.Infof("splitting %v blocks of %v each", *benchmarkSplitterBlockCount, *benchmarkSplitterBlockSize)

	for _, sp := range object.SupportedSplitters {
		fact := object.GetSplitterFactory(sp)

		var segmentLengths []int

		// The timed region covers feeding every byte to the splitter and
		// recording the resulting segment lengths.
		t0 := time.Now()

		for _, data := range dataBlocks {
			// A fresh splitter per block, so state does not carry over between blocks.
			s := fact()
			l := 0

			for _, d := range data {
				l++

				if s.ShouldSplit(d) {
					segmentLengths = append(segmentLengths, l)
					l = 0
				}
			}

			// Trailing bytes after the last split point form a final segment.
			if l > 0 {
				segmentLengths = append(segmentLengths, l)
			}
		}

		dur := time.Since(t0)

		sort.Ints(segmentLengths)

		r := benchResult{
			splitter:     sp,
			duration:     dur,
			segmentCount: len(segmentLengths),
		}

		// Only compute order statistics when there is something to index;
		// with no segments the zero values are reported.
		if n := len(segmentLengths); n > 0 {
			// at returns the sorted length found pct percent of the way through.
			at := func(pct int) int {
				return segmentLengths[n*pct/100]
			}

			r.min = segmentLengths[0]
			r.p10 = at(10)
			r.p25 = at(25)
			r.p50 = at(50)
			r.p75 = at(75)
			r.p90 = at(90)
			r.max = segmentLengths[n-1]
		}

		printStdout(statsFormat,
			r.splitter,
			r.duration.Nanoseconds()/1e6,
			r.segmentCount,
			r.min, r.p10, r.p25, r.p50, r.p75, r.p90, r.max)

		results = append(results, r)
	}

	// Rank splitters from fastest to slowest.
	sort.Slice(results, func(i, j int) bool {
		return results[i].duration < results[j].duration
	})

	printStdout("-----------------------------------------------------------------\n")

	// ndx is the zero-based position in the ranking.
	for ndx, r := range results {
		printStdout("%3v. "+statsFormat,
			ndx,
			r.splitter,
			r.duration.Nanoseconds()/1e6,
			r.segmentCount,
			r.min, r.p10, r.p25, r.p50, r.p75, r.p90, r.max)
	}

	return nil
}
func init() {
benchmarkSplitterCommand.Action(runBenchmarkSplitterAction)
}