mirror of
https://github.com/kopia/kopia.git
synced 2026-01-23 05:47:57 -05:00
The splitter in question depended on github.com/silvasur/buzhash, which, according to the FOSSA bot, is not licensed. Switched to a new, faster implementation of buzhash, which is unfortunately incompatible and will split objects in different places. This change is semi-breaking: old repositories can still be read, but when large objects are uploaded they will be re-uploaded where previously they would have been de-duplicated. Also added the 'benchmark splitters' subcommand and moved the 'block cryptobenchmark' subcommand to 'benchmark crypto'.
111 lines
2.8 KiB
Go
111 lines
2.8 KiB
Go
package cli
|
|
|
|
import (
|
|
"math/rand"
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/kopia/kopia/repo/object"
|
|
|
|
kingpin "gopkg.in/alecthomas/kingpin.v2"
|
|
)
|
|
|
|
var (
|
|
benchmarkSplitterCommand = benchmarkCommands.Command("splitter", "Run splitter benchmarks")
|
|
benchmarkSplitterRandSeed = benchmarkSplitterCommand.Flag("rand-seed", "Random seed").Default("42").Int64()
|
|
benchmarkSplitterBlockSize = benchmarkSplitterCommand.Flag("data-size", "Size of a data to split").Default("32MB").Bytes()
|
|
benchmarkSplitterBlockCount = benchmarkSplitterCommand.Flag("block-count", "Number of data blocks to split").Default("16").Int()
|
|
)
|
|
|
|
func runBenchmarkSplitterAction(ctx *kingpin.ParseContext) error {
|
|
type benchResult struct {
|
|
splitter string
|
|
duration time.Duration
|
|
segmentCount int
|
|
min int
|
|
p10 int
|
|
p25 int
|
|
p50 int
|
|
p75 int
|
|
p90 int
|
|
max int
|
|
}
|
|
|
|
var results []benchResult
|
|
|
|
// generate data blocks
|
|
var dataBlocks [][]byte
|
|
rnd := rand.New(rand.NewSource(*benchmarkSplitterRandSeed))
|
|
|
|
for i := 0; i < *benchmarkSplitterBlockCount; i++ {
|
|
b := make([]byte, *benchmarkSplitterBlockSize)
|
|
rnd.Read(b)
|
|
dataBlocks = append(dataBlocks, b)
|
|
}
|
|
|
|
log.Infof("splitting %v blocks of %v each", *benchmarkSplitterBlockCount, *benchmarkSplitterBlockSize)
|
|
|
|
for _, sp := range object.SupportedSplitters {
|
|
fact := object.GetSplitterFactory(sp)
|
|
var segmentLengths []int
|
|
|
|
t0 := time.Now()
|
|
for _, data := range dataBlocks {
|
|
s := fact()
|
|
l := 0
|
|
for _, d := range data {
|
|
l++
|
|
if s.ShouldSplit(d) {
|
|
segmentLengths = append(segmentLengths, l)
|
|
l = 0
|
|
}
|
|
}
|
|
if l > 0 {
|
|
segmentLengths = append(segmentLengths, l)
|
|
}
|
|
}
|
|
dur := time.Since(t0)
|
|
sort.Ints(segmentLengths)
|
|
|
|
r := benchResult{
|
|
sp,
|
|
dur,
|
|
len(segmentLengths),
|
|
segmentLengths[0],
|
|
segmentLengths[len(segmentLengths)*10/100],
|
|
segmentLengths[len(segmentLengths)*25/100],
|
|
segmentLengths[len(segmentLengths)*50/100],
|
|
segmentLengths[len(segmentLengths)*75/100],
|
|
segmentLengths[len(segmentLengths)*90/100],
|
|
segmentLengths[len(segmentLengths)-1],
|
|
}
|
|
|
|
printStdout("%-25v %6v ms count:%v min:%v 10th:%v 25th:%v 50th:%v 75th:%v 90th:%v max:%v\n",
|
|
r.splitter,
|
|
r.duration.Nanoseconds()/1e6,
|
|
r.segmentCount,
|
|
r.min, r.p10, r.p25, r.p50, r.p75, r.p90, r.max)
|
|
|
|
results = append(results, r)
|
|
}
|
|
|
|
sort.Slice(results, func(i, j int) bool {
|
|
return results[i].duration < results[j].duration
|
|
})
|
|
printStdout("-----------------------------------------------------------------\n")
|
|
for ndx, r := range results {
|
|
printStdout("%3v. %-25v %6v ms count:%v min:%v 10th:%v 25th:%v 50th:%v 75th:%v 90th:%v max:%v\n",
|
|
ndx,
|
|
r.splitter,
|
|
r.duration.Nanoseconds()/1e6,
|
|
r.segmentCount,
|
|
r.min, r.p10, r.p25, r.p50, r.p75, r.p90, r.max)
|
|
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func init() {
|
|
benchmarkSplitterCommand.Action(runBenchmarkSplitterAction)
|
|
}
|