Files
kopia/cli/command_benchmark_splitters.go
Jarek Kowalski faf280616a Splitter throughput improvements (#606)
* object: refactored writer to detect split points before writing

This introduces a new primitive that will be moved into splitters
themselves in subsequent commits. I'm doing this in small steps to
ensure we don't regress at any time.

* splitter: refactored TestSplitters test

This uses both slow (byte-by-byte) and fast (nextSplitPoint) methods of
determining split points.

Note nextSplitPoint is not implemented by splitters yet, but this
verifies that the test is expecting the right thing.

* object: splitter refactoring - replaced ShouldSplit() with NextSplitPoint() everywhere, still not optimized

* splitter: added additional dimension to splitter_test

We split either in large chunks or one byte at a time to catch
the corner cases in the splitter implementation.

* splitter: optimized splitters using NextSplitPoint primitive

This improves splitter performance by about 40% (buzhash) and makes
it virtually free for FIXED splitter.
2020-09-11 19:45:48 -07:00

122 lines
3.0 KiB
Go

package cli
import (
"math/rand"
"sort"
"time"
kingpin "gopkg.in/alecthomas/kingpin.v2"
"github.com/kopia/kopia/internal/clock"
"github.com/kopia/kopia/repo/splitter"
)
// Command-line definition of the "splitter" benchmark sub-command and its flags.
var (
// benchmarkSplitterCommand is the "splitter" sub-command registered under benchmarkCommands.
benchmarkSplitterCommand = benchmarkCommands.Command("splitter", "Run splitter benchmarks")
// benchmarkSplitterRandSeed seeds the pseudo-random generator that produces the input
// data, so a given seed always benchmarks the same bytes (--rand-seed, default 42).
benchmarkSplitterRandSeed = benchmarkSplitterCommand.Flag("rand-seed", "Random seed").Default("42").Int64()
// benchmarkSplitterBlockSize is the size of each generated data block (--data-size, default 32MB).
benchmarkSplitterBlockSize = benchmarkSplitterCommand.Flag("data-size", "Size of a data to split").Default("32MB").Bytes()
// benchmarkSplitterBlockCount is the number of data blocks generated and split by
// every algorithm (--block-count, default 16).
benchmarkSplitterBlockCount = benchmarkSplitterCommand.Flag("block-count", "Number of data blocks to split").Default("16").Int()
)
// runBenchmarkSplitterAction benchmarks every supported splitter algorithm.
//
// It generates --block-count pseudo-random data blocks of --data-size bytes
// each (seeded by --rand-seed), runs each algorithm returned by
// splitter.SupportedAlgorithms() over all blocks and records the elapsed time
// together with the distribution of segment lengths it produced. One line per
// algorithm is printed as soon as it finishes, followed by a separator and the
// same results ordered from fastest to slowest.
//
// The ctx parameter is required by the kingpin action signature and is unused.
// The only error returned is one reported while generating the random data.
func runBenchmarkSplitterAction(ctx *kingpin.ParseContext) error {
	type benchResult struct {
		splitter     string
		duration     time.Duration
		segmentCount int
		min          int
		p10          int
		p25          int
		p50          int
		p75          int
		p90          int
		max          int
	}

	// Shared by the per-algorithm line and the final ranking so that both
	// always report the same columns.
	const statsFormat = "%-25v %6v ms count:%v min:%v 10th:%v 25th:%v 50th:%v 75th:%v 90th:%v max:%v\n"

	var results []benchResult

	// generate data blocks
	var dataBlocks [][]byte

	rnd := rand.New(rand.NewSource(*benchmarkSplitterRandSeed)) //nolint:gosec

	for i := 0; i < *benchmarkSplitterBlockCount; i++ {
		b := make([]byte, *benchmarkSplitterBlockSize)
		if _, err := rnd.Read(b); err != nil {
			return err
		}

		dataBlocks = append(dataBlocks, b)
	}

	printStderr("splitting %v blocks of %v each\n", *benchmarkSplitterBlockCount, *benchmarkSplitterBlockSize)

	for _, sp := range splitter.SupportedAlgorithms() {
		fact := splitter.GetFactory(sp)

		var segmentLengths []int

		t0 := clock.Now()

		for _, data := range dataBlocks {
			// A fresh splitter instance is created for every data block.
			s := fact()
			d := data

			for len(d) > 0 {
				n := s.NextSplitPoint(d)
				if n < 0 {
					// A negative result ends this block: whatever is left
					// is recorded as its last segment.
					segmentLengths = append(segmentLengths, len(d))
					break
				}

				segmentLengths = append(segmentLengths, n)
				d = d[n:]
			}
		}

		dur := clock.Since(t0)

		// Sorting happens after the timed region; it is only needed to read
		// percentiles off the slice.
		sort.Ints(segmentLengths)

		// benchmarkSplitterPercentile tolerates an empty slice, which occurs
		// when there is no data to split (zero blocks or zero-sized blocks);
		// indexing the slice directly would panic in that case.
		r := benchResult{
			splitter:     sp,
			duration:     dur,
			segmentCount: len(segmentLengths),
			min:          benchmarkSplitterPercentile(segmentLengths, 0),
			p10:          benchmarkSplitterPercentile(segmentLengths, 10),
			p25:          benchmarkSplitterPercentile(segmentLengths, 25),
			p50:          benchmarkSplitterPercentile(segmentLengths, 50),
			p75:          benchmarkSplitterPercentile(segmentLengths, 75),
			p90:          benchmarkSplitterPercentile(segmentLengths, 90),
			max:          benchmarkSplitterPercentile(segmentLengths, 100),
		}

		printStdout(statsFormat,
			r.splitter,
			r.duration.Nanoseconds()/1e6,
			r.segmentCount,
			r.min, r.p10, r.p25, r.p50, r.p75, r.p90, r.max)

		results = append(results, r)
	}

	// Rank algorithms from fastest to slowest.
	sort.Slice(results, func(i, j int) bool {
		return results[i].duration < results[j].duration
	})

	printStdout("-----------------------------------------------------------------\n")

	for ndx, r := range results {
		printStdout("%3v. "+statsFormat,
			ndx,
			r.splitter,
			r.duration.Nanoseconds()/1e6,
			r.segmentCount,
			r.min, r.p10, r.p25, r.p50, r.p75, r.p90, r.max)
	}

	return nil
}

// benchmarkSplitterPercentile returns the element found pct percent of the way
// through sorted, which must already be in ascending order. pct=0 yields the
// smallest element and pct=100 the largest (the index is clamped to the last
// element). It returns 0 when sorted is empty.
func benchmarkSplitterPercentile(sorted []int, pct int) int {
	if len(sorted) == 0 {
		return 0
	}

	idx := len(sorted) * pct / 100
	if idx >= len(sorted) {
		idx = len(sorted) - 1
	}

	return sorted[idx]
}
// init registers runBenchmarkSplitterAction as the action of the "splitter"
// benchmark command.
func init() {
benchmarkSplitterCommand.Action(runBenchmarkSplitterAction)
}