From b1020941ddebb0566fd71eb6260569fbf9bc3841 Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Fri, 13 Mar 2020 21:27:29 -0700 Subject: [PATCH] splitter: exposed MaxSegmentSize() for splitters --- repo/splitter/splitter.go | 1 + repo/splitter/splitter_buzhash32.go | 4 ++++ repo/splitter/splitter_fixed.go | 4 ++++ repo/splitter/splitter_rabinkarp64.go | 4 ++++ repo/splitter/splitter_test.go | 4 ++++ 5 files changed, 17 insertions(+) diff --git a/repo/splitter/splitter.go b/repo/splitter/splitter.go index 673db9edc..657c931c8 100644 --- a/repo/splitter/splitter.go +++ b/repo/splitter/splitter.go @@ -13,6 +13,7 @@ // It must return true if the object should be split after byte b is processed. type Splitter interface { ShouldSplit(b byte) bool + MaxSegmentSize() int Reset() Close() } diff --git a/repo/splitter/splitter_buzhash32.go b/repo/splitter/splitter_buzhash32.go index 0d55c9928..6ad2fddf6 100644 --- a/repo/splitter/splitter_buzhash32.go +++ b/repo/splitter/splitter_buzhash32.go @@ -39,6 +39,10 @@ func (rs *buzhash32Splitter) ShouldSplit(b byte) bool { return false } +func (rs *buzhash32Splitter) MaxSegmentSize() int { + return rs.maxSize +} + func newBuzHash32SplitterFactory(avgSize int) Factory { // avgSize must be a power of two, so 0b000001000...0000 // it just so happens that mask is avgSize-1 :) diff --git a/repo/splitter/splitter_fixed.go b/repo/splitter/splitter_fixed.go index 0b503d180..dc795a5d5 100644 --- a/repo/splitter/splitter_fixed.go +++ b/repo/splitter/splitter_fixed.go @@ -23,6 +23,10 @@ func (s *fixedSplitter) ShouldSplit(b byte) bool { return false } +func (s *fixedSplitter) MaxSegmentSize() int { + return s.chunkLength +} + // Fixed returns a factory that creates splitters with fixed chunk length. func Fixed(length int) Factory { return func() Splitter { diff --git a/repo/splitter/splitter_rabinkarp64.go b/repo/splitter/splitter_rabinkarp64.go index 077efbc44..a50147904 100644 --- a/repo/splitter/splitter_rabinkarp64.go +++ b/repo/splitter/splitter_rabinkarp64.go @@ -39,6 +39,10 @@ func (rs *rabinKarp64Splitter) ShouldSplit(b byte) bool { return false } +func (rs *rabinKarp64Splitter) MaxSegmentSize() int { + return rs.maxSize +} + func newRabinKarp64SplitterFactory(avgSize int) Factory { mask := uint64(avgSize - 1) minSize, maxSize := avgSize/2, avgSize*2 //nolint:gomnd diff --git a/repo/splitter/splitter_test.go b/repo/splitter/splitter_test.go index 65bf94505..79c00cdef 100644 --- a/repo/splitter/splitter_test.go +++ b/repo/splitter/splitter_test.go @@ -69,6 +69,10 @@ func TestSplitterStability(t *testing.T) { minSplit := int(math.MaxInt32) count := 0 + if got, want := s.MaxSegmentSize(), tc.maxSplit; got != want { + t.Errorf("unexpected max segment size: %v, want %v", got, want) + } + for i, p := range rnd { if !s.ShouldSplit(p) { continue