From 2ba4e83cef7d08448c0d487b488d74a8cd45ab7d Mon Sep 17 00:00:00 2001 From: Jarek Kowalski Date: Tue, 10 Dec 2019 23:06:59 -0800 Subject: [PATCH] moved all compression to separate package and sanitized identifiers --- cli/command_benchmark_compression.go | 6 +-- cli/command_policy_set.go | 6 +-- repo/compression/compression_ids.go | 25 +++++++++++ repo/compression/compressor.go | 44 +++++++++++++++++++ .../compressor_gzip.go | 14 +++--- .../compressor_pgzip.go | 14 +++--- repo/{object => compression}/compressor_s2.go | 16 +++---- .../compressor_test.go | 6 +-- .../compressor_zstd.go | 16 +++---- repo/object/compressor.go | 41 ----------------- repo/object/object_manager.go | 7 +-- repo/object/object_manager_test.go | 3 +- repo/object/object_writer.go | 5 ++- snapshot/policy/compression_policy.go | 14 +++--- 14 files changed, 124 insertions(+), 93 deletions(-) create mode 100644 repo/compression/compression_ids.go create mode 100644 repo/compression/compressor.go rename repo/{object => compression}/compressor_gzip.go (74%) rename repo/{object => compression}/compressor_pgzip.go (77%) rename repo/{object => compression}/compressor_s2.go (68%) rename repo/{object => compression}/compressor_test.go (94%) rename repo/{object => compression}/compressor_zstd.go (73%) delete mode 100644 repo/object/compressor.go diff --git a/cli/command_benchmark_compression.go b/cli/command_benchmark_compression.go index 7d5303c99..026d73330 100644 --- a/cli/command_benchmark_compression.go +++ b/cli/command_benchmark_compression.go @@ -7,7 +7,7 @@ "time" "github.com/kopia/kopia/internal/units" - "github.com/kopia/kopia/repo/object" + "github.com/kopia/kopia/repo/compression" kingpin "gopkg.in/alecthomas/kingpin.v2" ) @@ -23,7 +23,7 @@ func runBenchmarkCompressionAction(ctx *kingpin.ParseContext) error { type benchResult struct { - compression object.CompressorName + compression compression.Name throughput float64 compressedSize int64 } @@ -41,7 +41,7 @@ type benchResult struct { data = d } - for name, comp := range object.CompressorsByName { + for name, comp := range compression.ByName { log.Infof("Benchmarking compressor '%v' (%v x %v bytes)", name, *benchmarkCompressionRepeat, len(data)) t0 := time.Now() diff --git a/cli/command_policy_set.go b/cli/command_policy_set.go index b4b91c033..e1fe4c627 100644 --- a/cli/command_policy_set.go +++ b/cli/command_policy_set.go @@ -9,7 +9,7 @@ "github.com/pkg/errors" "github.com/kopia/kopia/repo" - "github.com/kopia/kopia/repo/object" + "github.com/kopia/kopia/repo/compression" "github.com/kopia/kopia/snapshot/policy" ) @@ -234,7 +234,7 @@ func setCompressionPolicyFromFlags(p *policy.CompressionPolicy, changeCount *int } else { printStderr(" - setting compression algorithm to %v\n", v) - p.CompressorName = object.CompressorName(v) + p.CompressorName = compression.Name(v) } } @@ -349,7 +349,7 @@ func applyPolicyNumber64(desc string, val *int64, str string, changeCount *int) func supportedCompressionAlgorithms() []string { var res []string - for name := range object.CompressorsByName { + for name := range compression.ByName { res = append(res, string(name)) } diff --git a/repo/compression/compression_ids.go b/repo/compression/compression_ids.go new file mode 100644 index 000000000..07aadf687 --- /dev/null +++ b/repo/compression/compression_ids.go @@ -0,0 +1,25 @@ +package compression + +// HeaderID is a unique identifier of the compressor stored in the compressed block header. +type HeaderID uint32 + +// defined header IDs +const ( + headerGzipDefault HeaderID = 0x1000 + headerGzipBestSpeed HeaderID = 0x1001 + headerGzipBestCompression HeaderID = 0x1002 + + headerZstdDefault HeaderID = 0x1100 + headerZstdFastest HeaderID = 0x1101 + headerZstdBetterCompression HeaderID = 0x1102 + headerZstdBestCompression HeaderID = 0x1103 + + headerS2Default HeaderID = 0x1200 + headerS2Better HeaderID = 0x1201 + headerS2Parallel4 HeaderID = 0x1202 + headerS2Parallel8 HeaderID = 0x1203 + + headerPgzipDefault HeaderID = 0x1300 + headerPgzipBestSpeed HeaderID = 0x1301 + headerPgzipBestCompression HeaderID = 0x1302 +) diff --git a/repo/compression/compressor.go b/repo/compression/compressor.go new file mode 100644 index 000000000..6f4ab9cec --- /dev/null +++ b/repo/compression/compressor.go @@ -0,0 +1,44 @@ +// Package compression manages compression algorithm implementations. +package compression + +import ( + "encoding/binary" + "fmt" +) + +// Name is the name of the compressor to use. +type Name string + +// Compressor implements compression and decompression of a byte slice. +type Compressor interface { + HeaderID() HeaderID + Compress(b []byte) ([]byte, error) + Decompress(b []byte) ([]byte, error) +} + +// maps of registered compressors by header ID and name. +var ( + ByHeaderID = map[HeaderID]Compressor{} + ByName = map[Name]Compressor{} +) + +// RegisterCompressor registers the provided compressor implementation +func RegisterCompressor(name Name, c Compressor) { + if ByHeaderID[c.HeaderID()] != nil { + panic(fmt.Sprintf("compressor with HeaderID %x already registered", c.HeaderID())) + } + + if ByName[name] != nil { + panic(fmt.Sprintf("compressor with name %q already registered", name)) + } + + ByHeaderID[c.HeaderID()] = c + ByName[name] = c +} + +func compressionHeader(id HeaderID) []byte { + b := make([]byte, 4) + binary.BigEndian.PutUint32(b, uint32(id)) + + return b +} diff --git a/repo/object/compressor_gzip.go b/repo/compression/compressor_gzip.go similarity index 74% rename from repo/object/compressor_gzip.go rename to repo/compression/compressor_gzip.go index a2adaf956..dc3b5d7e7 100644 --- a/repo/object/compressor_gzip.go +++ b/repo/compression/compressor_gzip.go @@ -1,4 +1,4 @@ -package object +package compression import ( "bytes" @@ -9,22 +9,22 @@ ) func init() { - RegisterCompressor("gzip", newGZipCompressor(0x1000, gzip.DefaultCompression)) - RegisterCompressor("gzip-best-speed", newGZipCompressor(0x1001, gzip.BestSpeed)) - RegisterCompressor("gzip-best-compression", newGZipCompressor(0x1002, gzip.BestCompression)) + RegisterCompressor("gzip", newGZipCompressor(headerGzipDefault, gzip.DefaultCompression)) + RegisterCompressor("gzip-best-speed", newGZipCompressor(headerGzipBestSpeed, gzip.BestSpeed)) + RegisterCompressor("gzip-best-compression", newGZipCompressor(headerGzipBestCompression, gzip.BestCompression)) } -func newGZipCompressor(id uint32, level int) Compressor { +func newGZipCompressor(id HeaderID, level int) Compressor { return &gzipCompressor{id, compressionHeader(id), level} } type gzipCompressor struct { - id uint32 + id HeaderID header []byte level int } -func (c *gzipCompressor) ID() uint32 { +func (c *gzipCompressor) HeaderID() HeaderID { return c.id } diff --git a/repo/object/compressor_pgzip.go b/repo/compression/compressor_pgzip.go similarity index 77% rename from repo/object/compressor_pgzip.go rename to repo/compression/compressor_pgzip.go index 2f3fce25f..471e2eb6e 100644 --- a/repo/object/compressor_pgzip.go +++ b/repo/compression/compressor_pgzip.go @@ -1,4 +1,4 @@ -package object +package compression import ( "bytes" @@ -9,22 +9,22 @@ ) func init() { - RegisterCompressor("pgzip", newpgzipCompressor(0x1300, pgzip.DefaultCompression)) - RegisterCompressor("pgzip-best-speed", newpgzipCompressor(0x1301, pgzip.BestSpeed)) - RegisterCompressor("pgzip-best-compression", newpgzipCompressor(0x1302, pgzip.BestCompression)) + RegisterCompressor("pgzip", newpgzipCompressor(headerPgzipDefault, pgzip.DefaultCompression)) + RegisterCompressor("pgzip-best-speed", newpgzipCompressor(headerPgzipBestSpeed, pgzip.BestSpeed)) + RegisterCompressor("pgzip-best-compression", newpgzipCompressor(headerPgzipBestCompression, pgzip.BestCompression)) } -func newpgzipCompressor(id uint32, level int) Compressor { +func newpgzipCompressor(id HeaderID, level int) Compressor { return &pgzipCompressor{id, compressionHeader(id), level} } type pgzipCompressor struct { - id uint32 + id HeaderID header []byte level int } -func (c *pgzipCompressor) ID() uint32 { +func (c *pgzipCompressor) HeaderID() HeaderID { return c.id } diff --git a/repo/object/compressor_s2.go b/repo/compression/compressor_s2.go similarity index 68% rename from repo/object/compressor_s2.go rename to repo/compression/compressor_s2.go index da8f7e905..839d7bea6 100644 --- a/repo/object/compressor_s2.go +++ b/repo/compression/compressor_s2.go @@ -1,4 +1,4 @@ -package object +package compression import ( "bytes" @@ -9,23 +9,23 @@ ) func init() { - RegisterCompressor("s2-default", newS2Compressor(0x1200)) - RegisterCompressor("s2-better", newS2Compressor(0x1201, s2.WriterBetterCompression())) - RegisterCompressor("s2-parallel-4", newS2Compressor(0x1202, s2.WriterConcurrency(4))) - RegisterCompressor("s2-parallel-8", newS2Compressor(0x1203, s2.WriterConcurrency(8))) + RegisterCompressor("s2-default", newS2Compressor(headerS2Default)) + RegisterCompressor("s2-better", newS2Compressor(headerS2Better, s2.WriterBetterCompression())) + RegisterCompressor("s2-parallel-4", newS2Compressor(headerS2Parallel4, s2.WriterConcurrency(4))) + RegisterCompressor("s2-parallel-8", newS2Compressor(headerS2Parallel8, s2.WriterConcurrency(8))) } -func newS2Compressor(id uint32, opts ...s2.WriterOption) Compressor { +func newS2Compressor(id HeaderID, opts ...s2.WriterOption) Compressor { return &s2Compressor{id, compressionHeader(id), opts} } type s2Compressor struct { - id uint32 + id HeaderID header []byte opts []s2.WriterOption } -func (c *s2Compressor) ID() uint32 { +func (c *s2Compressor) HeaderID() HeaderID { return c.id } diff --git a/repo/object/compressor_test.go b/repo/compression/compressor_test.go similarity index 94% rename from repo/object/compressor_test.go rename to repo/compression/compressor_test.go index e5cc934dc..a99195e3e 100644 --- a/repo/object/compressor_test.go +++ b/repo/compression/compressor_test.go @@ -1,4 +1,4 @@ -package object +package compression import ( "bytes" @@ -8,7 +8,7 @@ ) func TestCompressor(t *testing.T) { - for id, comp := range Compressors { + for id, comp := range ByHeaderID { id, comp := id, comp t.Run(fmt.Sprintf("compressible-data-%x", id), func(t *testing.T) { @@ -25,7 +25,7 @@ func TestCompressor(t *testing.T) { t.Errorf("compression not effective for all-zero data") } - for id2, comp2 := range Compressors { + for id2, comp2 := range ByHeaderID { if id != id2 { if _, err2 := comp2.Decompress(cData); err2 == nil { t.Errorf("compressor %x was able to decompress results of %x", id2, id) diff --git a/repo/object/compressor_zstd.go b/repo/compression/compressor_zstd.go similarity index 73% rename from repo/object/compressor_zstd.go rename to repo/compression/compressor_zstd.go index 1a4c188a7..077886e25 100644 --- a/repo/object/compressor_zstd.go +++ b/repo/compression/compressor_zstd.go @@ -1,4 +1,4 @@ -package object +package compression import ( "bytes" @@ -9,23 +9,23 @@ ) func init() { - RegisterCompressor("zstd", newZstdCompressor(0x1100, zstd.SpeedDefault)) - RegisterCompressor("zstd-fastest", newZstdCompressor(0x1101, zstd.SpeedFastest)) - RegisterCompressor("zstd-better-compression", newZstdCompressor(0x1102, zstd.SpeedBetterCompression)) - RegisterCompressor("zstd-best-compression", newZstdCompressor(0x1103, zstd.SpeedBestCompression)) + RegisterCompressor("zstd", newZstdCompressor(headerZstdDefault, zstd.SpeedDefault)) + RegisterCompressor("zstd-fastest", newZstdCompressor(headerZstdFastest, zstd.SpeedFastest)) + RegisterCompressor("zstd-better-compression", newZstdCompressor(headerZstdBetterCompression, zstd.SpeedBetterCompression)) + RegisterCompressor("zstd-best-compression", newZstdCompressor(headerZstdBestCompression, zstd.SpeedBestCompression)) } -func newZstdCompressor(id uint32, level zstd.EncoderLevel) Compressor { +func newZstdCompressor(id HeaderID, level zstd.EncoderLevel) Compressor { return &zstdCompressor{id, compressionHeader(id), level} } type zstdCompressor struct { - id uint32 + id HeaderID header []byte level zstd.EncoderLevel } -func (c *zstdCompressor) ID() uint32 { +func (c *zstdCompressor) HeaderID() HeaderID { return c.id } diff --git a/repo/object/compressor.go b/repo/object/compressor.go deleted file mode 100644 index 2452e60b1..000000000 --- a/repo/object/compressor.go +++ /dev/null @@ -1,41 +0,0 @@ -package object - -import ( - "encoding/binary" - "fmt" -) - -// CompressorName is the name of the compressor to use. -type CompressorName string - -type Compressor interface { - ID() uint32 - Compress(b []byte) ([]byte, error) - Decompress(b []byte) ([]byte, error) -} - -var ( - Compressors = map[uint32]Compressor{} - CompressorsByName = map[CompressorName]Compressor{} -) - -// RegisterCompressor registers the provided compressor implementation -func RegisterCompressor(name CompressorName, c Compressor) { - if Compressors[c.ID()] != nil { - panic(fmt.Sprintf("compressor with ID %x already registered", c.ID())) - } - - if CompressorsByName[name] != nil { - panic(fmt.Sprintf("compressor with name %q already registered", name)) - } - - Compressors[c.ID()] = c - CompressorsByName[name] = c -} - -func compressionHeader(id uint32) []byte { - b := make([]byte, 4) - binary.BigEndian.PutUint32(b, id) - - return b -} diff --git a/repo/object/object_manager.go b/repo/object/object_manager.go index 8c9812608..77a958259 100644 --- a/repo/object/object_manager.go +++ b/repo/object/object_manager.go @@ -10,6 +10,7 @@ "github.com/pkg/errors" + "github.com/kopia/kopia/repo/compression" "github.com/kopia/kopia/repo/content" ) @@ -53,7 +54,7 @@ func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer { splitter: om.newSplitter(), description: opt.Description, prefix: opt.Prefix, - compressor: CompressorsByName[opt.Compressor], + compressor: compression.ByName[opt.Compressor], } } @@ -241,9 +242,9 @@ func (om *Manager) decompress(b []byte) ([]byte, error) { return nil, errors.Errorf("invalid compression header") } - compressorID := binary.BigEndian.Uint32(b[0:4]) + compressorID := compression.HeaderID(binary.BigEndian.Uint32(b[0:4])) - compressor := Compressors[compressorID] + compressor := compression.ByHeaderID[compressorID] if compressor == nil { return nil, errors.Errorf("unsupported compressor %x", compressorID) } diff --git a/repo/object/object_manager_test.go b/repo/object/object_manager_test.go index a12b26972..f2b0c74c1 100644 --- a/repo/object/object_manager_test.go +++ b/repo/object/object_manager_test.go @@ -16,6 +16,7 @@ "testing" "github.com/kopia/kopia/repo/blob" + "github.com/kopia/kopia/repo/compression" "github.com/kopia/kopia/repo/content" ) @@ -329,7 +330,7 @@ func TestEndToEndReadAndSeekWithCompression(t *testing.T) { ctx := context.Background() _, om := setupTest(t) - for compressorName := range CompressorsByName { + for compressorName := range compression.ByName { for _, size := range []int{1, 199, 200, 201, 9999, 512434} { // Create some random data sample of the specified size. randomData := make([]byte, size) diff --git a/repo/object/object_writer.go b/repo/object/object_writer.go index 68c1c207b..edb4c678a 100644 --- a/repo/object/object_writer.go +++ b/repo/object/object_writer.go @@ -9,6 +9,7 @@ "github.com/pkg/errors" + "github.com/kopia/kopia/repo/compression" "github.com/kopia/kopia/repo/content" ) @@ -52,7 +53,7 @@ type objectWriter struct { ctx context.Context repo *Manager - compressor Compressor + compressor compression.Compressor prefix content.ID buffer bytes.Buffer @@ -179,5 +180,5 @@ func (w *objectWriter) Result() (ID, error) { type WriterOptions struct { Description string Prefix content.ID // empty string or a single-character ('g'..'z') - Compressor CompressorName + Compressor compression.Name } diff --git a/snapshot/policy/compression_policy.go b/snapshot/policy/compression_policy.go index 045b41415..d7a37b51f 100644 --- a/snapshot/policy/compression_policy.go +++ b/snapshot/policy/compression_policy.go @@ -5,19 +5,19 @@ "sort" "github.com/kopia/kopia/fs" - "github.com/kopia/kopia/repo/object" + "github.com/kopia/kopia/repo/compression" ) // CompressionPolicy specifies compression policy. type CompressionPolicy struct { - CompressorName object.CompressorName `json:"compressorName,omitempty"` - OnlyCompress []string `json:"onlyCompress,omitempty"` - NeverCompress []string `json:"neverCompress,omitempty"` - MinSize int64 `json:"minSize,omitempty"` - MaxSize int64 `json:"maxSize,omitempty"` + CompressorName compression.Name `json:"compressorName,omitempty"` + OnlyCompress []string `json:"onlyCompress,omitempty"` + NeverCompress []string `json:"neverCompress,omitempty"` + MinSize int64 `json:"minSize,omitempty"` + MaxSize int64 `json:"maxSize,omitempty"` } -func (p *CompressionPolicy) CompressorForFile(e fs.File) object.CompressorName { +func (p *CompressionPolicy) CompressorForFile(e fs.File) compression.Name { ext := filepath.Ext(e.Name()) size := e.Size()