moved all compression to separate package and sanitized identifiers

This commit is contained in:
Jarek Kowalski
2019-12-10 23:06:59 -08:00
parent 8ab93e0e2f
commit 2ba4e83cef
14 changed files with 124 additions and 93 deletions

View File

@@ -7,7 +7,7 @@
"time"
"github.com/kopia/kopia/internal/units"
"github.com/kopia/kopia/repo/object"
"github.com/kopia/kopia/repo/compression"
kingpin "gopkg.in/alecthomas/kingpin.v2"
)
@@ -23,7 +23,7 @@
func runBenchmarkCompressionAction(ctx *kingpin.ParseContext) error {
type benchResult struct {
compression object.CompressorName
compression compression.Name
throughput float64
compressedSize int64
}
@@ -41,7 +41,7 @@ type benchResult struct {
data = d
}
for name, comp := range object.CompressorsByName {
for name, comp := range compression.ByName {
log.Infof("Benchmarking compressor '%v' (%v x %v bytes)", name, *benchmarkCompressionRepeat, len(data))
t0 := time.Now()

View File

@@ -9,7 +9,7 @@
"github.com/pkg/errors"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/object"
"github.com/kopia/kopia/repo/compression"
"github.com/kopia/kopia/snapshot/policy"
)
@@ -234,7 +234,7 @@ func setCompressionPolicyFromFlags(p *policy.CompressionPolicy, changeCount *int
} else {
printStderr(" - setting compression algorithm to %v\n", v)
p.CompressorName = object.CompressorName(v)
p.CompressorName = compression.Name(v)
}
}
@@ -349,7 +349,7 @@ func applyPolicyNumber64(desc string, val *int64, str string, changeCount *int)
func supportedCompressionAlgorithms() []string {
var res []string
for name := range object.CompressorsByName {
for name := range compression.ByName {
res = append(res, string(name))
}

View File

@@ -0,0 +1,25 @@
package compression
// HeaderID is a unique identifier of the compressor stored in the compressed block header.
type HeaderID uint32
// defined header IDs
const (
headerGzipDefault HeaderID = 0x1000
headerGzipBestSpeed HeaderID = 0x1001
headerGzipBestCompression HeaderID = 0x1002
headerZstdDefault HeaderID = 0x1100
headerZstdFastest HeaderID = 0x1101
headerZstdBetterCompression HeaderID = 0x1102
headerZstdBestCompression HeaderID = 0x1103
headerS2Default HeaderID = 0x1200
headerS2Better HeaderID = 0x1201
headerS2Parallel4 HeaderID = 0x1202
headerS2Parallel8 HeaderID = 0x1203
headerPgzipDefault HeaderID = 0x1300
headerPgzipBestSpeed HeaderID = 0x1301
headerPgzipBestCompression HeaderID = 0x1302
)

View File

@@ -0,0 +1,44 @@
// Package compression manages compression algorithm implementations.
package compression
import (
"encoding/binary"
"fmt"
)
// Name is the name of the compressor to use.
type Name string
// Compressor implements compression and decompression of a byte slice.
type Compressor interface {
HeaderID() HeaderID
Compress(b []byte) ([]byte, error)
Decompress(b []byte) ([]byte, error)
}
// maps of registered compressors by header ID and name.
var (
ByHeaderID = map[HeaderID]Compressor{}
ByName = map[Name]Compressor{}
)
// RegisterCompressor registers the provided compressor implementation
func RegisterCompressor(name Name, c Compressor) {
if ByHeaderID[c.HeaderID()] != nil {
panic(fmt.Sprintf("compressor with HeaderID %x already registered", c.HeaderID()))
}
if ByName[name] != nil {
panic(fmt.Sprintf("compressor with name %q already registered", name))
}
ByHeaderID[c.HeaderID()] = c
ByName[name] = c
}
func compressionHeader(id HeaderID) []byte {
b := make([]byte, 4)
binary.BigEndian.PutUint32(b, uint32(id))
return b
}

View File

@@ -1,4 +1,4 @@
package object
package compression
import (
"bytes"
@@ -9,22 +9,22 @@
)
func init() {
RegisterCompressor("gzip", newGZipCompressor(0x1000, gzip.DefaultCompression))
RegisterCompressor("gzip-best-speed", newGZipCompressor(0x1001, gzip.BestSpeed))
RegisterCompressor("gzip-best-compression", newGZipCompressor(0x1002, gzip.BestCompression))
RegisterCompressor("gzip", newGZipCompressor(headerGzipDefault, gzip.DefaultCompression))
RegisterCompressor("gzip-best-speed", newGZipCompressor(headerGzipBestSpeed, gzip.BestSpeed))
RegisterCompressor("gzip-best-compression", newGZipCompressor(headerGzipBestCompression, gzip.BestCompression))
}
func newGZipCompressor(id uint32, level int) Compressor {
func newGZipCompressor(id HeaderID, level int) Compressor {
return &gzipCompressor{id, compressionHeader(id), level}
}
type gzipCompressor struct {
id uint32
id HeaderID
header []byte
level int
}
func (c *gzipCompressor) ID() uint32 {
func (c *gzipCompressor) HeaderID() HeaderID {
return c.id
}

View File

@@ -1,4 +1,4 @@
package object
package compression
import (
"bytes"
@@ -9,22 +9,22 @@
)
func init() {
RegisterCompressor("pgzip", newpgzipCompressor(0x1300, pgzip.DefaultCompression))
RegisterCompressor("pgzip-best-speed", newpgzipCompressor(0x1301, pgzip.BestSpeed))
RegisterCompressor("pgzip-best-compression", newpgzipCompressor(0x1302, pgzip.BestCompression))
RegisterCompressor("pgzip", newpgzipCompressor(headerPgzipDefault, pgzip.DefaultCompression))
RegisterCompressor("pgzip-best-speed", newpgzipCompressor(headerPgzipBestSpeed, pgzip.BestSpeed))
RegisterCompressor("pgzip-best-compression", newpgzipCompressor(headerPgzipBestCompression, pgzip.BestCompression))
}
func newpgzipCompressor(id uint32, level int) Compressor {
func newpgzipCompressor(id HeaderID, level int) Compressor {
return &pgzipCompressor{id, compressionHeader(id), level}
}
type pgzipCompressor struct {
id uint32
id HeaderID
header []byte
level int
}
func (c *pgzipCompressor) ID() uint32 {
func (c *pgzipCompressor) HeaderID() HeaderID {
return c.id
}

View File

@@ -1,4 +1,4 @@
package object
package compression
import (
"bytes"
@@ -9,23 +9,23 @@
)
func init() {
RegisterCompressor("s2-default", newS2Compressor(0x1200))
RegisterCompressor("s2-better", newS2Compressor(0x1201, s2.WriterBetterCompression()))
RegisterCompressor("s2-parallel-4", newS2Compressor(0x1202, s2.WriterConcurrency(4)))
RegisterCompressor("s2-parallel-8", newS2Compressor(0x1203, s2.WriterConcurrency(8)))
RegisterCompressor("s2-default", newS2Compressor(headerS2Default))
RegisterCompressor("s2-better", newS2Compressor(headerS2Better, s2.WriterBetterCompression()))
RegisterCompressor("s2-parallel-4", newS2Compressor(headerS2Parallel4, s2.WriterConcurrency(4)))
RegisterCompressor("s2-parallel-8", newS2Compressor(headerS2Parallel8, s2.WriterConcurrency(8)))
}
func newS2Compressor(id uint32, opts ...s2.WriterOption) Compressor {
func newS2Compressor(id HeaderID, opts ...s2.WriterOption) Compressor {
return &s2Compressor{id, compressionHeader(id), opts}
}
type s2Compressor struct {
id uint32
id HeaderID
header []byte
opts []s2.WriterOption
}
func (c *s2Compressor) ID() uint32 {
func (c *s2Compressor) HeaderID() HeaderID {
return c.id
}

View File

@@ -1,4 +1,4 @@
package object
package compression
import (
"bytes"
@@ -8,7 +8,7 @@
)
func TestCompressor(t *testing.T) {
for id, comp := range Compressors {
for id, comp := range ByHeaderID {
id, comp := id, comp
t.Run(fmt.Sprintf("compressible-data-%x", id), func(t *testing.T) {
@@ -25,7 +25,7 @@ func TestCompressor(t *testing.T) {
t.Errorf("compression not effective for all-zero data")
}
for id2, comp2 := range Compressors {
for id2, comp2 := range ByHeaderID {
if id != id2 {
if _, err2 := comp2.Decompress(cData); err2 == nil {
t.Errorf("compressor %x was able to decompress results of %x", id2, id)

View File

@@ -1,4 +1,4 @@
package object
package compression
import (
"bytes"
@@ -9,23 +9,23 @@
)
func init() {
RegisterCompressor("zstd", newZstdCompressor(0x1100, zstd.SpeedDefault))
RegisterCompressor("zstd-fastest", newZstdCompressor(0x1101, zstd.SpeedFastest))
RegisterCompressor("zstd-better-compression", newZstdCompressor(0x1102, zstd.SpeedBetterCompression))
RegisterCompressor("zstd-best-compression", newZstdCompressor(0x1103, zstd.SpeedBestCompression))
RegisterCompressor("zstd", newZstdCompressor(headerZstdDefault, zstd.SpeedDefault))
RegisterCompressor("zstd-fastest", newZstdCompressor(headerZstdFastest, zstd.SpeedFastest))
RegisterCompressor("zstd-better-compression", newZstdCompressor(headerZstdBetterCompression, zstd.SpeedBetterCompression))
RegisterCompressor("zstd-best-compression", newZstdCompressor(headerZstdBestCompression, zstd.SpeedBestCompression))
}
func newZstdCompressor(id uint32, level zstd.EncoderLevel) Compressor {
func newZstdCompressor(id HeaderID, level zstd.EncoderLevel) Compressor {
return &zstdCompressor{id, compressionHeader(id), level}
}
type zstdCompressor struct {
id uint32
id HeaderID
header []byte
level zstd.EncoderLevel
}
func (c *zstdCompressor) ID() uint32 {
func (c *zstdCompressor) HeaderID() HeaderID {
return c.id
}

View File

@@ -1,41 +0,0 @@
package object
import (
"encoding/binary"
"fmt"
)
// CompressorName is the name of the compressor to use.
type CompressorName string
type Compressor interface {
ID() uint32
Compress(b []byte) ([]byte, error)
Decompress(b []byte) ([]byte, error)
}
var (
Compressors = map[uint32]Compressor{}
CompressorsByName = map[CompressorName]Compressor{}
)
// RegisterCompressor registers the provided compressor implementation
func RegisterCompressor(name CompressorName, c Compressor) {
if Compressors[c.ID()] != nil {
panic(fmt.Sprintf("compressor with ID %x already registered", c.ID()))
}
if CompressorsByName[name] != nil {
panic(fmt.Sprintf("compressor with name %q already registered", name))
}
Compressors[c.ID()] = c
CompressorsByName[name] = c
}
func compressionHeader(id uint32) []byte {
b := make([]byte, 4)
binary.BigEndian.PutUint32(b, id)
return b
}

View File

@@ -10,6 +10,7 @@
"github.com/pkg/errors"
"github.com/kopia/kopia/repo/compression"
"github.com/kopia/kopia/repo/content"
)
@@ -53,7 +54,7 @@ func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer {
splitter: om.newSplitter(),
description: opt.Description,
prefix: opt.Prefix,
compressor: CompressorsByName[opt.Compressor],
compressor: compression.ByName[opt.Compressor],
}
}
@@ -241,9 +242,9 @@ func (om *Manager) decompress(b []byte) ([]byte, error) {
return nil, errors.Errorf("invalid compression header")
}
compressorID := binary.BigEndian.Uint32(b[0:4])
compressorID := compression.HeaderID(binary.BigEndian.Uint32(b[0:4]))
compressor := Compressors[compressorID]
compressor := compression.ByHeaderID[compressorID]
if compressor == nil {
return nil, errors.Errorf("unsupported compressor %x", compressorID)
}

View File

@@ -16,6 +16,7 @@
"testing"
"github.com/kopia/kopia/repo/blob"
"github.com/kopia/kopia/repo/compression"
"github.com/kopia/kopia/repo/content"
)
@@ -329,7 +330,7 @@ func TestEndToEndReadAndSeekWithCompression(t *testing.T) {
ctx := context.Background()
_, om := setupTest(t)
for compressorName := range CompressorsByName {
for compressorName := range compression.ByName {
for _, size := range []int{1, 199, 200, 201, 9999, 512434} {
// Create some random data sample of the specified size.
randomData := make([]byte, size)

View File

@@ -9,6 +9,7 @@
"github.com/pkg/errors"
"github.com/kopia/kopia/repo/compression"
"github.com/kopia/kopia/repo/content"
)
@@ -52,7 +53,7 @@ type objectWriter struct {
ctx context.Context
repo *Manager
compressor Compressor
compressor compression.Compressor
prefix content.ID
buffer bytes.Buffer
@@ -179,5 +180,5 @@ func (w *objectWriter) Result() (ID, error) {
type WriterOptions struct {
Description string
Prefix content.ID // empty string or a single-character ('g'..'z')
Compressor CompressorName
Compressor compression.Name
}

View File

@@ -5,19 +5,19 @@
"sort"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/repo/object"
"github.com/kopia/kopia/repo/compression"
)
// CompressionPolicy specifies compression policy.
type CompressionPolicy struct {
CompressorName object.CompressorName `json:"compressorName,omitempty"`
OnlyCompress []string `json:"onlyCompress,omitempty"`
NeverCompress []string `json:"neverCompress,omitempty"`
MinSize int64 `json:"minSize,omitempty"`
MaxSize int64 `json:"maxSize,omitempty"`
CompressorName compression.Name `json:"compressorName,omitempty"`
OnlyCompress []string `json:"onlyCompress,omitempty"`
NeverCompress []string `json:"neverCompress,omitempty"`
MinSize int64 `json:"minSize,omitempty"`
MaxSize int64 `json:"maxSize,omitempty"`
}
func (p *CompressionPolicy) CompressorForFile(e fs.File) object.CompressorName {
func (p *CompressionPolicy) CompressorForFile(e fs.File) compression.Name {
ext := filepath.Ext(e.Name())
size := e.Size()