mirror of
https://github.com/kopia/kopia.git
synced 2026-02-07 05:05:26 -05:00
moved all compression to separate package and sanitized identifiers
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
"time"
|
||||
|
||||
"github.com/kopia/kopia/internal/units"
|
||||
"github.com/kopia/kopia/repo/object"
|
||||
"github.com/kopia/kopia/repo/compression"
|
||||
|
||||
kingpin "gopkg.in/alecthomas/kingpin.v2"
|
||||
)
|
||||
@@ -23,7 +23,7 @@
|
||||
|
||||
func runBenchmarkCompressionAction(ctx *kingpin.ParseContext) error {
|
||||
type benchResult struct {
|
||||
compression object.CompressorName
|
||||
compression compression.Name
|
||||
throughput float64
|
||||
compressedSize int64
|
||||
}
|
||||
@@ -41,7 +41,7 @@ type benchResult struct {
|
||||
data = d
|
||||
}
|
||||
|
||||
for name, comp := range object.CompressorsByName {
|
||||
for name, comp := range compression.ByName {
|
||||
log.Infof("Benchmarking compressor '%v' (%v x %v bytes)", name, *benchmarkCompressionRepeat, len(data))
|
||||
|
||||
t0 := time.Now()
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/kopia/kopia/repo"
|
||||
"github.com/kopia/kopia/repo/object"
|
||||
"github.com/kopia/kopia/repo/compression"
|
||||
"github.com/kopia/kopia/snapshot/policy"
|
||||
)
|
||||
|
||||
@@ -234,7 +234,7 @@ func setCompressionPolicyFromFlags(p *policy.CompressionPolicy, changeCount *int
|
||||
} else {
|
||||
printStderr(" - setting compression algorithm to %v\n", v)
|
||||
|
||||
p.CompressorName = object.CompressorName(v)
|
||||
p.CompressorName = compression.Name(v)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -349,7 +349,7 @@ func applyPolicyNumber64(desc string, val *int64, str string, changeCount *int)
|
||||
|
||||
func supportedCompressionAlgorithms() []string {
|
||||
var res []string
|
||||
for name := range object.CompressorsByName {
|
||||
for name := range compression.ByName {
|
||||
res = append(res, string(name))
|
||||
}
|
||||
|
||||
|
||||
25
repo/compression/compression_ids.go
Normal file
25
repo/compression/compression_ids.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package compression
|
||||
|
||||
// HeaderID is a unique identifier of the compressor stored in the compressed block header.
|
||||
type HeaderID uint32
|
||||
|
||||
// defined header IDs
|
||||
const (
|
||||
headerGzipDefault HeaderID = 0x1000
|
||||
headerGzipBestSpeed HeaderID = 0x1001
|
||||
headerGzipBestCompression HeaderID = 0x1002
|
||||
|
||||
headerZstdDefault HeaderID = 0x1100
|
||||
headerZstdFastest HeaderID = 0x1101
|
||||
headerZstdBetterCompression HeaderID = 0x1102
|
||||
headerZstdBestCompression HeaderID = 0x1103
|
||||
|
||||
headerS2Default HeaderID = 0x1200
|
||||
headerS2Better HeaderID = 0x1201
|
||||
headerS2Parallel4 HeaderID = 0x1202
|
||||
headerS2Parallel8 HeaderID = 0x1203
|
||||
|
||||
headerPgzipDefault HeaderID = 0x1300
|
||||
headerPgzipBestSpeed HeaderID = 0x1301
|
||||
headerPgzipBestCompression HeaderID = 0x1302
|
||||
)
|
||||
44
repo/compression/compressor.go
Normal file
44
repo/compression/compressor.go
Normal file
@@ -0,0 +1,44 @@
|
||||
// Package compression manages compression algorithm implementations.
|
||||
package compression
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Name is the name of the compressor to use.
|
||||
type Name string
|
||||
|
||||
// Compressor implements compression and decompression of a byte slice.
|
||||
type Compressor interface {
|
||||
HeaderID() HeaderID
|
||||
Compress(b []byte) ([]byte, error)
|
||||
Decompress(b []byte) ([]byte, error)
|
||||
}
|
||||
|
||||
// maps of registered compressors by header ID and name.
|
||||
var (
|
||||
ByHeaderID = map[HeaderID]Compressor{}
|
||||
ByName = map[Name]Compressor{}
|
||||
)
|
||||
|
||||
// RegisterCompressor registers the provided compressor implementation
|
||||
func RegisterCompressor(name Name, c Compressor) {
|
||||
if ByHeaderID[c.HeaderID()] != nil {
|
||||
panic(fmt.Sprintf("compressor with HeaderID %x already registered", c.HeaderID()))
|
||||
}
|
||||
|
||||
if ByName[name] != nil {
|
||||
panic(fmt.Sprintf("compressor with name %q already registered", name))
|
||||
}
|
||||
|
||||
ByHeaderID[c.HeaderID()] = c
|
||||
ByName[name] = c
|
||||
}
|
||||
|
||||
func compressionHeader(id HeaderID) []byte {
|
||||
b := make([]byte, 4)
|
||||
binary.BigEndian.PutUint32(b, uint32(id))
|
||||
|
||||
return b
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package object
|
||||
package compression
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -9,22 +9,22 @@
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterCompressor("gzip", newGZipCompressor(0x1000, gzip.DefaultCompression))
|
||||
RegisterCompressor("gzip-best-speed", newGZipCompressor(0x1001, gzip.BestSpeed))
|
||||
RegisterCompressor("gzip-best-compression", newGZipCompressor(0x1002, gzip.BestCompression))
|
||||
RegisterCompressor("gzip", newGZipCompressor(headerGzipDefault, gzip.DefaultCompression))
|
||||
RegisterCompressor("gzip-best-speed", newGZipCompressor(headerGzipBestSpeed, gzip.BestSpeed))
|
||||
RegisterCompressor("gzip-best-compression", newGZipCompressor(headerGzipBestCompression, gzip.BestCompression))
|
||||
}
|
||||
|
||||
func newGZipCompressor(id uint32, level int) Compressor {
|
||||
func newGZipCompressor(id HeaderID, level int) Compressor {
|
||||
return &gzipCompressor{id, compressionHeader(id), level}
|
||||
}
|
||||
|
||||
type gzipCompressor struct {
|
||||
id uint32
|
||||
id HeaderID
|
||||
header []byte
|
||||
level int
|
||||
}
|
||||
|
||||
func (c *gzipCompressor) ID() uint32 {
|
||||
func (c *gzipCompressor) HeaderID() HeaderID {
|
||||
return c.id
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package object
|
||||
package compression
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -9,22 +9,22 @@
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterCompressor("pgzip", newpgzipCompressor(0x1300, pgzip.DefaultCompression))
|
||||
RegisterCompressor("pgzip-best-speed", newpgzipCompressor(0x1301, pgzip.BestSpeed))
|
||||
RegisterCompressor("pgzip-best-compression", newpgzipCompressor(0x1302, pgzip.BestCompression))
|
||||
RegisterCompressor("pgzip", newpgzipCompressor(headerPgzipDefault, pgzip.DefaultCompression))
|
||||
RegisterCompressor("pgzip-best-speed", newpgzipCompressor(headerPgzipBestSpeed, pgzip.BestSpeed))
|
||||
RegisterCompressor("pgzip-best-compression", newpgzipCompressor(headerPgzipBestCompression, pgzip.BestCompression))
|
||||
}
|
||||
|
||||
func newpgzipCompressor(id uint32, level int) Compressor {
|
||||
func newpgzipCompressor(id HeaderID, level int) Compressor {
|
||||
return &pgzipCompressor{id, compressionHeader(id), level}
|
||||
}
|
||||
|
||||
type pgzipCompressor struct {
|
||||
id uint32
|
||||
id HeaderID
|
||||
header []byte
|
||||
level int
|
||||
}
|
||||
|
||||
func (c *pgzipCompressor) ID() uint32 {
|
||||
func (c *pgzipCompressor) HeaderID() HeaderID {
|
||||
return c.id
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package object
|
||||
package compression
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -9,23 +9,23 @@
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterCompressor("s2-default", newS2Compressor(0x1200))
|
||||
RegisterCompressor("s2-better", newS2Compressor(0x1201, s2.WriterBetterCompression()))
|
||||
RegisterCompressor("s2-parallel-4", newS2Compressor(0x1202, s2.WriterConcurrency(4)))
|
||||
RegisterCompressor("s2-parallel-8", newS2Compressor(0x1203, s2.WriterConcurrency(8)))
|
||||
RegisterCompressor("s2-default", newS2Compressor(headerS2Default))
|
||||
RegisterCompressor("s2-better", newS2Compressor(headerS2Better, s2.WriterBetterCompression()))
|
||||
RegisterCompressor("s2-parallel-4", newS2Compressor(headerS2Parallel4, s2.WriterConcurrency(4)))
|
||||
RegisterCompressor("s2-parallel-8", newS2Compressor(headerS2Parallel8, s2.WriterConcurrency(8)))
|
||||
}
|
||||
|
||||
func newS2Compressor(id uint32, opts ...s2.WriterOption) Compressor {
|
||||
func newS2Compressor(id HeaderID, opts ...s2.WriterOption) Compressor {
|
||||
return &s2Compressor{id, compressionHeader(id), opts}
|
||||
}
|
||||
|
||||
type s2Compressor struct {
|
||||
id uint32
|
||||
id HeaderID
|
||||
header []byte
|
||||
opts []s2.WriterOption
|
||||
}
|
||||
|
||||
func (c *s2Compressor) ID() uint32 {
|
||||
func (c *s2Compressor) HeaderID() HeaderID {
|
||||
return c.id
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package object
|
||||
package compression
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -8,7 +8,7 @@
|
||||
)
|
||||
|
||||
func TestCompressor(t *testing.T) {
|
||||
for id, comp := range Compressors {
|
||||
for id, comp := range ByHeaderID {
|
||||
id, comp := id, comp
|
||||
|
||||
t.Run(fmt.Sprintf("compressible-data-%x", id), func(t *testing.T) {
|
||||
@@ -25,7 +25,7 @@ func TestCompressor(t *testing.T) {
|
||||
t.Errorf("compression not effective for all-zero data")
|
||||
}
|
||||
|
||||
for id2, comp2 := range Compressors {
|
||||
for id2, comp2 := range ByHeaderID {
|
||||
if id != id2 {
|
||||
if _, err2 := comp2.Decompress(cData); err2 == nil {
|
||||
t.Errorf("compressor %x was able to decompress results of %x", id2, id)
|
||||
@@ -1,4 +1,4 @@
|
||||
package object
|
||||
package compression
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -9,23 +9,23 @@
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterCompressor("zstd", newZstdCompressor(0x1100, zstd.SpeedDefault))
|
||||
RegisterCompressor("zstd-fastest", newZstdCompressor(0x1101, zstd.SpeedFastest))
|
||||
RegisterCompressor("zstd-better-compression", newZstdCompressor(0x1102, zstd.SpeedBetterCompression))
|
||||
RegisterCompressor("zstd-best-compression", newZstdCompressor(0x1103, zstd.SpeedBestCompression))
|
||||
RegisterCompressor("zstd", newZstdCompressor(headerZstdDefault, zstd.SpeedDefault))
|
||||
RegisterCompressor("zstd-fastest", newZstdCompressor(headerZstdFastest, zstd.SpeedFastest))
|
||||
RegisterCompressor("zstd-better-compression", newZstdCompressor(headerZstdBetterCompression, zstd.SpeedBetterCompression))
|
||||
RegisterCompressor("zstd-best-compression", newZstdCompressor(headerZstdBestCompression, zstd.SpeedBestCompression))
|
||||
}
|
||||
|
||||
func newZstdCompressor(id uint32, level zstd.EncoderLevel) Compressor {
|
||||
func newZstdCompressor(id HeaderID, level zstd.EncoderLevel) Compressor {
|
||||
return &zstdCompressor{id, compressionHeader(id), level}
|
||||
}
|
||||
|
||||
type zstdCompressor struct {
|
||||
id uint32
|
||||
id HeaderID
|
||||
header []byte
|
||||
level zstd.EncoderLevel
|
||||
}
|
||||
|
||||
func (c *zstdCompressor) ID() uint32 {
|
||||
func (c *zstdCompressor) HeaderID() HeaderID {
|
||||
return c.id
|
||||
}
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
package object
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// CompressorName is the name of the compressor to use.
|
||||
type CompressorName string
|
||||
|
||||
type Compressor interface {
|
||||
ID() uint32
|
||||
Compress(b []byte) ([]byte, error)
|
||||
Decompress(b []byte) ([]byte, error)
|
||||
}
|
||||
|
||||
var (
|
||||
Compressors = map[uint32]Compressor{}
|
||||
CompressorsByName = map[CompressorName]Compressor{}
|
||||
)
|
||||
|
||||
// RegisterCompressor registers the provided compressor implementation
|
||||
func RegisterCompressor(name CompressorName, c Compressor) {
|
||||
if Compressors[c.ID()] != nil {
|
||||
panic(fmt.Sprintf("compressor with ID %x already registered", c.ID()))
|
||||
}
|
||||
|
||||
if CompressorsByName[name] != nil {
|
||||
panic(fmt.Sprintf("compressor with name %q already registered", name))
|
||||
}
|
||||
|
||||
Compressors[c.ID()] = c
|
||||
CompressorsByName[name] = c
|
||||
}
|
||||
|
||||
func compressionHeader(id uint32) []byte {
|
||||
b := make([]byte, 4)
|
||||
binary.BigEndian.PutUint32(b, id)
|
||||
|
||||
return b
|
||||
}
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/kopia/kopia/repo/compression"
|
||||
"github.com/kopia/kopia/repo/content"
|
||||
)
|
||||
|
||||
@@ -53,7 +54,7 @@ func (om *Manager) NewWriter(ctx context.Context, opt WriterOptions) Writer {
|
||||
splitter: om.newSplitter(),
|
||||
description: opt.Description,
|
||||
prefix: opt.Prefix,
|
||||
compressor: CompressorsByName[opt.Compressor],
|
||||
compressor: compression.ByName[opt.Compressor],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -241,9 +242,9 @@ func (om *Manager) decompress(b []byte) ([]byte, error) {
|
||||
return nil, errors.Errorf("invalid compression header")
|
||||
}
|
||||
|
||||
compressorID := binary.BigEndian.Uint32(b[0:4])
|
||||
compressorID := compression.HeaderID(binary.BigEndian.Uint32(b[0:4]))
|
||||
|
||||
compressor := Compressors[compressorID]
|
||||
compressor := compression.ByHeaderID[compressorID]
|
||||
if compressor == nil {
|
||||
return nil, errors.Errorf("unsupported compressor %x", compressorID)
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
"testing"
|
||||
|
||||
"github.com/kopia/kopia/repo/blob"
|
||||
"github.com/kopia/kopia/repo/compression"
|
||||
"github.com/kopia/kopia/repo/content"
|
||||
)
|
||||
|
||||
@@ -329,7 +330,7 @@ func TestEndToEndReadAndSeekWithCompression(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
_, om := setupTest(t)
|
||||
|
||||
for compressorName := range CompressorsByName {
|
||||
for compressorName := range compression.ByName {
|
||||
for _, size := range []int{1, 199, 200, 201, 9999, 512434} {
|
||||
// Create some random data sample of the specified size.
|
||||
randomData := make([]byte, size)
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/kopia/kopia/repo/compression"
|
||||
"github.com/kopia/kopia/repo/content"
|
||||
)
|
||||
|
||||
@@ -52,7 +53,7 @@ type objectWriter struct {
|
||||
ctx context.Context
|
||||
repo *Manager
|
||||
|
||||
compressor Compressor
|
||||
compressor compression.Compressor
|
||||
|
||||
prefix content.ID
|
||||
buffer bytes.Buffer
|
||||
@@ -179,5 +180,5 @@ func (w *objectWriter) Result() (ID, error) {
|
||||
type WriterOptions struct {
|
||||
Description string
|
||||
Prefix content.ID // empty string or a single-character ('g'..'z')
|
||||
Compressor CompressorName
|
||||
Compressor compression.Name
|
||||
}
|
||||
|
||||
@@ -5,19 +5,19 @@
|
||||
"sort"
|
||||
|
||||
"github.com/kopia/kopia/fs"
|
||||
"github.com/kopia/kopia/repo/object"
|
||||
"github.com/kopia/kopia/repo/compression"
|
||||
)
|
||||
|
||||
// CompressionPolicy specifies compression policy.
|
||||
type CompressionPolicy struct {
|
||||
CompressorName object.CompressorName `json:"compressorName,omitempty"`
|
||||
OnlyCompress []string `json:"onlyCompress,omitempty"`
|
||||
NeverCompress []string `json:"neverCompress,omitempty"`
|
||||
MinSize int64 `json:"minSize,omitempty"`
|
||||
MaxSize int64 `json:"maxSize,omitempty"`
|
||||
CompressorName compression.Name `json:"compressorName,omitempty"`
|
||||
OnlyCompress []string `json:"onlyCompress,omitempty"`
|
||||
NeverCompress []string `json:"neverCompress,omitempty"`
|
||||
MinSize int64 `json:"minSize,omitempty"`
|
||||
MaxSize int64 `json:"maxSize,omitempty"`
|
||||
}
|
||||
|
||||
func (p *CompressionPolicy) CompressorForFile(e fs.File) object.CompressorName {
|
||||
func (p *CompressionPolicy) CompressorForFile(e fs.File) compression.Name {
|
||||
ext := filepath.Ext(e.Name())
|
||||
size := e.Size()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user