Files
kopia/repo/compression/compressor_zstd.go
Jarek Kowalski bbbf65de8c feat(repository): big reduction in memory usage during decompression (#3780)
This has huge performance benefits for `zstd` and `pgzip`

BEFORE:

```
  0. s2-parallel-4              35.1 MB      8.9 GB/s     688      4.6 GB
  1. s2-parallel-8              35.1 MB      8.1 GB/s     688      4.6 GB
  2. s2-default                 35.1 MB      8 GB/s       689      4.6 GB
  3. s2-better                  33.7 MB      7.2 GB/s     689      4.6 GB
  4. zstd-fastest               18.4 MB      6.3 GB/s     22281    1.2 GB
  5. zstd-better-compression    16.4 MB      5.3 GB/s     17957    2 GB
  6. zstd                       18.1 MB      4.8 GB/s     17711    2.1 GB
  7. deflate-best-speed         20.6 MB      3.7 GB/s     4672     6.4 MB
  8. deflate-best-compression   18.4 MB      3.5 GB/s     8352     6.5 MB
  9. pgzip-best-speed           20.6 MB      3.5 GB/s     27871    679.1 MB
 10. deflate-default            19.7 MB      3.5 GB/s     9152     6.4 MB
 11. pgzip-best-compression     18.4 MB      3.3 GB/s     29494    679.1 MB
 12. pgzip                      19.7 MB      3.3 GB/s     32835    679.2 MB
 13. gzip                       19.3 MB      2.9 GB/s     1427888  4.4 GB
 14. gzip-best-compression      18.5 MB      2.7 GB/s     1489968  4.4 GB
 15. gzip-best-speed            21.6 MB      2.3 GB/s     1373330  4.4 GB
 ```

AFTER:

```
  0. lz4                        33.9 MB      13.4 GB/s    261      4.4 GB (deprecated)
  1. s2-default                 35.1 MB      10.2 GB/s    275      4.3 GB
  2. s2-better                  33.7 MB      9.7 GB/s     274      4.3 GB
  3. s2-parallel-8              35.1 MB      9.7 GB/s     278      4.3 GB
  4. s2-parallel-4              35.1 MB      9.4 GB/s     288      4.3 GB
  5. zstd-fastest               18.4 MB      6.9 GB/s     1192     90.1 MB
  6. zstd-better-compression    16.4 MB      5.8 GB/s     1162     158.8 MB
  7. zstd-best-compression      16.2 MB      5.7 GB/s     1183     159.3 MB (deprecated)
  8. zstd                       18.1 MB      5.2 GB/s     1103     158 MB
  9. deflate-best-speed         20.6 MB      3.8 GB/s     4672     6.4 MB
 10. deflate-best-compression   18.4 MB      3.6 GB/s     8352     6.5 MB
 11. pgzip-best-compression     18.4 MB      3.2 GB/s     28489    75 MB
 12. pgzip-best-speed           20.6 MB      3.2 GB/s     26725    74.9 MB
 13. pgzip                      19.7 MB      3.1 GB/s     31889    75.1 MB
 14. gzip-best-compression      18.5 MB      3.1 GB/s     1489269  4.4 GB
 15. deflate-default            19.7 MB      2.6 GB/s     9164     6.4 MB
 16. gzip-best-speed            21.6 MB      2.4 GB/s     1372639  4.4 GB
 17. gzip                       19.3 MB      2.2 GB/s     1427205  4.4 GB
```
2024-04-04 18:42:55 -07:00

92 lines
2.2 KiB
Go

package compression
import (
"io"
"sync"
"github.com/klauspost/compress/zstd"
"github.com/pkg/errors"
"github.com/kopia/kopia/internal/freepool"
"github.com/kopia/kopia/internal/iocopy"
)
func init() {
RegisterCompressor("zstd", newZstdCompressor(HeaderZstdDefault, zstd.SpeedDefault))
RegisterCompressor("zstd-fastest", newZstdCompressor(HeaderZstdFastest, zstd.SpeedFastest))
RegisterCompressor("zstd-better-compression", newZstdCompressor(HeaderZstdBetterCompression, zstd.SpeedBetterCompression))
RegisterDeprecatedCompressor("zstd-best-compression", newZstdCompressor(HeaderZstdBestCompression, zstd.SpeedBestCompression))
}
// newZstdCompressor builds a zstd-backed Compressor identified by id whose
// encoders are configured with the given encoder level.
//
// Encoders are not created up front: the embedded sync.Pool creates one on
// demand. A new encoder is initially pointed at io.Discard; Compress
// re-targets it at the real output before use.
func newZstdCompressor(id HeaderID, level zstd.EncoderLevel) Compressor {
	makeEncoder := func() interface{} {
		enc, err := zstd.NewWriter(io.Discard, zstd.WithEncoderLevel(level))
		mustSucceed(err)

		return enc
	}

	return &zstdCompressor{
		id:     id,
		header: compressionHeader(id),
		pool:   sync.Pool{New: makeEncoder},
	}
}
// zstdCompressor compresses and decompresses data using the
// github.com/klauspost/compress/zstd package.
type zstdCompressor struct {
// id is the header identifier reported by HeaderID.
id HeaderID
// header is the byte sequence written ahead of the compressed stream by
// Compress and checked by Decompress when withHeader is true.
header []byte
// pool holds reusable *zstd.Encoder values used by Compress.
pool sync.Pool
}
// HeaderID returns the header identifier this compressor was created with.
func (c *zstdCompressor) HeaderID() HeaderID {
return c.id
}
// Compress writes the compressor's header to output and then streams the
// zstd-compressed contents of input after it.
//
// The encoder is borrowed from the compressor's pool and is handed back when
// the function returns, on both success and error paths. An error is returned
// if writing the header, copying the data, or closing the encoder fails.
func (c *zstdCompressor) Compress(output io.Writer, input io.Reader) error {
	_, err := output.Write(c.header)
	if err != nil {
		return errors.Wrap(err, "unable to write header")
	}

	//nolint:forcetypeassert
	enc := c.pool.Get().(*zstd.Encoder)
	defer c.pool.Put(enc)

	// A pooled encoder still points at whatever it was last used with, so it
	// must be re-targeted at this call's output before any data is written.
	enc.Reset(output)

	if err = iocopy.JustCopy(enc, input); err != nil {
		return errors.Wrap(err, "compression error")
	}

	// errors.Wrap yields nil for a nil error, so a clean Close returns nil.
	return errors.Wrap(enc.Close(), "compression close error")
}
// zstdDecoderPool is a package-level pool of zstd decoders used by Decompress.
//
// The first callback creates a decoder with no input attached and a decoder
// concurrency of 1. The second callback resets a decoder to a nil input;
// presumably freepool invokes it when a decoder is returned to the pool
// (confirm against the freepool package).
//
//nolint:gochecknoglobals
var zstdDecoderPool = freepool.New(func() *zstd.Decoder {
	dec, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(1))
	mustSucceed(err)

	return dec
}, func(dec *zstd.Decoder) {
	err := dec.Reset(nil)
	mustSucceed(err)
})
// Decompress reads a zstd stream from input and writes the decoded bytes to
// output.
//
// When withHeader is true, the leading bytes of input are first checked
// against the compressor's header via verifyCompressionHeader, and any
// resulting error is returned as-is. The decoder is taken from
// zstdDecoderPool and handed back when the function returns.
func (c *zstdCompressor) Decompress(output io.Writer, input io.Reader, withHeader bool) error {
	if withHeader {
		err := verifyCompressionHeader(input, c.header)
		if err != nil {
			return err
		}
	}

	decoder := zstdDecoderPool.Take()
	defer zstdDecoderPool.Return(decoder)

	err := decoder.Reset(input)
	if err != nil {
		return errors.Wrap(err, "decompression reset error")
	}

	// errors.Wrap yields nil for a nil error, so a clean copy returns nil.
	return errors.Wrap(iocopy.JustCopy(output, decoder), "decompression error")
}