Files
kopia/repo/compression/compressor_pgzip.go
Jarek Kowalski bbbf65de8c feat(repository): big reduction in memory usage during decompression (#3780)
This has huge performance benefits for `zstd` and `pgzip`

BEFORE:

```
  0. s2-parallel-4              35.1 MB      8.9 GB/s     688      4.6 GB
  1. s2-parallel-8              35.1 MB      8.1 GB/s     688      4.6 GB
  2. s2-default                 35.1 MB      8 GB/s       689      4.6 GB
  3. s2-better                  33.7 MB      7.2 GB/s     689      4.6 GB
  4. zstd-fastest               18.4 MB      6.3 GB/s     22281    1.2 GB
  5. zstd-better-compression    16.4 MB      5.3 GB/s     17957    2 GB
  6. zstd                       18.1 MB      4.8 GB/s     17711    2.1 GB
  7. deflate-best-speed         20.6 MB      3.7 GB/s     4672     6.4 MB
  8. deflate-best-compression   18.4 MB      3.5 GB/s     8352     6.5 MB
  9. pgzip-best-speed           20.6 MB      3.5 GB/s     27871    679.1 MB
 10. deflate-default            19.7 MB      3.5 GB/s     9152     6.4 MB
 11. pgzip-best-compression     18.4 MB      3.3 GB/s     29494    679.1 MB
 12. pgzip                      19.7 MB      3.3 GB/s     32835    679.2 MB
 13. gzip                       19.3 MB      2.9 GB/s     1427888  4.4 GB
 14. gzip-best-compression      18.5 MB      2.7 GB/s     1489968  4.4 GB
 15. gzip-best-speed            21.6 MB      2.3 GB/s     1373330  4.4 GB
 ```

AFTER:

```
  0. lz4                        33.9 MB      13.4 GB/s    261      4.4 GB (deprecated)
  1. s2-default                 35.1 MB      10.2 GB/s    275      4.3 GB
  2. s2-better                  33.7 MB      9.7 GB/s     274      4.3 GB
  3. s2-parallel-8              35.1 MB      9.7 GB/s     278      4.3 GB
  4. s2-parallel-4              35.1 MB      9.4 GB/s     288      4.3 GB
  5. zstd-fastest               18.4 MB      6.9 GB/s     1192     90.1 MB
  6. zstd-better-compression    16.4 MB      5.8 GB/s     1162     158.8 MB
  7. zstd-best-compression      16.2 MB      5.7 GB/s     1183     159.3 MB (deprecated)
  8. zstd                       18.1 MB      5.2 GB/s     1103     158 MB
  9. deflate-best-speed         20.6 MB      3.8 GB/s     4672     6.4 MB
 10. deflate-best-compression   18.4 MB      3.6 GB/s     8352     6.5 MB
 11. pgzip-best-compression     18.4 MB      3.2 GB/s     28489    75 MB
 12. pgzip-best-speed           20.6 MB      3.2 GB/s     26725    74.9 MB
 13. pgzip                      19.7 MB      3.1 GB/s     31889    75.1 MB
 14. gzip-best-compression      18.5 MB      3.1 GB/s     1489269  4.4 GB
 15. deflate-default            19.7 MB      2.6 GB/s     9164     6.4 MB
 16. gzip-best-speed            21.6 MB      2.4 GB/s     1372639  4.4 GB
 17. gzip                       19.3 MB      2.2 GB/s     1427205  4.4 GB
```
2024-04-04 18:42:55 -07:00

86 lines
1.9 KiB
Go

package compression
import (
"bytes"
"io"
"sync"
"github.com/klauspost/pgzip"
"github.com/pkg/errors"
"github.com/kopia/kopia/internal/freepool"
"github.com/kopia/kopia/internal/iocopy"
)
func init() {
RegisterCompressor("pgzip", newpgzipCompressor(headerPgzipDefault, pgzip.DefaultCompression))
RegisterCompressor("pgzip-best-speed", newpgzipCompressor(headerPgzipBestSpeed, pgzip.BestSpeed))
RegisterCompressor("pgzip-best-compression", newpgzipCompressor(headerPgzipBestCompression, pgzip.BestCompression))
}
// newpgzipCompressor builds a pgzip-backed Compressor identified by id.
//
// level is passed to pgzip.NewWriterLevel each time the writer pool needs a
// fresh writer. Writers are created against a throwaway buffer; Compress
// re-targets them to the real output with Reset before use.
func newpgzipCompressor(id HeaderID, level int) Compressor {
	newWriter := func() any {
		zw, err := pgzip.NewWriterLevel(bytes.NewBuffer(nil), level)
		mustSucceed(err)

		return zw
	}

	return &pgzipCompressor{
		id:     id,
		header: compressionHeader(id),
		pool:   sync.Pool{New: newWriter},
	}
}
// pgzipCompressor compresses and decompresses data using the pgzip library.
type pgzipCompressor struct {
	// id is the value reported by HeaderID.
	id HeaderID

	// header holds the bytes written at the start of every compressed stream
	// and checked by Decompress when a header is expected.
	header []byte

	// pool caches *pgzip.Writer instances so Compress can reuse them.
	pool sync.Pool
}
// HeaderID returns the header identifier this compressor was created with.
func (c *pgzipCompressor) HeaderID() HeaderID {
	return c.id
}
// Compress writes the compressor's header to output, followed by the
// pgzip-compressed contents of input.
//
// A pooled *pgzip.Writer is borrowed for the duration of the call and handed
// back to the pool on return, whether or not compression succeeded.
func (c *pgzipCompressor) Compress(output io.Writer, input io.Reader) error {
	_, err := output.Write(c.header)
	if err != nil {
		return errors.Wrap(err, "unable to write header")
	}

	//nolint:forcetypeassert
	zw := c.pool.Get().(*pgzip.Writer)
	defer c.pool.Put(zw)

	// Point the recycled writer at this call's destination.
	zw.Reset(output)

	err = iocopy.JustCopy(zw, input)
	if err != nil {
		return errors.Wrap(err, "compression error")
	}

	// Close is called explicitly so that a failure while finishing the stream
	// is reported to the caller.
	err = zw.Close()
	if err != nil {
		return errors.Wrap(err, "compression close error")
	}

	return nil
}
// pgzipDecoderPool is a package-level pool of *pgzip.Reader values used by
// Decompress. New readers start as the zero value and are Reset onto the real
// input before use. The second callback passed to freepool.New does nothing
// (presumably it is the pool's per-item cleanup hook; confirm against the
// freepool package).
//
//nolint:gochecknoglobals
var pgzipDecoderPool = freepool.New(
	func() *pgzip.Reader {
		return &pgzip.Reader{}
	},
	func(_ *pgzip.Reader) {
	},
)
// Decompress reads a pgzip stream from input and writes the decompressed
// bytes to output.
//
// When withHeader is true, input is first checked against the compressor's
// header using verifyCompressionHeader, and any mismatch error is returned
// as-is.
//
// A pooled *pgzip.Reader is borrowed for the duration of the call and handed
// back to the pool on return. An error is returned if the header check fails,
// if the reader cannot be initialized on input, or if copying the
// decompressed data to output fails.
func (c *pgzipCompressor) Decompress(output io.Writer, input io.Reader, withHeader bool) error {
	if withHeader {
		if err := verifyCompressionHeader(input, c.header); err != nil {
			return err
		}
	}

	dec := pgzipDecoderPool.Take()
	defer pgzipDecoderPool.Return(dec)

	// Reset parses the gzip header from input, so it fails on empty, truncated
	// or corrupted data. That is an input problem and not a programming error,
	// so report it to the caller instead of panicking.
	if err := dec.Reset(input); err != nil {
		return errors.Wrap(err, "unable to initialize pgzip decoder")
	}

	if err := iocopy.JustCopy(output, dec); err != nil {
		return errors.Wrap(err, "decompression error")
	}

	return nil
}