Files
kopia/cli/command_content_stats.go
Julio López d37de8316e refactor(general): generalize units package (#4075)
Generalize a couple of functions in the units package using generics.
This allows removing duplicate code and simplifying callers by removing unnecessary integer conversions.

Additional cleanups:

- make "/s" part of the Printf format string ;
- simplify setSizeMBParameter;
- generalize cli.maybeHumanReadable*` helpers;
- remove unneeded receiver in commandRepositorySetParameters helpers.
2024-08-26 17:26:32 -07:00

160 lines
4.1 KiB
Go

package cli
import (
"context"
"strconv"
"github.com/pkg/errors"
"github.com/kopia/kopia/internal/units"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/compression"
"github.com/kopia/kopia/repo/content"
)
type commandContentStats struct {
raw bool
contentRange contentRangeFlags
out textOutput
}
func (c *commandContentStats) setup(svc appServices, parent commandParent) {
cmd := parent.Command("stats", "Content statistics")
cmd.Flag("raw", "Raw numbers").Short('r').BoolVar(&c.raw)
c.contentRange.setup(cmd)
c.out.setup(svc)
cmd.Action(svc.directRepositoryReadAction(c.run))
}
type contentStatsTotals struct {
originalSize int64
packedSize int64
count int64
}
func (c *commandContentStats) run(ctx context.Context, rep repo.DirectRepository) error {
var (
sizeThreshold uint32 = 10
sizeBuckets []uint32
)
for range 8 {
sizeBuckets = append(sizeBuckets, sizeThreshold)
sizeThreshold *= 10
}
grandTotal, byCompressionTotal, countMap, totalSizeOfContentsUnder, err := c.calculateStats(ctx, rep, sizeBuckets)
if err != nil {
return errors.Wrap(err, "error calculating totals")
}
sizeToString := units.BytesString[int64]
if c.raw {
sizeToString = func(l int64) string {
return strconv.FormatInt(l, 10)
}
}
c.out.printStdout("Count: %v\n", grandTotal.count)
c.out.printStdout("Total Bytes: %v\n", sizeToString(grandTotal.originalSize))
if grandTotal.packedSize < grandTotal.originalSize {
c.out.printStdout(
"Total Packed: %v (compression %v)\n",
sizeToString(grandTotal.packedSize),
formatCompressionPercentage(grandTotal.originalSize, grandTotal.packedSize))
}
if len(byCompressionTotal) > 1 {
c.out.printStdout("By Method:\n")
if bct := byCompressionTotal[content.NoCompression]; bct != nil {
c.out.printStdout(" %-22v count: %v size: %v\n", "(uncompressed)", bct.count, sizeToString(bct.originalSize))
}
for hdrID, bct := range byCompressionTotal {
cname := compression.HeaderIDToName[hdrID]
if cname == "" {
continue
}
c.out.printStdout(" %-22v count: %v size: %v packed: %v compression: %v\n",
cname, bct.count,
sizeToString(bct.originalSize),
sizeToString(bct.packedSize),
formatCompressionPercentage(bct.originalSize, bct.packedSize))
}
}
if grandTotal.count == 0 {
return nil
}
c.out.printStdout("Average: %v\n", sizeToString(grandTotal.originalSize/grandTotal.count))
c.out.printStdout("Histogram:\n\n")
var lastSize uint32
for _, size := range sizeBuckets {
c.out.printStdout("%9v between %v and %v (total %v)\n",
countMap[size]-countMap[lastSize],
sizeToString(int64(lastSize)),
sizeToString(int64(size)),
sizeToString(totalSizeOfContentsUnder[size]-totalSizeOfContentsUnder[lastSize]),
)
lastSize = size
}
return nil
}
func (c *commandContentStats) calculateStats(ctx context.Context, rep repo.DirectRepository, sizeBuckets []uint32) (
grandTotal contentStatsTotals,
byCompressionTotal map[compression.HeaderID]*contentStatsTotals,
countMap map[uint32]int,
totalSizeOfContentsUnder map[uint32]int64,
err error,
) {
byCompressionTotal = make(map[compression.HeaderID]*contentStatsTotals)
totalSizeOfContentsUnder = make(map[uint32]int64)
countMap = make(map[uint32]int)
for _, s := range sizeBuckets {
countMap[s] = 0
}
err = rep.ContentReader().IterateContents(
ctx,
content.IterateOptions{
Range: c.contentRange.contentIDRange(),
},
func(b content.Info) error {
grandTotal.packedSize += int64(b.PackedLength)
grandTotal.originalSize += int64(b.OriginalLength)
grandTotal.count++
bct := byCompressionTotal[b.CompressionHeaderID]
if bct == nil {
bct = &contentStatsTotals{}
byCompressionTotal[b.CompressionHeaderID] = bct
}
bct.packedSize += int64(b.PackedLength)
bct.originalSize += int64(b.OriginalLength)
bct.count++
for s := range countMap {
if b.PackedLength < s {
countMap[s]++
totalSizeOfContentsUnder[s] += int64(b.PackedLength)
}
}
return nil
})
//nolint:wrapcheck
return grandTotal, byCompressionTotal, countMap, totalSizeOfContentsUnder, err
}