Files
kopia/cli/command_snapshot_estimate.go
Julio López d886957c7d feat(snapshots): reduce kernel page-cache pressure during snapshots (#5437)
Add fadvise hints to backup read path to reduce OS cache pollution

Issue FADV_SEQUENTIAL at open and FADV_DONTNEED at close for every file read during backup. This tells the kernel to read-ahead aggressively and then reclaim pages once the file is fully consumed, reducing cache pressure from large backups without adding measurable overhead.

Uses SyscallConn().Control() instead of os.File.Fd() to avoid switching file descriptors to blocking mode, which would remove them from Go's epoll poller and stall the upload pipeline.

Incremental FADV_DONTNEED during reads was tested and removed — it adds ~15% overhead on large-file workloads by fighting the kernel's own LRU. Restore write-path cache eviction was also tested and not included.

Authored-by: Rajat Gupta <rajat.gupta@veeam.com>
2026-06-26 19:29:13 -07:00

171 lines
5.0 KiB
Go

package cli
import (
"context"
"fmt"
"path/filepath"
"time"
"github.com/pkg/errors"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/fs/localfs"
"github.com/kopia/kopia/internal/units"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/snapshot"
"github.com/kopia/kopia/snapshot/policy"
"github.com/kopia/kopia/snapshot/upload"
)
// commandSnapshotEstimate holds the parsed arguments and flags of the
// "estimate" command together with the output helper used to print results.
type commandSnapshotEstimate struct {
	// snapshotEstimateSource is the "source" argument: the path to analyze.
	snapshotEstimateSource string
	// snapshotEstimateShowFiles is the --show-files flag; when set, example
	// files are listed for the included buckets.
	snapshotEstimateShowFiles bool
	// snapshotEstimateQuiet is the --quiet / -q flag; it silences the
	// per-directory progress messages.
	snapshotEstimateQuiet bool
	// snapshotEstimateUploadSpeed is the --upload-speed flag (placeholder
	// "mbit/s", default "10") used to derive the estimated upload time.
	snapshotEstimateUploadSpeed float64
	// maxExamplesPerBucket is the --max-examples-per-bucket flag (default
	// "10"); it is handed to upload.Estimate unchanged.
	maxExamplesPerBucket int

	// out writes the report to standard output.
	out textOutput
}
// setup registers the "estimate" subcommand under parent, binds its argument
// and flags to the fields of c, installs c.run as the command action (wrapped
// by svc.repositoryReaderAction) and initializes the output helper.
func (c *commandSnapshotEstimate) setup(svc appServices, parent commandParent) {
	estimate := parent.Command("estimate", "Estimate the snapshot size and upload time.")

	// Positional argument: the path to analyze; it is mandatory.
	source := estimate.Arg("source", "File or directory to analyze.").Required()
	source.ExistingFileOrDirVar(&c.snapshotEstimateSource)

	// Boolean switches.
	estimate.Flag("show-files", "Show files").BoolVar(&c.snapshotEstimateShowFiles)

	quiet := estimate.Flag("quiet", "Do not display scanning progress").Short('q')
	quiet.BoolVar(&c.snapshotEstimateQuiet)

	// Numeric tuning knobs, both defaulting to "10".
	speed := estimate.Flag("upload-speed", "Upload speed to use for estimation").Default("10").PlaceHolder("mbit/s")
	speed.Float64Var(&c.snapshotEstimateUploadSpeed)

	examples := estimate.Flag("max-examples-per-bucket", "Max examples per bucket").Default("10")
	examples.IntVar(&c.maxExamplesPerBucket)

	estimate.Action(svc.repositoryReaderAction(c.run))

	c.out.setup(svc)
}
// estimateProgress receives progress callbacks during estimation and keeps
// the most recently reported results so they can be printed afterwards.
type estimateProgress struct {
	// stats is a copy of the last statistics passed to Stats.
	stats snapshot.Stats
	// included and excluded are the last sample buckets passed to Stats.
	included, excluded upload.SampleBuckets
	// excludedDirs is the last list of excluded directories passed to Stats.
	excludedDirs []string
	// quiet disables the informational message logged by Processing.
	quiet bool
}
// Processing logs an informational "Analyzing ..." message for dirname,
// unless quiet mode is enabled.
func (ep *estimateProgress) Processing(ctx context.Context, dirname string) {
	if ep.quiet {
		return
	}

	log(ctx).Infof("Analyzing %v...", dirname)
}
// Error logs err for filename via Errorf; the message is prefixed with
// "Ignored error" when isIgnored is true and with "Error" otherwise.
func (ep *estimateProgress) Error(ctx context.Context, filename string, err error, isIgnored bool) {
	if !isIgnored {
		log(ctx).Errorf("Error in %v: %v", filename, err)
		return
	}

	log(ctx).Errorf("Ignored error in %v: %v", filename, err)
}
// Stats records the reported values on ep, overwriting whatever was stored by
// a previous call. The statistics are copied by value; the buckets and the
// excluded-directory slice are stored as passed. The final flag is not used.
func (ep *estimateProgress) Stats(_ context.Context, st *snapshot.Stats, included, excluded upload.SampleBuckets, excludedDirs []string, final bool) {
	_ = final

	ep.stats = *st
	ep.included, ep.excluded = included, excluded
	ep.excludedDirs = excludedDirs
}
// run executes the "estimate" command: it resolves the source to an absolute
// path, runs upload.Estimate over that directory using the policy tree of the
// source, and prints the included/excluded summaries followed by the
// estimated upload time at the configured speed.
//
// An error is returned when the upload speed is not positive, the source path
// cannot be made absolute, the filesystem entry cannot be obtained or is not a
// directory, the policy tree cannot be created, or the estimation fails.
func (c *commandSnapshotEstimate) run(ctx context.Context, rep repo.Repository) error {
	// The upload time is computed below as size / speed. A zero or negative
	// speed would produce an infinite, NaN or negative number of seconds and a
	// meaningless duration, so reject it before doing any work.
	if c.snapshotEstimateUploadSpeed <= 0 {
		return errors.Errorf("invalid upload speed: %v, must be greater than zero", c.snapshotEstimateUploadSpeed)
	}

	path, err := filepath.Abs(c.snapshotEstimateSource)
	if err != nil {
		// path is empty when Abs fails, so report the source the user passed
		// and keep the underlying error wrapped for callers.
		return errors.Wrapf(err, "invalid path: '%s'", c.snapshotEstimateSource)
	}

	sourceInfo := snapshot.SourceInfo{
		Path:     filepath.Clean(path),
		Host:     rep.ClientOptions().Hostname,
		UserName: rep.ClientOptions().Username,
	}

	// snapshot estimate only reads cached fs.Entry metadata and never opens files,
	// so localfs.Options has no effect — pass the zero value.
	entry, err := getLocalFSEntry(ctx, path, localfs.Options{})
	if err != nil {
		return err
	}

	dir, ok := entry.(fs.Directory)
	if !ok {
		return errors.Errorf("invalid path: '%s': must be a directory", path)
	}

	var ep estimateProgress

	ep.quiet = c.snapshotEstimateQuiet

	policyTree, err := policy.TreeForSource(ctx, rep, sourceInfo)
	if err != nil {
		return errors.Wrapf(err, "error creating policy tree for %v", sourceInfo)
	}

	// ep collects the results through its Stats callback.
	if err := upload.Estimate(ctx, dir, policyTree, &ep, c.maxExamplesPerBucket); err != nil {
		return errors.Wrap(err, "error estimating")
	}

	// Included files; examples are listed only when --show-files is set.
	c.out.printStdout("Snapshot includes %v file(s), total size %v\n", ep.stats.TotalFileCount, units.BytesString(ep.stats.TotalFileSize))
	c.showBuckets(ep.included, c.snapshotEstimateShowFiles)
	c.out.printStdout("\n")

	// Excluded files; examples are always listed.
	if ep.stats.ExcludedFileCount > 0 {
		c.out.printStdout("Snapshot excludes %v file(s), total size %v\n", ep.stats.ExcludedFileCount, units.BytesString(ep.stats.ExcludedTotalFileSize))
		c.showBuckets(ep.excluded, true)
	} else {
		c.out.printStdout("Snapshot excludes no files.\n")
	}

	if ep.stats.ExcludedDirCount > 0 {
		c.out.printStdout("Snapshot excludes %v directories. Examples:\n", ep.stats.ExcludedDirCount)

		for _, ed := range ep.excludedDirs {
			c.out.printStdout(" - %v\n", ed)
		}
	} else {
		c.out.printStdout("Snapshot excludes no directories.\n")
	}

	if ep.stats.ErrorCount > 0 {
		c.out.printStdout("Encountered %v error(s).\n", ep.stats.ErrorCount)
	}

	// bytes -> bits -> megabits (10^6 bits), then divide by Mbit/s for seconds.
	megabits := float64(ep.stats.TotalFileSize) * 8 / 1000000 //nolint:mnd
	seconds := megabits / c.snapshotEstimateUploadSpeed

	c.out.printStdout("\n")
	// The conversion truncates the estimate to whole seconds.
	c.out.printStdout("Estimated upload time: %v at %v Mbit/s\n", time.Duration(seconds)*time.Second, c.snapshotEstimateUploadSpeed)

	return nil
}
// showBuckets prints one summary line (size range, file count, total size)
// for every bucket whose Count is non-zero. The first bucket is labelled with
// "< " followed by its MinSize; every later bucket is labelled with its own
// MinSize followed by the MinSize of the bucket before it. When showFiles is
// true, the bucket's examples are listed underneath the summary line.
func (c *commandSnapshotEstimate) showBuckets(buckets upload.SampleBuckets, showFiles bool) {
	for idx := range buckets {
		current := buckets[idx]
		if current.Count == 0 {
			continue
		}

		ownMin := units.BytesString(current.MinSize)

		var label string

		switch idx {
		case 0:
			label = fmt.Sprintf("< %-6v", ownMin)
		default:
			prevMin := units.BytesString(buckets[idx-1].MinSize)
			label = fmt.Sprintf("%-6v...%6v", ownMin, prevMin)
		}

		total := units.BytesString(current.TotalSize)
		c.out.printStdout("%18v: %7v files, total size %v\n", label, current.Count, total)

		if !showFiles {
			continue
		}

		for _, example := range current.Examples {
			c.out.printStdout(" - %v\n", example)
		}
	}
}