feat(cli): compute snapshot diff stats (#4444)

Added functionality to calculate aggregate statistics when
comparing what's changed between snapshots using `kopia diff`.

Statistics collected during snapshot diff computation include:

- files added/removed/modified
- dirs added/removed/modified
- files/dirs with metadata changes but same underlying content (OID)

Testing approach:

Added a test verifying the stats collected when comparing two directories that have the same object ID but different metadata across snapshots (dir mode, dir mod time, dir owner, etc.). The expectation is that all the appropriate dir stats fields are updated.
Added another test verifying the stats collected when comparing two directories whose files have unchanged content but whose file metadata has changed between snapshots. The expectation is that all the relevant file-level stats fields are updated.
Existing tests have been updated because stats are now printed in addition to the previous output.
This commit is contained in:
Rohit-BM18
2025-03-07 07:41:14 -05:00
committed by GitHub
parent 8e4abbbafd
commit 9b68189d29
5 changed files with 433 additions and 67 deletions

View File

@@ -2,6 +2,8 @@
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/pkg/errors"
@@ -63,7 +65,19 @@ func (c *commandDiff) run(ctx context.Context, rep repo.Repository) error {
}
if isDir1 {
return errors.Wrap(d.Compare(ctx, ent1, ent2), "error comparing directories")
snapshotDiffStats, err := d.Compare(ctx, ent1, ent2)
if err != nil {
return errors.Wrap(err, "error comparing directories")
}
b, err := json.Marshal(snapshotDiffStats)
if err != nil {
return errors.Wrap(err, "error marshaling computed snapshot diff stats")
}
fmt.Fprintf(c.out.stdout(), "%s", b) //nolint:errcheck
return nil
}
return errors.New("comparing files not implemented yet")