From 9a42e9b8a5d25ab230e1dc0060167dd90851366a Mon Sep 17 00:00:00 2001 From: Julio Lopez <1953782+julio-lopez@users.noreply.github.com> Date: Wed, 20 Aug 2025 17:49:00 -0700 Subject: [PATCH] refactor(general): miscellaneous cleanups (#4774) Nits and cleanups: - clarify log message to indicate the effect of advancing the deletion watermark; - add omitzero JSON tag to appropriate fields in snapshot.Manifest struct; - use maps.Clone instead of explicit loop; - rename function to IterateUnreferencedPacks for clarity; - use atomic.Int32 type; - move a continue check to the beginning of the loop, no actual work / side effects were performed before the check; - reduce type requirement in blob.ReadBlobMap --- cli/command_content_verify.go | 1 - cli/command_index_inspect.go | 8 ++++---- repo/blob/storage.go | 4 ++-- repo/content/content_manager_iterate.go | 12 ++++++------ repo/content/content_manager_test.go | 2 +- repo/content/index/index_builder.go | 13 ++----------- repo/maintenance/blob_gc.go | 2 +- repo/maintenance/maintenance_run.go | 2 +- snapshot/manifest.go | 2 +- 9 files changed, 18 insertions(+), 28 deletions(-) diff --git a/cli/command_content_verify.go b/cli/command_content_verify.go index 024755270..0048e7dd6 100644 --- a/cli/command_content_verify.go +++ b/cli/command_content_verify.go @@ -38,7 +38,6 @@ func (c *commandContentVerify) setup(svc appServices, parent commandParent) { } func (c *commandContentVerify) run(ctx context.Context, rep repo.DirectRepository) error { - blobMap := map[blob.ID]blob.Metadata{} downloadPercent := c.contentVerifyPercent if c.contentVerifyFull { diff --git a/cli/command_index_inspect.go b/cli/command_index_inspect.go index 7d7121d98..f29ee63e5 100644 --- a/cli/command_index_inspect.go +++ b/cli/command_index_inspect.go @@ -109,6 +109,10 @@ func (c *commandIndexInspect) inspectAllBlobs(ctx context.Context, rep repo.Dire func (c *commandIndexInspect) dumpIndexBlobEntries(entries chan indexBlobPlusContentInfo) { for ent := range entries { + if !c.shouldInclude(ent.contentInfo) { + continue + } + ci := ent.contentInfo bm := ent.indexBlob @@ -117,10 +121,6 @@ func (c *commandIndexInspect) dumpIndexBlobEntries(entries chan indexBlobPlusCon state = "deleted" } - if !c.shouldInclude(ci) { - continue - } - c.out.printStdout("%v %v %v %v %v %v %v %v\n", formatTimestampPrecise(bm.Timestamp), bm.BlobID, ci.ContentID, state, formatTimestampPrecise(ci.Timestamp()), ci.PackBlobID, ci.PackOffset, ci.PackedLength) diff --git a/repo/blob/storage.go b/repo/blob/storage.go index 9a35db5d1..2347f1f2d 100644 --- a/repo/blob/storage.go +++ b/repo/blob/storage.go @@ -398,12 +398,12 @@ func PutBlobAndGetMetadata(ctx context.Context, st Storage, blobID ID, data Byte } // ReadBlobMap reads the map of all the blobs indexed by ID. -func ReadBlobMap(ctx context.Context, br Reader) (map[ID]Metadata, error) { +func ReadBlobMap(ctx context.Context, bl Lister) (map[ID]Metadata, error) { blobMap := map[ID]Metadata{} log(ctx).Info("Listing blobs...") - if err := br.ListBlobs(ctx, "", func(bm Metadata) error { + if err := bl.ListBlobs(ctx, "", func(bm Metadata) error { blobMap[bm.BlobID] = bm if len(blobMap)%10000 == 0 { log(ctx).Infof(" %v blobs...", len(blobMap)) diff --git a/repo/content/content_manager_iterate.go b/repo/content/content_manager_iterate.go index 0f5e0e047..315e6ed45 100644 --- a/repo/content/content_manager_iterate.go +++ b/repo/content/content_manager_iterate.go @@ -227,8 +227,8 @@ func(ci Info) error { return nil } -// IterateUnreferencedBlobs returns the list of unreferenced storage blobs. -func (bm *WriteManager) IterateUnreferencedBlobs(ctx context.Context, blobPrefixes []blob.ID, parallelism int, callback func(blob.Metadata) error) error { +// IterateUnreferencedPacks returns the list of unreferenced storage blobs. +func (bm *WriteManager) IterateUnreferencedPacks(ctx context.Context, blobPrefixes []blob.ID, parallelism int, callback func(blob.Metadata) error) error { usedPacks, err := bigmap.NewSet(ctx) if err != nil { return errors.Wrap(err, "new set") @@ -253,8 +253,6 @@ func(pi PackInfo) error { return errors.Wrap(err, "error iterating packs") } - unusedCount := new(int32) - if len(blobPrefixes) == 0 { blobPrefixes = PackBlobIDPrefixes } @@ -274,20 +272,22 @@ func(pi PackInfo) error { bm.log.Debugf("scanning prefixes %v", prefixes) + var unusedCount atomic.Int32 + if err := blob.IterateAllPrefixesInParallel(ctx, parallelism, bm.st, prefixes, func(bm blob.Metadata) error { if usedPacks.Contains([]byte(bm.BlobID)) { return nil } - atomic.AddInt32(unusedCount, 1) + unusedCount.Add(1) return callback(bm) }); err != nil { return errors.Wrap(err, "error iterating blobs") } - bm.log.Debugf("found %v pack blobs not in use", *unusedCount) + bm.log.Debugf("found %v pack blobs not in use", unusedCount.Load()) return nil } diff --git a/repo/content/content_manager_test.go b/repo/content/content_manager_test.go index bda1cccba..29aa34104 100644 --- a/repo/content/content_manager_test.go +++ b/repo/content/content_manager_test.go @@ -1774,7 +1774,7 @@ func verifyUnreferencedBlobsCount(ctx context.Context, t *testing.T, bm *WriteMa var unrefCount int32 - err := bm.IterateUnreferencedBlobs(ctx, nil, 1, func(_ blob.Metadata) error { + err := bm.IterateUnreferencedPacks(ctx, nil, 1, func(_ blob.Metadata) error { atomic.AddInt32(&unrefCount, 1) return nil }) diff --git a/repo/content/index/index_builder.go b/repo/content/index/index_builder.go index e6216d8d2..1d77de03d 100644 --- a/repo/content/index/index_builder.go +++ b/repo/content/index/index_builder.go @@ -4,6 +4,7 @@ "crypto/rand" "hash/fnv" "io" + "maps" "runtime" "sort" "sync" @@ -25,17 +26,7 @@ type BuilderCreator interface { // Clone returns a deep Clone of the Builder. func (b Builder) Clone() Builder { - if b == nil { - return nil - } - - r := Builder{} - - for k, v := range b { - r[k] = v - } - - return r + return maps.Clone(b) } // Add adds a new entry to the builder or conditionally replaces it if the timestamp is greater. diff --git a/repo/maintenance/blob_gc.go b/repo/maintenance/blob_gc.go index 069d5f0f5..b295e97d0 100644 --- a/repo/maintenance/blob_gc.go +++ b/repo/maintenance/blob_gc.go @@ -87,7 +87,7 @@ func DeleteUnreferencedBlobs(ctx context.Context, rep repo.DirectRepositoryWrite // iterate all pack blobs + session blobs and keep ones that are too young or // belong to alive sessions. - if err := rep.ContentManager().IterateUnreferencedBlobs(ctx, prefixes, opt.Parallel, func(bm blob.Metadata) error { + if err := rep.ContentManager().IterateUnreferencedPacks(ctx, prefixes, opt.Parallel, func(bm blob.Metadata) error { if bm.Timestamp.After(cutoffTime) { log(ctx).Debugf(" preserving %v because it was created after maintenance started", bm.BlobID) return nil diff --git a/repo/maintenance/maintenance_run.go b/repo/maintenance/maintenance_run.go index 0a3168c67..1f4557b05 100644 --- a/repo/maintenance/maintenance_run.go +++ b/repo/maintenance/maintenance_run.go @@ -411,7 +411,7 @@ func runTaskDropDeletedContentsFull(ctx context.Context, runParams RunParameters } if safeDropTime.IsZero() { - log(ctx).Info("Not enough time has passed since previous successful Snapshot GC. Will try again next time.") + log(ctx).Info("Not forgetting deleted contents yet since not enough time has passed since previous successful Snapshot GC. Will try again next time.") return nil } diff --git a/snapshot/manifest.go b/snapshot/manifest.go index 8f3dcf681..01d6d6381 100644 --- a/snapshot/manifest.go +++ b/snapshot/manifest.go @@ -23,7 +23,7 @@ type Manifest struct { StartTime fs.UTCTimestamp `json:"startTime"` EndTime fs.UTCTimestamp `json:"endTime"` - Stats Stats `json:"stats,omitempty"` + Stats Stats `json:"stats,omitzero"` IncompleteReason string `json:"incomplete,omitempty"` RootEntry *DirEntry `json:"rootEntry"`