Files
kopia/cli/command_index_inspect.go
Jarek Kowalski 9bf9cac7fb refactor(repository): ensure we always parse content.ID and object.ID (#1960)
* refactor(repository): ensure we always parse content.ID and object.ID

This changes the types to be incompatible with string to prevent direct
conversion to and from string.

This has the additional benefit of reducing the number of memory
allocations and the number of bytes used for all IDs.

content.ID went from 2 allocations to 1:
   typical case 32 characters + 16 bytes per-string overhead
   worst-case 65 characters + 16 bytes per-string overhead
   now: 34 bytes

object.ID went from 2 allocations to 1:
   typical case 32 characters + 16 bytes per-string overhead
   worst-case 65 characters + 16 bytes per-string overhead
   now: 36 bytes

* move index.{ID,IDRange} methods to separate files

* replaced index.IDFromHash with content.IDFromHash externally

* minor tweaks and additional tests

* Update repo/content/index/id_test.go

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>

* Update repo/content/index/id_test.go

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>

* pr feedback

* post-merge fixes

* pr feedback

* pr feedback

* fixed subtle regression in sortedContents()

This was actually not producing invalid results because of how base36
works, just not sorting as efficiently as it could.

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>
2022-05-25 14:15:56 +00:00

176 lines
4.2 KiB
Go

package cli
import (
"context"
"sync"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
"github.com/kopia/kopia/internal/gather"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/blob"
"github.com/kopia/kopia/repo/content"
)
// commandIndexInspect implements the "index inspect" CLI command, which
// dumps the entries contained in one or more index blobs.
type commandIndexInspect struct {
all bool // inspect all index blobs, including inactive ones
active bool // inspect all active index blobs
blobIDs []string // explicit list of index blob IDs to inspect
contentIDs []string // when non-empty, only entries with these content IDs are printed
parallel int // number of index blobs inspected concurrently
out textOutput
}
// setup registers the "index inspect" subcommand, wires up its flags and
// argument, and attaches the action that runs against a direct repository.
func (c *commandIndexInspect) setup(svc appServices, parent commandParent) {
	cmd := parent.Command("inspect", "Inspect index blob")
	cmd.Flag("all", "Inspect all index blobs in the repository, including inactive").BoolVar(&c.all)
	cmd.Flag("active", "Inspect all active index blobs").BoolVar(&c.active)
	// Fixed help text: it was a copy-paste of the --active flag's description
	// and did not describe what --content-id actually does (filter output).
	cmd.Flag("content-id", "Only display entries for the provided content IDs").StringsVar(&c.contentIDs)
	cmd.Flag("parallel", "Parallelism").Default("8").IntVar(&c.parallel)
	cmd.Arg("blobs", "Names of index blobs to inspect").StringsVar(&c.blobIDs)
	cmd.Action(svc.directRepositoryReadAction(c.run))
	c.out.setup(svc)
}
// run starts a single printer goroutine that consumes parsed index entries
// from a channel, then produces entries into that channel, and finally waits
// for the printer to drain before returning the producer's error.
func (c *commandIndexInspect) run(ctx context.Context, rep repo.DirectRepository) error {
	entriesCh := make(chan indexBlobPlusContentInfo)

	var printerDone sync.WaitGroup

	printerDone.Add(1)

	go func() {
		defer printerDone.Done()

		c.dumpIndexBlobEntries(entriesCh)
	}()

	runErr := c.runWithOutput(ctx, rep, entriesCh)

	// Closing the channel signals the printer that no more entries will come.
	close(entriesCh)
	printerDone.Wait()

	return runErr
}
// runWithOutput dispatches based on the selected flags: --all inspects every
// index blob (including inactive), --active inspects active ones only, and
// otherwise the explicitly listed blob IDs are inspected one by one.
func (c *commandIndexInspect) runWithOutput(ctx context.Context, rep repo.DirectRepository, output chan indexBlobPlusContentInfo) error {
	if c.all {
		return c.inspectAllBlobs(ctx, rep, true, output)
	}

	if c.active {
		return c.inspectAllBlobs(ctx, rep, false, output)
	}

	if len(c.blobIDs) == 0 {
		return errors.Errorf("must pass either --all, --active or provide a list of blob IDs to inspect")
	}

	for _, id := range c.blobIDs {
		if err := c.inspectSingleIndexBlob(ctx, rep, blob.ID(id), output); err != nil {
			return err
		}
	}

	return nil
}
// inspectAllBlobs lists index blobs (optionally including inactive ones) and
// inspects them using c.parallel worker goroutines, emitting parsed entries
// to the output channel. Returns the first worker error, if any.
func (c *commandIndexInspect) inspectAllBlobs(ctx context.Context, rep repo.DirectRepository, includeInactive bool, output chan indexBlobPlusContentInfo) error {
	indexes, err := rep.IndexBlobs(ctx, includeInactive)
	if err != nil {
		return errors.Wrap(err, "error listing index blobs")
	}

	// Pre-fill a buffered channel with all work items and close it, so each
	// worker can simply range over it until the queue is exhausted.
	work := make(chan content.IndexBlobInfo, len(indexes))
	for _, ib := range indexes {
		work <- ib
	}

	close(work)

	var eg errgroup.Group

	for worker := 0; worker < c.parallel; worker++ {
		eg.Go(func() error {
			for ib := range work {
				if inspectErr := c.inspectSingleIndexBlob(ctx, rep, ib.BlobID, output); inspectErr != nil {
					return inspectErr
				}
			}

			return nil
		})
	}

	// nolint:wrapcheck
	return eg.Wait()
}
// dumpIndexBlobEntries prints one line per received entry (index blob
// timestamp/ID plus content details), skipping entries filtered out by the
// --content-id flag. It runs until the entries channel is closed.
func (c *commandIndexInspect) dumpIndexBlobEntries(entries chan indexBlobPlusContentInfo) {
	for item := range entries {
		info := item.contentInfo

		state := "created"
		if info.GetDeleted() {
			state = "deleted"
		}

		if !c.shouldInclude(info) {
			continue
		}

		c.out.printStdout("%v %v %v %v %v %v %v %v\n",
			formatTimestampPrecise(item.indexBlob.Timestamp), item.indexBlob.BlobID,
			info.GetContentID(), state, formatTimestampPrecise(info.Timestamp()), info.GetPackBlobID(), info.GetPackOffset(), info.GetPackedLength())
	}
}
// shouldInclude reports whether the given content entry should be printed.
// With no --content-id filter everything is included; otherwise the entry's
// content ID must match one of the requested IDs.
func (c *commandIndexInspect) shouldInclude(ci content.Info) bool {
	if len(c.contentIDs) == 0 {
		return true
	}

	want := ci.GetContentID().String()

	for _, candidate := range c.contentIDs {
		if candidate == want {
			return true
		}
	}

	return false
}
// indexBlobPlusContentInfo pairs one content index entry with the metadata
// of the index blob it was parsed from, for printing by dumpIndexBlobEntries.
type indexBlobPlusContentInfo struct {
indexBlob blob.Metadata
contentInfo content.Info
}
// inspectSingleIndexBlob fetches one index blob (metadata and contents),
// parses it, and sends each recovered entry, paired with the blob's
// metadata, to the output channel.
func (c *commandIndexInspect) inspectSingleIndexBlob(ctx context.Context, rep repo.DirectRepository, blobID blob.ID, output chan indexBlobPlusContentInfo) error {
	log(ctx).Debugf("Inspecting blob %v...", blobID)

	md, err := rep.BlobReader().GetMetadata(ctx, blobID)
	if err != nil {
		return errors.Wrapf(err, "unable to get metadata for %v", blobID)
	}

	var payload gather.WriteBuffer
	defer payload.Close()

	if getErr := rep.BlobReader().GetBlob(ctx, blobID, 0, -1, &payload); getErr != nil {
		return errors.Wrapf(getErr, "unable to get data for %v", blobID)
	}

	parsed, err := content.ParseIndexBlob(ctx, blobID, payload.Bytes(), rep.Crypter())
	if err != nil {
		return errors.Wrapf(err, "unable to recover index from %v", blobID)
	}

	for _, entry := range parsed {
		output <- indexBlobPlusContentInfo{indexBlob: md, contentInfo: content.ToInfoStruct(entry)}
	}

	return nil
}