mirror of
https://github.com/kopia/kopia.git
synced 2026-01-28 16:23:04 -05:00
* blob: changed default shards from {3,3} to {1,3}
It turns out that for a very large repository of around 100 TB (5M blobs),
we end up creating up to ~16M directories, which is far too many
and slows down listing. Currently each leaf directory holds only a handful
of files.
Simple sharding of {3} should work much better and will end up creating
directories with meaningful shard sizes - 12 K files per directory
should not be too slow, and it will reduce the overhead of listing by
a factor of 4096.
The change is done in a backwards-compatible way and will respect
custom sharding (.shards) file written by previous 0.9 builds
as well as older repositories that don't have the .shards file (which
we assume to be {3,3}).
* fixed compat tests
112 lines
3.1 KiB
Go
112 lines
3.1 KiB
Go
package cli
|
|
|
|
import (
|
|
"context"
|
|
|
|
"github.com/alecthomas/kingpin"
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/kopia/kopia/internal/gather"
|
|
"github.com/kopia/kopia/repo"
|
|
"github.com/kopia/kopia/repo/blob"
|
|
"github.com/kopia/kopia/repo/content"
|
|
)
|
|
|
|
// commandRepositoryRepair holds the flag values of the "repair" command.
type commandRepositoryRepair struct {
	// repairCommandRecoverFormatBlob is the value of --recover-format
	// ("auto", "yes" or "no").
	repairCommandRecoverFormatBlob string

	// repairCommandRecoverFormatBlobPrefixes is the value of
	// --recover-format-block-prefixes; when empty, the pack blob prefixes
	// are used instead.
	repairCommandRecoverFormatBlobPrefixes []string

	// repairDryRun is the value of --dry-run; when set, the recovered
	// format blob is not written back to storage.
	repairDryRun bool
}
|
|
|
|
func (c *commandRepositoryRepair) setup(svc advancedAppServices, parent commandParent) {
|
|
cmd := parent.Command("repair", "Repairs repository.")
|
|
|
|
cmd.Flag("recover-format", "Recover format blob from a copy").Default("auto").EnumVar(&c.repairCommandRecoverFormatBlob, "auto", "yes", "no")
|
|
cmd.Flag("recover-format-block-prefixes", "Prefixes of file names").StringsVar(&c.repairCommandRecoverFormatBlobPrefixes)
|
|
cmd.Flag("dry-run", "Do not modify repository").Short('n').BoolVar(&c.repairDryRun)
|
|
|
|
for _, prov := range storageProviders {
|
|
f := prov.newFlags()
|
|
cc := cmd.Command(prov.name, "Repair repository in "+prov.description)
|
|
f.setup(svc, cc)
|
|
cc.Action(func(_ *kingpin.ParseContext) error {
|
|
ctx := svc.rootContext()
|
|
st, err := f.connect(ctx, false, 0)
|
|
if err != nil {
|
|
return errors.Wrap(err, "can't connect to storage")
|
|
}
|
|
|
|
return c.runRepairCommandWithStorage(ctx, st)
|
|
})
|
|
}
|
|
}
|
|
|
|
func packBlockPrefixes() []string {
|
|
var str []string
|
|
|
|
for _, p := range content.PackBlobIDPrefixes {
|
|
str = append(str, string(p))
|
|
}
|
|
|
|
return str
|
|
}
|
|
|
|
func (c *commandRepositoryRepair) runRepairCommandWithStorage(ctx context.Context, st blob.Storage) error {
|
|
switch c.repairCommandRecoverFormatBlob {
|
|
case "auto":
|
|
log(ctx).Infof("looking for format blob...")
|
|
|
|
var tmp gather.WriteBuffer
|
|
defer tmp.Close()
|
|
|
|
if err := st.GetBlob(ctx, repo.FormatBlobID, 0, -1, &tmp); err == nil {
|
|
log(ctx).Infof("format blob already exists, not recovering, pass --recover-format=yes")
|
|
return nil
|
|
}
|
|
|
|
case "no":
|
|
return nil
|
|
}
|
|
|
|
prefixes := c.repairCommandRecoverFormatBlobPrefixes
|
|
if len(prefixes) == 0 {
|
|
prefixes = packBlockPrefixes()
|
|
}
|
|
|
|
return c.recoverFormatBlob(ctx, st, prefixes)
|
|
}
|
|
|
|
func (c *commandRepositoryRepair) recoverFormatBlob(ctx context.Context, st blob.Storage, prefixes []string) error {
|
|
errSuccess := errors.New("success")
|
|
|
|
for _, prefix := range prefixes {
|
|
err := st.ListBlobs(ctx, blob.ID(prefix), func(bi blob.Metadata) error {
|
|
log(ctx).Infof("looking for replica of format blob in %v...", bi.BlobID)
|
|
if b, err := repo.RecoverFormatBlob(ctx, st, bi.BlobID, bi.Length); err == nil {
|
|
if !c.repairDryRun {
|
|
if puterr := st.PutBlob(ctx, repo.FormatBlobID, gather.FromSlice(b), blob.PutOptions{}); puterr != nil {
|
|
return errors.Wrap(puterr, "error writing format blob")
|
|
}
|
|
}
|
|
|
|
log(ctx).Infof("recovered replica block from %v", bi.BlobID)
|
|
|
|
return errSuccess
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
switch {
|
|
case err == nil:
|
|
// do nothing
|
|
case errors.Is(err, errSuccess):
|
|
return nil
|
|
default:
|
|
return errors.Wrap(err, "unexpected error when listing blobs")
|
|
}
|
|
}
|
|
|
|
return errors.New("could not find a replica of a format blob")
|
|
}
|