mirror of
https://github.com/kopia/kopia.git
synced 2026-01-02 11:37:54 -05:00
* content: added support for cache of own writes This keeps track of which blobs (n and m) have been written by the local repository client, so that even if the storage listing is eventually consistent (as in S3), we get somewhat sane behavior. Note that this is still assuming read-after-create semantics, which S3 also guarantees, otherwise it's very hard to do anything useful. * compaction: support for compaction logs Instead of compaction immediately deleting source index blobs, we now write log entries (with `m` prefix) which are merged on reads and applied only if the blob list includes all inputs and outputs, in which case the inputs are discarded since they are known to have been superseded by the outputs. This addresses eventual consistency issues in stores such as S3, which don't guarantee list-after-put or list-after-delete. With such stores the repository is ultimately eventually consistent and there's not much that can be done about it, unless we use second strongly consistent storage (such as GCS) for the index only. * content: updated list cache to cache both `n` and `m` * repo: fixed cache clear on windows Clearing cache requires closing repository first, as Windows is holding the files locked. This requires ability to close the repository twice. * content: refactored index blob management into indexBlobManager * testing: fixed blobtesting.Map storage to allow overwrites * blob: added debug output String() to blob.Metadata * testing: added indexBlobManager stress test This works by using N parallel "actors", each repeatedly performing operations on indexBlobManagers all sharing single eventually consistent storage. 
Each actor runs in a loop and randomly selects between: - *reading* all contents in indexes and verifying that it includes all contents written by the actor so far and that contents are correctly marked as deleted - *creating* new contents - *deleting* one of previously-created contents (by the same actor) - *compacting* all index files into one The test runs on accelerated time (every read of time moves it by 0.1 seconds) and simulates several hours of running. In case of a failure, the log should provide enough debugging information to trace the exact sequence of events leading up to the failure - each log line is prefixed with actorID and all storage access is logged. * makefile: increase test timeout * content: fixed index blob manager race The race is where if we delete compaction log too early, it may lead to previously deleted contents becoming temporarily live again to an outside observer. Added test case that reproduces the issue, verified that it fails without the fix and passes with it. * testing: improvements to TestIndexBlobManagerStress test - better logging to be able to trace the root cause in case of a failure - prevented concurrent compaction which is unsafe: The sequence: 1. A creates contentA1 in INDEX-1 2. B creates contentB1 in INDEX-2 3. A deletes contentA1 in INDEX-3 4. B does compaction, but is not seeing INDEX-3 (due to EC or simply because B started read before #3 completed), so it writes INDEX-4==merge(INDEX-1,INDEX-2) * INDEX-4 has contentA1 as active 5. A does compaction but it's not seeing INDEX-4 yet (due to EC or because read started before #4), so it drops contentA1, writes INDEX-5=merge(INDEX-1,INDEX-2,INDEX-3) * INDEX-5 does not have contentA1 6. C sees INDEX-4 and INDEX-5, and merge(INDEX-4,INDEX-5) contains contentA1 which is wrong, because contentA1 has been deleted (and there's no record of it anywhere in the system) * content: when building pack index ensure index bytes are different each time by adding 32 random bytes
62 lines
1.9 KiB
Go
62 lines
1.9 KiB
Go
package cli
|
|
|
|
import (
|
|
"context"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/kopia/kopia/internal/units"
|
|
"github.com/kopia/kopia/repo"
|
|
)
|
|
|
|
var (
|
|
cacheSetParamsCommand = cacheCommands.Command("set", "Sets parameters local caching of repository data")
|
|
|
|
cacheSetDirectory = cacheSetParamsCommand.Flag("cache-directory", "Directory where to store cache files").String()
|
|
cacheSetContentCacheSizeMB = cacheSetParamsCommand.Flag("content-cache-size-mb", "Size of local content cache").PlaceHolder("MB").Default("-1").Int64()
|
|
cacheSetMaxMetadataCacheSizeMB = cacheSetParamsCommand.Flag("metadata-cache-size-mb", "Size of local metadata cache").PlaceHolder("MB").Default("-1").Int64()
|
|
cacheSetMaxListCacheDuration = cacheSetParamsCommand.Flag("max-list-cache-duration", "Duration of index cache").Default("-1ns").Duration()
|
|
)
|
|
|
|
func runCacheSetCommand(ctx context.Context, rep *repo.DirectRepository) error {
|
|
opts := rep.Content.CachingOptions.CloneOrDefault()
|
|
|
|
changed := 0
|
|
|
|
if v := *cacheSetDirectory; v != "" {
|
|
log(ctx).Infof("setting cache directory to %v", v)
|
|
opts.CacheDirectory = v
|
|
changed++
|
|
}
|
|
|
|
if v := *cacheSetContentCacheSizeMB; v != -1 {
|
|
v *= 1e6 // convert MB to bytes
|
|
log(ctx).Infof("changing content cache size to %v", units.BytesStringBase10(v))
|
|
opts.MaxCacheSizeBytes = v
|
|
changed++
|
|
}
|
|
|
|
if v := *cacheSetMaxMetadataCacheSizeMB; v != -1 {
|
|
v *= 1e6 // convert MB to bytes
|
|
log(ctx).Infof("changing metadata cache size to %v", units.BytesStringBase10(v))
|
|
opts.MaxMetadataCacheSizeBytes = v
|
|
changed++
|
|
}
|
|
|
|
if v := *cacheSetMaxListCacheDuration; v != -1 {
|
|
log(ctx).Infof("changing list cache duration to %v", v)
|
|
opts.MaxListCacheDurationSec = int(v.Seconds())
|
|
changed++
|
|
}
|
|
|
|
if changed == 0 {
|
|
return errors.Errorf("no changes")
|
|
}
|
|
|
|
return rep.SetCachingConfig(ctx, opts)
|
|
}
|
|
|
|
func init() {
|
|
cacheSetParamsCommand.Action(directRepositoryAction(runCacheSetCommand))
|
|
}
|