From e19d6e993d5ed6f66322293ee421e2561937b27e Mon Sep 17 00:00:00 2001 From: Simon Frei Date: Wed, 27 Jan 2021 19:25:34 +0100 Subject: [PATCH] lib/fs: Cache all real-case results (fixes #7270) (#7286) --- go.mod | 1 + go.sum | 1 + lib/config/config_test.go | 2 +- lib/fs/casefs.go | 145 ++++++++++++++++++++++---------------- lib/fs/casefs_test.go | 59 ++++++++++++++-- 5 files changed, 141 insertions(+), 67 deletions(-) diff --git a/go.mod b/go.mod index e7c68ea0f..3e8985592 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/gogo/protobuf v1.3.1 github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e github.com/greatroar/blobloom v0.5.0 + github.com/hashicorp/golang-lru v0.5.1 github.com/jackpal/gateway v1.0.6 github.com/jackpal/go-nat-pmp v1.0.2 github.com/julienschmidt/httprouter v1.3.0 diff --git a/go.sum b/go.sum index ce23e9d4a..4ad4c4794 100644 --- a/go.sum +++ b/go.sum @@ -212,6 +212,7 @@ github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/b github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= diff --git a/lib/config/config_test.go b/lib/config/config_test.go index 14df9ca78..39b9076b5 100644 --- a/lib/config/config_test.go +++ b/lib/config/config_test.go @@ -509,7 +509,7 @@ func TestFolderCheckPath(t *testing.T) { } if err := cfg.CheckPath(); testcase.err != err { - t.Errorf("unexpected error in case %s: %s != %s", testcase.path, err, testcase.err) + t.Errorf("unexpected error in case %s: %s != %v", testcase.path, err, testcase.err) } } } diff --git a/lib/fs/casefs.go b/lib/fs/casefs.go index 1c2c5fcc7..c2cc12b90 100644 --- a/lib/fs/casefs.go +++ b/lib/fs/casefs.go @@ -14,11 +14,14 @@ import ( "strings" "sync" "time" + + lru "github.com/hashicorp/golang-lru" ) const ( // How long to consider cached dirnames valid - caseCacheTimeout = time.Second + caseCacheTimeout = time.Second + caseCacheItemLimit = 4 << 10 ) type ErrCaseConflict struct { @@ -351,96 +354,118 @@ func (f *caseFilesystem) checkCaseExisting(name string) error { } type defaultRealCaser struct { - fs Filesystem - root *caseNode - mut sync.RWMutex + fs Filesystem + cache caseCache } func newDefaultRealCaser(fs Filesystem) *defaultRealCaser { + cache, err := lru.New2Q(caseCacheItemLimit) + // New2Q only errors if given invalid parameters, which we don't. + if err != nil { + panic(err) + } caser := &defaultRealCaser{ - fs: fs, - root: &caseNode{name: "."}, + fs: fs, + cache: caseCache{ + TwoQueueCache: cache, + }, } return caser } func (r *defaultRealCaser) realCase(name string) (string, error) { - out := "." - if name == out { - return out, nil + realName := "." + if name == realName { + return realName, nil } - r.mut.Lock() - defer r.mut.Unlock() - - node := r.root for _, comp := range strings.Split(name, string(PathSeparator)) { - if node.dirNames == nil || node.expires.Before(time.Now()) { - // Haven't called DirNames yet, or the node has expired + node := r.cache.getExpireAdd(realName) - var err error - node.dirNames, err = r.fs.DirNames(out) + node.once.Do(func() { + dirNames, err := r.fs.DirNames(realName) if err != nil { - return "", err + r.cache.Remove(realName) + node.err = err + return } - node.dirNamesLower = make([]string, len(node.dirNames)) - for i, n := range node.dirNames { - node.dirNamesLower[i] = UnicodeLowercase(n) + num := len(dirNames) + node.children = make(map[string]struct{}, num) + node.lowerToReal = make(map[string]string, num) + lastLower := "" + for _, n := range dirNames { + node.children[n] = struct{}{} + lower := UnicodeLowercase(n) + if lower != lastLower { + node.lowerToReal[lower] = n + lastLower = n + } } - - node.expires = time.Now().Add(caseCacheTimeout) - node.child = nil + }) + if node.err != nil { + return "", node.err } - // If we don't already have a correct cached child, try to find it. - if node.child == nil || node.child.name != comp { - // Actually loop dirNames to search for a match. - n, err := findCaseInsensitiveMatch(comp, node.dirNames, node.dirNamesLower) - if err != nil { - return "", err + // Try to find a direct or case match + if _, ok := node.children[comp]; !ok { + comp, ok = node.lowerToReal[UnicodeLowercase(comp)] + if !ok { + return "", ErrNotExist } - node.child = &caseNode{name: n} } - node = node.child - out = filepath.Join(out, node.name) + realName = filepath.Join(realName, comp) } - return out, nil + return realName, nil } func (r *defaultRealCaser) dropCache() { - r.mut.Lock() - r.root = &caseNode{name: "."} - r.mut.Unlock() + r.cache.Purge() } -// Both name and the key to children are "Real", case resolved names of the path +func newCaseNode() *caseNode { + return &caseNode{ + expires: time.Now().Add(caseCacheTimeout), + } +} + +// The keys to children are "real", case resolved names of the path // component this node represents (i.e. containing no path separator). -// The key to results is also a path component, but as given to RealCase, not -// case resolved. +// lowerToReal is a map of lowercase path components (as in UnicodeLowercase) +// to their corresponding "real", case resolved names. +// A node is created empty and populated using once. If an error occurs the node +// is removed from cache and the error stored in err, such that anyone that +// already got the node doesn't try to access the nil maps. type caseNode struct { - name string - expires time.Time - dirNames []string - dirNamesLower []string - child *caseNode + expires time.Time + lowerToReal map[string]string + children map[string]struct{} + once sync.Once + err error } -func findCaseInsensitiveMatch(name string, names, namesLower []string) (string, error) { - lower := UnicodeLowercase(name) - candidate := "" - for i, n := range names { - if n == name { - return n, nil - } - if candidate == "" && namesLower[i] == lower { - candidate = n - } - } - if candidate == "" { - return "", ErrNotExist - } - return candidate, nil +type caseCache struct { + *lru.TwoQueueCache + mut sync.Mutex +} + +// getExpireAdd gets an entry for the given key. If no entry exists, or it is +// expired a new one is created and added to the cache. +func (c *caseCache) getExpireAdd(key string) *caseNode { + c.mut.Lock() + defer c.mut.Unlock() + v, ok := c.Get(key) + if !ok { + node := newCaseNode() + c.Add(key, node) + return node + } + node := v.(*caseNode) + if node.expires.Before(time.Now()) { + node = newCaseNode() + c.Add(key, node) + } + return node } diff --git a/lib/fs/casefs_test.go b/lib/fs/casefs_test.go index 0ea8b8011..ebf776397 100644 --- a/lib/fs/casefs_test.go +++ b/lib/fs/casefs_test.go @@ -163,7 +163,7 @@ func BenchmarkWalkCaseFakeFS100k(b *testing.B) { b.Run("rawfs", func(b *testing.B) { fakefs := unwrapFilesystem(fsys).(*fakefs) fakefs.resetCounters() - benchmarkWalkFakeFS(b, fsys, paths) + benchmarkWalkFakeFS(b, fsys, paths, 0, "") fakefs.reportMetricsPerOp(b) fakefs.reportMetricsPer(b, entries, "entry") b.ReportAllocs() @@ -176,14 +176,37 @@ func BenchmarkWalkCaseFakeFS100k(b *testing.B) { } fakefs := unwrapFilesystem(fsys).(*fakefs) fakefs.resetCounters() - benchmarkWalkFakeFS(b, casefs, paths) + benchmarkWalkFakeFS(b, casefs, paths, 0, "") + fakefs.reportMetricsPerOp(b) + fakefs.reportMetricsPer(b, entries, "entry") + b.ReportAllocs() + }) + var otherOpPath string + sep := string(PathSeparator) + longest := 0 + for _, p := range paths { + if length := len(strings.Split(p, sep)); length > longest { + otherOpPath = p + longest = length + } + } + otherOpEvery := 1000 + b.Run(fmt.Sprintf("casefs-otherOpEvery%v", otherOpEvery), func(b *testing.B) { + // Construct the casefs manually or it will get cached and the benchmark is invalid. + casefs := &caseFilesystem{ + Filesystem: fsys, + realCaser: newDefaultRealCaser(fsys), + } + fakefs := unwrapFilesystem(fsys).(*fakefs) + fakefs.resetCounters() + benchmarkWalkFakeFS(b, casefs, paths, otherOpEvery, otherOpPath) fakefs.reportMetricsPerOp(b) fakefs.reportMetricsPer(b, entries, "entry") b.ReportAllocs() }) } -func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string) { +func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string, otherOpEvery int, otherOpPath string) { // Simulate a scanner pass over the filesystem. First walk it to // discover all names, then stat each name individually to check if it's // been deleted or not (pretending that they all existed in the @@ -194,7 +217,7 @@ func benchmarkWalkFakeFS(b *testing.B, fsys Filesystem, paths []string) { t0 := time.Now() for i := 0; i < b.N; i++ { - if err := doubleWalkFS(fsys, paths); err != nil { + if err := doubleWalkFSWithOtherOps(fsys, paths, otherOpEvery, otherOpPath); err != nil { b.Fatal(err) } } @@ -250,15 +273,39 @@ func TestStressCaseFS(t *testing.T) { } func doubleWalkFS(fsys Filesystem, paths []string) error { + return doubleWalkFSWithOtherOps(fsys, paths, 0, "") +} + +func doubleWalkFSWithOtherOps(fsys Filesystem, paths []string, otherOpEvery int, otherOpPath string) error { + i := 0 if err := fsys.Walk("/", func(path string, info FileInfo, err error) error { + i++ + if otherOpEvery != 0 && i%otherOpEvery == 0 { + // l.Infoln("AAA", otherOpPath) + if _, err := fsys.Lstat(otherOpPath); err != nil { + return err + } + } + // l.Infoln("CCC", path) return err }); err != nil { return err } for _, p := range paths { - if _, err := fsys.Lstat(p); err != nil { - return err + for p != "." { + i++ + if otherOpEvery != 0 && i%otherOpEvery == 0 { + if _, err := fsys.Lstat(otherOpPath); err != nil { + // l.Infoln("AAA", otherOpPath) + return err + } + } + // l.Infoln("CCC", p) + if _, err := fsys.Lstat(p); err != nil { + return err + } + p = filepath.Dir(p) } } return nil