kopia/fs/cachefs/cache.go
Jarek Kowalski 9bf9cac7fb refactor(repository): ensure we always parse content.ID and object.ID (#1960)
* refactor(repository): ensure we always parse content.ID and object.ID

This changes the types so that they are incompatible with string, preventing
direct conversion to and from string.

This has the additional benefit of reducing the number of memory allocations
and the number of bytes used for all IDs.

content.ID went from 2 allocations to 1:
   before: 32 characters + 16 bytes of per-string overhead (typical case);
           65 characters + 16 bytes (worst case)
   now:    34 bytes

object.ID went from 2 allocations to 1:
   before: 32 characters + 16 bytes of per-string overhead (typical case);
           65 characters + 16 bytes (worst case)
   now:    36 bytes
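
The pattern, in a rough sketch (illustrative only; the field sizes and the
prefix handling below are made up and do not match kopia's actual
definitions): an ID is a small struct rather than a string alias, so
string(id) and ID("...") no longer compile, and every construction has to go
through a parsing function:

    package main

    import (
        "encoding/hex"
        "errors"
        "fmt"
    )

    // ID packs decoded hash bytes instead of storing hex characters,
    // which is what removes one allocation and the per-string overhead.
    type ID struct {
        data   [32]byte // decoded hash bytes
        idLen  byte     // number of valid bytes in data
        prefix byte     // optional one-letter prefix, 0 if none
    }

    // ParseID is the only way to build an ID from a string.
    func ParseID(s string) (ID, error) {
        var id ID

        if len(s) > 0 && s[0] >= 'g' && s[0] <= 'z' {
            id.prefix, s = s[0], s[1:]
        }

        b, err := hex.DecodeString(s)
        if err != nil || len(b) > len(id.data) {
            return ID{}, errors.New("malformed ID")
        }

        copy(id.data[:], b)
        id.idLen = byte(len(b))

        return id, nil
    }

    func (i ID) String() string {
        s := hex.EncodeToString(i.data[:i.idLen])
        if i.prefix != 0 {
            s = string(i.prefix) + s
        }

        return s
    }

    func main() {
        id, err := ParseID("kdeadbeef")
        fmt.Println(id, err) // kdeadbeef <nil>
    }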

* move index.{ID,IDRange} methods to separate files

* replaced index.IDFromHash with content.IDFromHash externally

* minor tweaks and additional tests

* Update repo/content/index/id_test.go

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>

* Update repo/content/index/id_test.go

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>

* pr feedback

* post-merge fixes

* pr feedback

* pr feedback

* fixed subtle regression in sortedContents()

This was not actually producing invalid results, because of how base36
ordering works; it was just not sorting as efficiently as it could.
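
One property at play here (an illustrative aside, not the actual
sortedContents() code): when values are encoded in base36 at equal width,
byte-wise string comparison agrees with numeric comparison, so an inefficient
comparison path can still produce correctly ordered output:

    package main

    import (
        "fmt"
        "strconv"
    )

    // pad36 encodes v in base36, left-padded with '0' to a fixed width.
    func pad36(v uint64, width int) string {
        s := strconv.FormatUint(v, 36)
        for len(s) < width {
            s = "0" + s
        }
        return s
    }

    func main() {
        a, b := uint64(123456), uint64(654321)

        // Equal-width base36 strings compare the same way as the numbers.
        fmt.Println(pad36(a, 8) < pad36(b, 8), a < b) // true true
    }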

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>
2022-05-25 14:15:56 +00:00

203 lines
4.3 KiB
Go

package cachefs

import (
    "context"
    "sync"
    "time"

    "github.com/kopia/kopia/fs"
    "github.com/kopia/kopia/internal/clock"
    "github.com/kopia/kopia/repo/logging"
    "github.com/kopia/kopia/repo/object"
)

var log = logging.Module("kopia/cachefs")

const dirCacheExpiration = 24 * time.Hour

type cacheEntry struct {
    id string

    prev *cacheEntry
    next *cacheEntry

    expireAfter time.Time
    entries     fs.Entries
}

// Cache maintains an in-memory cache of recently-read data to speed up filesystem operations.
type Cache struct {
    mu sync.Locker

    totalDirectoryEntries int
    maxDirectories        int
    maxDirectoryEntries   int
    data                  map[string]*cacheEntry

    // Doubly-linked list of entries, in access-time order.
    head *cacheEntry
    tail *cacheEntry

    debug bool
}
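
// The doubly-linked list below maintains LRU order: head is the most
// recently used entry and tail is the least recently used, i.e. the next
// eviction candidate. All list operations are O(1) and assume that c.mu
// is already held.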

func (c *Cache) moveToHead(e *cacheEntry) {
    if e == c.head {
        // Already at head, no change.
        return
    }

    c.remove(e)
    c.addToHead(e)
}

func (c *Cache) addToHead(e *cacheEntry) {
    // Clear any links left over from a previous position in the list;
    // a stale prev pointer on the head entry would corrupt the list when
    // that entry is later removed.
    e.prev = nil
    e.next = c.head

    if c.head != nil {
        c.head.prev = e
    } else {
        // List was empty, so e is also the tail.
        c.tail = e
    }

    c.head = e
}

func (c *Cache) remove(e *cacheEntry) {
    if e.prev == nil {
        // First element.
        c.head = e.next
    } else {
        e.prev.next = e.next
    }

    if e.next == nil {
        // Last element.
        c.tail = e.prev
    } else {
        e.next.prev = e.prev
    }
}

// Loader provides data to be stored in the cache.
type Loader func(ctx context.Context) (fs.Entries, error)

// EntryWrapper allows an fs.Entry to be modified before inserting it into the cache.
type EntryWrapper func(entry fs.Entry) fs.Entry

// Readdir reads the contents of the provided directory, using the directory's
// ObjectID (if it has one) as the key under which the results are cached.
func (c *Cache) Readdir(ctx context.Context, d fs.Directory, w EntryWrapper) (fs.Entries, error) {
    if h, ok := d.(object.HasObjectID); ok {
        cacheID := h.ObjectID().String()

        return c.getEntries(ctx, cacheID, dirCacheExpiration, d.Readdir, w)
    }

    //nolint:wrapcheck
    return d.Readdir(ctx)
}

func (c *Cache) getEntriesFromCacheLocked(ctx context.Context, id string) fs.Entries {
    if v, ok := c.data[id]; id != "" && ok {
        if clock.Now().Before(v.expireAfter) {
            c.moveToHead(v)

            if c.debug {
                log(ctx).Debugf("cache hit for %q (valid until %v)", id, v.expireAfter)
            }

            return v.entries
        }

        // time expired
        if c.debug {
            log(ctx).Debugf("removing expired cache entry %q after %v", id, v.expireAfter)
        }

        c.removeEntryLocked(v)
    }

    return nil
}

// getEntries consults the cache and either retrieves the contents of the
// directory listing from the cache or invokes the provided callback and adds
// the results to the cache.
func (c *Cache) getEntries(ctx context.Context, id string, expirationTime time.Duration, cb Loader, w EntryWrapper) (fs.Entries, error) {
    if c == nil {
        return cb(ctx)
    }

    c.mu.Lock()
    defer c.mu.Unlock()

    if entries := c.getEntriesFromCacheLocked(ctx, id); entries != nil {
        return entries, nil
    }

    if c.debug {
        log(ctx).Debugf("cache miss for %q", id)
    }

    raw, err := cb(ctx)
    if err != nil {
        return nil, err
    }

    wrapped := make(fs.Entries, len(raw))
    for i, entry := range raw {
        wrapped[i] = w(entry)
    }

    if len(wrapped) > c.maxDirectoryEntries {
        // No point caching, since it would not fit anyway; just return it.
        return wrapped, nil
    }

    entry := &cacheEntry{
        id:          id,
        entries:     wrapped,
        expireAfter: clock.Now().Add(expirationTime),
    }
    c.addToHead(entry)
    c.data[id] = entry

    c.totalDirectoryEntries += len(wrapped)

    // Evict least-recently-used entries until the cache fits within its limits.
    for c.totalDirectoryEntries > c.maxDirectoryEntries || len(c.data) > c.maxDirectories {
        c.removeEntryLocked(c.tail)
    }

    // Return the wrapped entries so that cache misses and cache hits yield
    // consistent results (hits return the wrapped entries stored above).
    return wrapped, nil
}

func (c *Cache) removeEntryLocked(toremove *cacheEntry) {
    c.remove(toremove)
    c.totalDirectoryEntries -= len(toremove.entries)
    delete(c.data, toremove.id)
}

// Options specifies behavior of filesystem Cache.
type Options struct {
    MaxCachedDirectories int
    MaxCachedEntries     int
}

//nolint:gochecknoglobals
var defaultOptions = &Options{
    MaxCachedDirectories: 1000,   //nolint:gomnd
    MaxCachedEntries:     100000, //nolint:gomnd
}

// NewCache creates a filesystem cache with the provided options
// (or defaultOptions when nil is passed).
func NewCache(options *Options) *Cache {
    if options == nil {
        options = defaultOptions
    }

    return &Cache{
        mu:                  &sync.Mutex{},
        data:                make(map[string]*cacheEntry),
        maxDirectories:      options.MaxCachedDirectories,
        maxDirectoryEntries: options.MaxCachedEntries,
    }
}
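
A minimal usage sketch (not part of the file above; listWithCache and the
identity wrapper are made up for illustration): a caller holding an
fs.Directory routes listings through the cache, and the EntryWrapper decides
how entries are transformed before they are cached:

    func listWithCache(ctx context.Context, dir fs.Directory) (fs.Entries, error) {
        // Small limits for illustration; defaultOptions uses 1000 directories
        // and 100000 entries.
        c := NewCache(&Options{
            MaxCachedDirectories: 100,
            MaxCachedEntries:     10000,
        })

        // Identity wrapper: cache and return entries unmodified.
        return c.Readdir(ctx, dir, func(e fs.Entry) fs.Entry { return e })
    }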