vfs: fix slow nfs serve by adding --vfs-handle-caching

Add a configurable grace period (default 5s) that delays closing file
handles and downloaders when the last handle closes. If a new handle
opens within the grace period, it reuses the existing resources.

This fixes 40x performance degradation with serve nfs vs serve sftp
caused by go-nfs opening/reading/closing on every NFS READ RPC, which
destroyed read-ahead prefetch before it could accumulate.

The grace period only applies to non-dirty files so that writeback
proceeds immediately on close.

Fixes #9251
This commit is contained in:
Nick Craig-Wood
2026-03-12 12:55:45 +00:00
parent 654df91375
commit 0db3e7a2a0
4 changed files with 191 additions and 9 deletions

View File

@@ -113,6 +113,9 @@ func newTestCache(t *testing.T) (r *fstest.Run, c *Cache) {
// Disable synchronous write
opt.WriteBack = 0
// Disable handle caching so existing tests get immediate close behavior
opt.HandleCaching = 0
return newTestCacheOpt(t, opt)
}

View File

@@ -68,6 +68,7 @@ type Item struct {
pendingAccesses int // number of threads - cache reset not allowed if not zero
modified bool // set if the file has been modified since the last Open
beingReset bool // cache cleaner is resetting the cache file, access not allowed
graceTimer *time.Timer // timer for delayed close after grace period
}
// Info is persisted to backing store
@@ -91,6 +92,7 @@ const (
SkippedDirty ResetResult = iota // Dirty item cannot be reset
SkippedPendingAccess // Reset pending access can lead to deadlock
SkippedEmpty // Reset empty item does not save space
SkippedGrace // Item is in grace period, treat as in-use
RemovedNotInUse // Item not used. Remove instead of reset
ResetFailed // Reset failed with an error
ResetComplete // Reset completed successfully
@@ -98,7 +100,7 @@ const (
func (rr ResetResult) String() string {
return [...]string{"Dirty item skipped", "In-access item skipped", "Empty item skipped",
"Not-in-use item removed", "Item reset failed", "Item reset completed"}[rr]
"Grace period item skipped", "Not-in-use item removed", "Item reset failed", "Item reset completed"}[rr]
}
func (v Items) Len() int { return len(v) }
@@ -535,6 +537,15 @@ func (item *Item) open(o fs.Object) (err error) {
return nil
}
// Check if recovering from grace period (fd and downloaders still alive)
if item.graceTimer != nil {
item.graceTimer.Stop()
item.graceTimer = nil
// fd and downloaders still alive - reuse them
// _checkObject already called above
return nil
}
err = item._createFile(osPath)
if err != nil {
item._remove("item.open failed on _createFile, remove cache data/metadata files")
@@ -647,10 +658,7 @@ func (item *Item) Close(storeFn StoreFn) (err error) {
// defer log.Trace(item.o, "Item.Close")("err=%v", &err)
item.preAccess()
defer item.postAccess()
var (
downloaders *downloaders.Downloaders
syncWriteBack = item.c.opt.WriteBack <= 0
)
syncWriteBack := item.c.opt.WriteBack <= 0
item.mu.Lock()
defer item.mu.Unlock()
@@ -663,6 +671,44 @@ func (item *Item) Close(storeFn StoreFn) (err error) {
return nil
}
// opens == 0: check for grace period (only for non-dirty files,
// dirty files need immediate close so writeback can proceed)
gracePeriod := time.Duration(item.c.opt.HandleCaching)
if gracePeriod > 0 && !item.info.Dirty {
item.graceTimer = time.AfterFunc(gracePeriod, item.closeAfterGrace)
return nil
}
return item._actualClose(storeFn, syncWriteBack)
}
// closeAfterGrace is called by the grace timer to perform the actual close.
//
// Grace period only applies to non-dirty files, so storeFn (only
// needed for writeback) and syncWriteBack are not relevant.
func (item *Item) closeAfterGrace() {
item.mu.Lock()
defer item.mu.Unlock()
// Someone re-opened, another timer took over, or a reset
// already cancelled the grace timer and cleaned up.
if item.opens > 0 || item.graceTimer == nil {
return
}
item.graceTimer = nil
err := item._actualClose(nil, false)
if err != nil {
fs.Errorf(item.name, "vfs cache: close after grace period failed: %v", err)
}
}
// _actualClose performs the actual close operations on the item.
//
// Call with item.mu held. May temporarily unlock item.mu.
func (item *Item) _actualClose(storeFn StoreFn, syncWriteBack bool) (err error) {
var dls *downloaders.Downloaders
// Update the size on close
_, _ = item._getSize()
@@ -690,7 +736,7 @@ func (item *Item) Close(storeFn StoreFn) (err error) {
}
// Close the downloaders
if downloaders = item.downloaders; downloaders != nil {
if dls = item.downloaders; dls != nil {
item.downloaders = nil
// FIXME need to unlock to kill downloader - should we
// re-arrange locking so this isn't necessary? maybe
@@ -700,7 +746,7 @@ func (item *Item) Close(storeFn StoreFn) (err error) {
// downloader.Write calls ensure which needs the lock
// close downloader with mutex unlocked
item.mu.Unlock()
checkErr(downloaders.Close(nil))
checkErr(dls.Close(nil))
item.mu.Lock()
}
@@ -914,7 +960,7 @@ func (item *Item) RemoveNotInUse(maxAge time.Duration, emptyOnly bool) (removed
spaceFreed = 0
removed = false
if item.opens != 0 || item.info.Dirty {
if item.opens != 0 || item.info.Dirty || item.graceTimer != nil {
return
}
@@ -950,7 +996,9 @@ func (item *Item) Reset() (rr ResetResult, spaceFreed int64, err error) {
defer item.mu.Unlock()
// The item is not being used now. Just remove it instead of resetting it.
if item.opens == 0 && !item.info.Dirty {
// Items in their grace period are treated as in-use; the cache
// cleaner will pick them up on the next pass.
if item.opens == 0 && !item.info.Dirty && item.graceTimer == nil {
spaceFreed = item.info.Rs.Size()
if item._remove("Removing old cache file not in use") {
fs.Errorf(item.name, "item removed when it was writing/uploaded")
@@ -963,6 +1011,12 @@ func (item *Item) Reset() (rr ResetResult, spaceFreed int64, err error) {
return SkippedDirty, 0, nil
}
// Items in their grace period are treated as in-use; the cache
// cleaner will pick them up on the next pass.
if item.graceTimer != nil {
return SkippedGrace, 0, nil
}
/* A wait on pendingAccessCnt to become 0 can lead to deadlock when an item.Open bumps
up the pendingAccesses count, calls item.open, which calls cache.put. The cache.put
operation needs the cache mutex, which is held here. We skip this file now. The

View File

@@ -32,6 +32,9 @@ func newItemTestCache(t *testing.T) (r *fstest.Run, c *Cache) {
// Disable synchronous write
opt.WriteBack = 0
// Disable handle caching so existing tests get immediate close behavior
opt.HandleCaching = 0
return newTestCacheOpt(t, opt)
}
@@ -580,3 +583,119 @@ func TestItemReadWrite(t *testing.T) {
assert.False(t, item.remove(fileName))
})
}
func newItemTestCacheHandleCaching(t *testing.T, handleCaching time.Duration) (r *fstest.Run, c *Cache) {
opt := vfscommon.Opt
// Disable the cache cleaner as it interferes with these tests
opt.CachePollInterval = 0
// Disable synchronous write
opt.WriteBack = 0
// Set handle caching grace period
opt.HandleCaching = fs.Duration(handleCaching)
return newTestCacheOpt(t, opt)
}
func TestItemHandleCaching(t *testing.T) {
r, c := newItemTestCacheHandleCaching(t, 1*time.Second)
contents, obj, item := newFile(t, r, c, "existing")
// Open, read, and close the item
require.NoError(t, item.Open(obj))
buf := make([]byte, 10)
n, err := item.ReadAt(buf, 0)
assert.Equal(t, 10, n)
require.NoError(t, err)
assert.Equal(t, contents[:10], string(buf[:n]))
require.NoError(t, item.Close(nil))
// After close, grace period should keep fd and downloaders alive
item.mu.Lock()
assert.NotNil(t, item.fd, "fd should still be open during grace period")
assert.NotNil(t, item.downloaders, "downloaders should still be alive during grace period")
assert.NotNil(t, item.graceTimer, "grace timer should be set")
item.mu.Unlock()
// Re-open the item - should reuse existing fd and downloaders
require.NoError(t, item.Open(obj))
// Read data to verify it works
n, err = item.ReadAt(buf, 10)
assert.Equal(t, 10, n)
require.NoError(t, err)
assert.Equal(t, contents[10:20], string(buf[:n]))
// Close again
require.NoError(t, item.Close(nil))
// Wait for grace period to expire
time.Sleep(1500 * time.Millisecond)
// After grace period, fd and downloaders should be cleaned up
item.mu.Lock()
assert.Nil(t, item.fd, "fd should be closed after grace period")
assert.Nil(t, item.downloaders, "downloaders should be closed after grace period")
assert.Nil(t, item.graceTimer, "grace timer should be nil after expiry")
item.mu.Unlock()
}
func TestItemHandleCachingDisabled(t *testing.T) {
r, c := newItemTestCacheHandleCaching(t, 0)
contents, obj, item := newFile(t, r, c, "existing")
_ = contents
// Open and close the item
require.NoError(t, item.Open(obj))
require.NoError(t, item.Close(nil))
// With handle caching disabled, fd and downloaders should be immediately closed
item.mu.Lock()
assert.Nil(t, item.fd, "fd should be closed immediately when handle caching disabled")
assert.Nil(t, item.downloaders, "downloaders should be closed immediately when handle caching disabled")
assert.Nil(t, item.graceTimer, "grace timer should not be set when handle caching disabled")
item.mu.Unlock()
}
func TestItemHandleCachingReset(t *testing.T) {
r, c := newItemTestCacheHandleCaching(t, 1*time.Second)
_, obj, item := newFile(t, r, c, "existing")
// Open, read (to instantiate cache), and close the item
require.NoError(t, item.Open(obj))
buf := make([]byte, 10)
_, err := item.ReadAt(buf, 0)
require.NoError(t, err)
require.NoError(t, item.Close(nil))
// Grace timer should be active
item.mu.Lock()
assert.NotNil(t, item.graceTimer, "grace timer should be set")
item.mu.Unlock()
// Reset should skip the item during grace period
rr, _, err := item.Reset()
require.NoError(t, err)
assert.Equal(t, SkippedGrace, rr)
// Grace timer should still be active
item.mu.Lock()
assert.NotNil(t, item.graceTimer, "grace timer should still be set after skipped reset")
item.mu.Unlock()
// Wait for grace period to expire then reset should remove the item
time.Sleep(1500 * time.Millisecond)
rr, _, err = item.Reset()
require.NoError(t, err)
assert.Equal(t, RemovedNotInUse, rr)
}

View File

@@ -165,6 +165,11 @@ var OptionsInfo = fs.Options{{
Default: getGID(),
Help: "Override the gid field set by the filesystem (not supported on Windows)",
Groups: "VFS",
}, {
Name: "vfs_handle_caching",
Default: fs.Duration(5 * time.Second),
Help: "Time to keep file handle and downloaders alive after last close",
Groups: "VFS",
}, {
Name: "vfs_metadata_extension",
Default: "",
@@ -209,6 +214,7 @@ type Options struct {
UsedIsSize bool `config:"vfs_used_is_size"` // if true, use the `rclone size` algorithm for Used size
FastFingerprint bool `config:"vfs_fast_fingerprint"` // if set use fast fingerprints
DiskSpaceTotalSize fs.SizeSuffix `config:"vfs_disk_space_total_size"`
HandleCaching fs.Duration `config:"vfs_handle_caching"` // time to keep handle alive after last close
MetadataExtension string `config:"vfs_metadata_extension"` // if set respond to files with this extension with metadata
}