vfs: fix hang reopening a file during the handle-caching grace period

When a cached file was closed, --vfs-handle-caching kept its handle and
downloaders alive for a grace period and closed them later from a timer.
The deferred close drops the item lock while tearing down the
downloaders, leaving the file handle open. A reopen landing in that
window saw no grace timer and a live handle, failed to create the cache
file with "internal error: didn't Close file" and removed the cache
file, which hung the application reopening the file.

Reopens now wait for an in-progress grace-period close to finish so they
start from a fully closed item.

See: https://forum.rclone.org/t/opening-a-recently-closed-and-cached-file-hangs-rclone/53986/
This commit is contained in:
Nick Craig-Wood
2026-06-30 13:52:51 +01:00
parent 48f8d63d9e
commit 9728fa0ede
2 changed files with 87 additions and 0 deletions

View File

@@ -69,6 +69,7 @@ type Item struct {
modified bool // set if the file has been modified since the last Open
beingReset bool // cache cleaner is resetting the cache file, access not allowed
graceTimer *time.Timer // timer for delayed close after grace period
closing chan struct{} // non-nil while a grace-period close is tearing the handle down, closed when done
}
// Info is persisted to backing store
@@ -520,6 +521,15 @@ func (item *Item) open(o fs.Object) (err error) {
item.mu.Lock()
defer item.mu.Unlock()
// Wait for any in-progress grace-period close to finish so we start
// from a fully closed item rather than racing the fd teardown.
for item.closing != nil {
closing := item.closing
item.mu.Unlock()
<-closing
item.mu.Lock()
}
item.info.ATime = time.Now()
osPath, err := item.c.createItemDir(item.name) // No locking in Cache
@@ -714,7 +724,14 @@ func (item *Item) closeAfterGrace() {
}
item.graceTimer = nil
// _actualClose drops item.mu while it tears down the downloaders,
// during which the fd is still open. Publish that a close is in
// progress so a concurrent open waits rather than tripping over the
// half-closed handle.
item.closing = make(chan struct{})
err := item._actualClose(nil, false)
close(item.closing)
item.closing = nil
if err != nil {
fs.Errorf(item.name, "vfs cache: close after grace period failed: %v", err)
}

View File

@@ -699,3 +699,73 @@ func TestItemHandleCachingReset(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, RemovedNotInUse, rr)
}
// TestItemHandleCachingReopenDuringGraceClose reproduces a race between
// reopening an item and the grace-period close firing for it.
//
// closeAfterGrace clears the grace timer and then runs the actual close,
// which temporarily drops item.mu while it tears down the downloaders -
// at that point the file handle is still open. A reopen landing in that
// window used to see no grace timer and a live fd and fail _createFile
// with "internal error: didn't Close file".
func TestItemHandleCachingReopenDuringGraceClose(t *testing.T) {
r, c := newItemTestCacheHandleCaching(t, 10*time.Second)
_, obj, item := newFile(t, r, c, "existing")
buf := make([]byte, 1)
const iterations = 50
for i := 0; i < iterations; i++ {
// Open, read (to create a downloader) and close so a grace
// timer is pending with the fd and downloaders still alive.
require.NoError(t, item.Open(obj))
_, err := item.ReadAt(buf, 0)
require.NoError(t, err)
require.NoError(t, item.Close(nil))
// Drive the grace close ourselves so we can race it against a
// reopen. Hold item.mu and park both the close (A) and the
// reopen (B) on the lock with A queued first. When we release,
// A runs the close, which drops item.mu to tear down the
// downloaders, and B - already waiting - grabs it in that
// window and observes the still-open fd.
item.mu.Lock()
require.NotNil(t, item.graceTimer, "grace timer should be set after close")
item.graceTimer.Stop()
var openErr error
var wg sync.WaitGroup
wg.Add(2)
startA := make(chan struct{})
startB := make(chan struct{})
go func() {
defer wg.Done()
<-startA
item.closeAfterGrace()
}()
go func() {
defer wg.Done()
<-startB
openErr = item.Open(obj)
}()
close(startA)
time.Sleep(time.Millisecond) // let A park on item.mu first
close(startB)
time.Sleep(time.Millisecond) // let B park on item.mu
item.mu.Unlock()
wg.Wait()
require.NoError(t, openErr, "reopen racing a grace-period close failed on iteration %d", i)
// Drop the handle from the successful reopen so the next
// iteration starts from a closed item.
require.NoError(t, item.Close(nil))
}
// Stop the grace timer left pending by the final close.
item.mu.Lock()
if item.graceTimer != nil {
item.graceTimer.Stop()
}
item.mu.Unlock()
}