From 9728fa0ede0623fb031fa0772bb6a5a1e4e7b2f0 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Tue, 30 Jun 2026 13:52:51 +0100 Subject: [PATCH] vfs: fix hang reopening a file during the handle-caching grace period When a cached file was closed, --vfs-handle-caching kept its handle and downloaders alive for a grace period and closed them later from a timer. The deferred close drops the item lock while tearing down the downloaders, leaving the file handle open. A reopen landing in that window saw no grace timer and a live handle, failed to create the cache file with "internal error: didn't Close file" and removed the cache file, which hung the application reopening the file. Reopens now wait for an in-progress grace-period close to finish so they start from a fully closed item. See: https://forum.rclone.org/t/opening-a-recently-closed-and-cached-file-hangs-rclone/53986/ --- vfs/vfscache/item.go | 17 ++++++++++ vfs/vfscache/item_test.go | 70 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/vfs/vfscache/item.go b/vfs/vfscache/item.go index ca0459457..d82e90da7 100644 --- a/vfs/vfscache/item.go +++ b/vfs/vfscache/item.go @@ -69,6 +69,7 @@ type Item struct { modified bool // set if the file has been modified since the last Open beingReset bool // cache cleaner is resetting the cache file, access not allowed graceTimer *time.Timer // timer for delayed close after grace period + closing chan struct{} // non-nil while a grace-period close is tearing the handle down, closed when done } // Info is persisted to backing store @@ -520,6 +521,15 @@ func (item *Item) open(o fs.Object) (err error) { item.mu.Lock() defer item.mu.Unlock() + // Wait for any in-progress grace-period close to finish so we start + // from a fully closed item rather than racing the fd teardown. + for item.closing != nil { + closing := item.closing + item.mu.Unlock() + <-closing + item.mu.Lock() + } + item.info.ATime = time.Now() osPath, err := item.c.createItemDir(item.name) // No locking in Cache @@ -714,7 +724,14 @@ func (item *Item) closeAfterGrace() { } item.graceTimer = nil + // _actualClose drops item.mu while it tears down the downloaders, + // during which the fd is still open. Publish that a close is in + // progress so a concurrent open waits rather than tripping over the + // half-closed handle. + item.closing = make(chan struct{}) err := item._actualClose(nil, false) + close(item.closing) + item.closing = nil if err != nil { fs.Errorf(item.name, "vfs cache: close after grace period failed: %v", err) } diff --git a/vfs/vfscache/item_test.go b/vfs/vfscache/item_test.go index 0fc7862b5..8490006e1 100644 --- a/vfs/vfscache/item_test.go +++ b/vfs/vfscache/item_test.go @@ -699,3 +699,73 @@ func TestItemHandleCachingReset(t *testing.T) { require.NoError(t, err) assert.Equal(t, RemovedNotInUse, rr) } + +// TestItemHandleCachingReopenDuringGraceClose reproduces a race between +// reopening an item and the grace-period close firing for it. +// +// closeAfterGrace clears the grace timer and then runs the actual close, +// which temporarily drops item.mu while it tears down the downloaders - +// at that point the file handle is still open. A reopen landing in that +// window used to see no grace timer and a live fd and fail _createFile +// with "internal error: didn't Close file". +func TestItemHandleCachingReopenDuringGraceClose(t *testing.T) { + r, c := newItemTestCacheHandleCaching(t, 10*time.Second) + + _, obj, item := newFile(t, r, c, "existing") + buf := make([]byte, 1) + + const iterations = 50 + for i := 0; i < iterations; i++ { + // Open, read (to create a downloader) and close so a grace + // timer is pending with the fd and downloaders still alive. + require.NoError(t, item.Open(obj)) + _, err := item.ReadAt(buf, 0) + require.NoError(t, err) + require.NoError(t, item.Close(nil)) + + // Drive the grace close ourselves so we can race it against a + // reopen. Hold item.mu and park both the close (A) and the + // reopen (B) on the lock with A queued first. When we release, + // A runs the close, which drops item.mu to tear down the + // downloaders, and B - already waiting - grabs it in that + // window and observes the still-open fd. + item.mu.Lock() + require.NotNil(t, item.graceTimer, "grace timer should be set after close") + item.graceTimer.Stop() + + var openErr error + var wg sync.WaitGroup + wg.Add(2) + startA := make(chan struct{}) + startB := make(chan struct{}) + go func() { + defer wg.Done() + <-startA + item.closeAfterGrace() + }() + go func() { + defer wg.Done() + <-startB + openErr = item.Open(obj) + }() + close(startA) + time.Sleep(time.Millisecond) // let A park on item.mu first + close(startB) + time.Sleep(time.Millisecond) // let B park on item.mu + item.mu.Unlock() + wg.Wait() + + require.NoError(t, openErr, "reopen racing a grace-period close failed on iteration %d", i) + + // Drop the handle from the successful reopen so the next + // iteration starts from a closed item. + require.NoError(t, item.Close(nil)) + } + + // Stop the grace timer left pending by the final close. + item.mu.Lock() + if item.graceTimer != nil { + item.graceTimer.Stop() + } + item.mu.Unlock() +}