From 445528a3fb19669dc9dd2c2c3ffa04257162aaee Mon Sep 17 00:00:00 2001 From: max Date: Thu, 25 Jun 2026 01:20:01 +0200 Subject: [PATCH] serve s3: fix spurious 404 on HEAD/GET during VFS writeback - fixes #8188 After an upload (notably multipart) to a slow backing remote, the file lives in the VFS and is returned by ListBucket, but node.DirEntry() stays nil until the --vfs-write-back writeback completes. HeadObject and GetObject returned gofakes3.KeyNotFound while it was nil, so a HEAD/GET in that window 404'd even though the object existed. getFileHashByte already falls back to hashing the VFS cache when the backing object isn't available yet. Drop the early nil return, pass the node (not the fs.Object) to getFileHashByte, and take the Content-Type from fs.MimeTypeFromName when the backing object isn't there yet. --- cmd/serve/s3/backend.go | 37 ++++++++++------ cmd/serve/s3/writeback_test.go | 80 ++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 14 deletions(-) create mode 100644 cmd/serve/s3/writeback_test.go diff --git a/cmd/serve/s3/backend.go b/cmd/serve/s3/backend.go index 54019c7fb..566871032 100644 --- a/cmd/serve/s3/backend.go +++ b/cmd/serve/s3/backend.go @@ -137,18 +137,24 @@ func (b *s3Backend) HeadObject(ctx context.Context, bucketName, objectName strin return nil, gofakes3.KeyNotFound(objectName) } + // node.DirEntry() is nil while the file is still being uploaded to the + // backing remote (e.g. just after a multipart upload, before the VFS + // writeback completes). In that window the file already exists in the VFS + // and is returned by ListBucket, so serve its metadata from the node + // rather than returning a spurious 404. getFileHashByte already falls back + // to hashing the VFS cache when the backing object is not available yet. entry := node.DirEntry() - if entry == nil { - return nil, gofakes3.KeyNotFound(objectName) - } - - fobj := entry.(fs.Object) size := node.Size() - hash := getFileHashByte(fobj, b.s.etagHashType) + hash := getFileHashByte(node, b.s.etagHashType) + + mimeType := fs.MimeTypeFromName(objectName) + if fobj, ok := entry.(fs.Object); ok { + mimeType = fs.MimeType(context.Background(), fobj) + } meta := map[string]string{ "Last-Modified": formatHeaderTime(node.ModTime()), - "Content-Type": fs.MimeType(context.Background(), fobj), + "Content-Type": mimeType, } if val, ok := b.meta.Load(fp); ok { @@ -186,16 +192,14 @@ func (b *s3Backend) GetObject(ctx context.Context, bucketName, objectName string return nil, gofakes3.KeyNotFound(objectName) } + // As in HeadObject, node.DirEntry() may be nil while the file is still + // being written back to the backing remote. The data is readable from the + // VFS cache via file.Open regardless, so serve it instead of 404ing. entry := node.DirEntry() - if entry == nil { - return nil, gofakes3.KeyNotFound(objectName) - } - - fobj := entry.(fs.Object) file := node.(*vfs.File) size := node.Size() - hash := getFileHashByte(fobj, b.s.etagHashType) + hash := getFileHashByte(node, b.s.etagHashType) in, err := file.Open(os.O_RDONLY) if err != nil { @@ -221,9 +225,14 @@ func (b *s3Backend) GetObject(ctx context.Context, bucketName, objectName string rdr = limitReadCloser(rdr, in.Close, rnge.Length) } + mimeType := fs.MimeTypeFromName(objectName) + if fobj, ok := entry.(fs.Object); ok { + mimeType = fs.MimeType(context.Background(), fobj) + } + meta := map[string]string{ "Last-Modified": formatHeaderTime(node.ModTime()), - "Content-Type": fs.MimeType(context.Background(), fobj), + "Content-Type": mimeType, } if val, ok := b.meta.Load(fp); ok { diff --git a/cmd/serve/s3/writeback_test.go b/cmd/serve/s3/writeback_test.go new file mode 100644 index 000000000..57bbfb547 --- /dev/null +++ b/cmd/serve/s3/writeback_test.go @@ -0,0 +1,80 @@ +// Regression test for #8188: HEAD/GET during the VFS writeback window. + +package s3 + +import ( + "bytes" + "context" + "fmt" + "io" + "net/url" + "testing" + "time" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/rclone/rclone/cmd/serve/proxy" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fstest" + "github.com/rclone/rclone/lib/random" + "github.com/rclone/rclone/vfs/vfscommon" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestHeadGetDuringWriteback uploads an object and immediately HEADs and GETs it +// while it is still in the VFS write-back window — i.e. before the dirty file is +// flushed to the backing remote, so node.DirEntry() is still nil. Previously +// HeadObject/GetObject returned a spurious 404 (KeyNotFound) in that window even +// though ListBucket already reported the object. A long --vfs-write-back keeps +// the window open deterministically. Regression test for #8188. +func TestHeadGetDuringWriteback(t *testing.T) { + fstest.Initialise() + ctx := context.Background() + f, err := fs.NewFs(ctx, t.TempDir()) + require.NoError(t, err) + const bucket = "test" + require.NoError(t, f.Mkdir(ctx, bucket)) + + keyid := random.String(16) + keysec := random.String(16) + opt := Opt + opt.AuthKey = []string{fmt.Sprintf("%s,%s", keyid, keysec)} + opt.HTTP.ListenAddr = []string{endpoint} + + // Cache writes and hold the writeback so DirEntry() stays nil for the HEAD/GET. + vfsOpt := vfscommon.Opt + vfsOpt.CacheMode = vfscommon.CacheModeWrites + vfsOpt.WriteBack = fs.Duration(2 * time.Minute) + + w, err := newServer(ctx, f, &opt, &vfsOpt, &proxy.Opt) + require.NoError(t, err) + go func() { _ = w.Serve() }() + t.Cleanup(func() { _ = w.Shutdown() }) + + u, err := url.Parse(w.server.URLs()[0]) + require.NoError(t, err) + client, err := minio.New(u.Host, &minio.Options{ + Creds: credentials.NewStaticV4(keyid, keysec, ""), + Secure: false, + }) + require.NoError(t, err) + + const object = "writeback.txt" + want := []byte("hello writeback window") + _, err = client.PutObject(ctx, bucket, object, bytes.NewReader(want), int64(len(want)), minio.PutObjectOptions{}) + require.NoError(t, err) + + // HEAD must not 404 while the object is still being written back. + info, err := client.StatObject(ctx, bucket, object, minio.StatObjectOptions{}) + require.NoError(t, err) + assert.Equal(t, int64(len(want)), info.Size) + + // GET must return the body from the VFS cache, not 404. + rc, err := client.GetObject(ctx, bucket, object, minio.GetObjectOptions{}) + require.NoError(t, err) + got, err := io.ReadAll(rc) + require.NoError(t, err) + require.NoError(t, rc.Close()) + assert.Equal(t, want, got) +}