serve s3: fix spurious 404 on HEAD/GET during VFS writeback - fixes #8188

After an upload (notably multipart) to a slow backing remote, the file
lives in the VFS and is returned by ListBucket, but node.DirEntry() stays
nil until the --vfs-write-back writeback completes. HeadObject and
GetObject returned gofakes3.KeyNotFound while it was nil, so a HEAD/GET in
that window 404'd even though the object existed.

getFileHashByte already falls back to hashing the VFS cache when the
backing object isn't available yet. Drop the early nil return, pass the
node (not the fs.Object) to getFileHashByte, and take the Content-Type
from fs.MimeTypeFromName when the backing object isn't there yet.
This commit is contained in:
max
2026-06-25 01:20:01 +02:00
committed by Nick Craig-Wood
parent c1df24f142
commit 445528a3fb
2 changed files with 103 additions and 14 deletions

View File

@@ -137,18 +137,24 @@ func (b *s3Backend) HeadObject(ctx context.Context, bucketName, objectName strin
return nil, gofakes3.KeyNotFound(objectName)
}
// node.DirEntry() is nil while the file is still being uploaded to the
// backing remote (e.g. just after a multipart upload, before the VFS
// writeback completes). In that window the file already exists in the VFS
// and is returned by ListBucket, so serve its metadata from the node
// rather than returning a spurious 404. getFileHashByte already falls back
// to hashing the VFS cache when the backing object is not available yet.
entry := node.DirEntry()
if entry == nil {
return nil, gofakes3.KeyNotFound(objectName)
}
fobj := entry.(fs.Object)
size := node.Size()
hash := getFileHashByte(fobj, b.s.etagHashType)
hash := getFileHashByte(node, b.s.etagHashType)
mimeType := fs.MimeTypeFromName(objectName)
if fobj, ok := entry.(fs.Object); ok {
mimeType = fs.MimeType(context.Background(), fobj)
}
meta := map[string]string{
"Last-Modified": formatHeaderTime(node.ModTime()),
"Content-Type": fs.MimeType(context.Background(), fobj),
"Content-Type": mimeType,
}
if val, ok := b.meta.Load(fp); ok {
@@ -186,16 +192,14 @@ func (b *s3Backend) GetObject(ctx context.Context, bucketName, objectName string
return nil, gofakes3.KeyNotFound(objectName)
}
// As in HeadObject, node.DirEntry() may be nil while the file is still
// being written back to the backing remote. The data is readable from the
// VFS cache via file.Open regardless, so serve it instead of 404ing.
entry := node.DirEntry()
if entry == nil {
return nil, gofakes3.KeyNotFound(objectName)
}
fobj := entry.(fs.Object)
file := node.(*vfs.File)
size := node.Size()
hash := getFileHashByte(fobj, b.s.etagHashType)
hash := getFileHashByte(node, b.s.etagHashType)
in, err := file.Open(os.O_RDONLY)
if err != nil {
@@ -221,9 +225,14 @@ func (b *s3Backend) GetObject(ctx context.Context, bucketName, objectName string
rdr = limitReadCloser(rdr, in.Close, rnge.Length)
}
mimeType := fs.MimeTypeFromName(objectName)
if fobj, ok := entry.(fs.Object); ok {
mimeType = fs.MimeType(context.Background(), fobj)
}
meta := map[string]string{
"Last-Modified": formatHeaderTime(node.ModTime()),
"Content-Type": fs.MimeType(context.Background(), fobj),
"Content-Type": mimeType,
}
if val, ok := b.meta.Load(fp); ok {

View File

@@ -0,0 +1,80 @@
// Regression test for #8188: HEAD/GET during the VFS writeback window.
package s3
import (
"bytes"
"context"
"fmt"
"io"
"net/url"
"testing"
"time"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
"github.com/rclone/rclone/cmd/serve/proxy"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fstest"
"github.com/rclone/rclone/lib/random"
"github.com/rclone/rclone/vfs/vfscommon"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestHeadGetDuringWriteback uploads an object and immediately HEADs and GETs it
// while it is still in the VFS write-back window — i.e. before the dirty file is
// flushed to the backing remote, so node.DirEntry() is still nil. Previously
// HeadObject/GetObject returned a spurious 404 (KeyNotFound) in that window even
// though ListBucket already reported the object. A long --vfs-write-back keeps
// the window open deterministically. Regression test for #8188.
func TestHeadGetDuringWriteback(t *testing.T) {
fstest.Initialise()
ctx := context.Background()
f, err := fs.NewFs(ctx, t.TempDir())
require.NoError(t, err)
const bucket = "test"
require.NoError(t, f.Mkdir(ctx, bucket))
keyid := random.String(16)
keysec := random.String(16)
opt := Opt
opt.AuthKey = []string{fmt.Sprintf("%s,%s", keyid, keysec)}
opt.HTTP.ListenAddr = []string{endpoint}
// Cache writes and hold the writeback so DirEntry() stays nil for the HEAD/GET.
vfsOpt := vfscommon.Opt
vfsOpt.CacheMode = vfscommon.CacheModeWrites
vfsOpt.WriteBack = fs.Duration(2 * time.Minute)
w, err := newServer(ctx, f, &opt, &vfsOpt, &proxy.Opt)
require.NoError(t, err)
go func() { _ = w.Serve() }()
t.Cleanup(func() { _ = w.Shutdown() })
u, err := url.Parse(w.server.URLs()[0])
require.NoError(t, err)
client, err := minio.New(u.Host, &minio.Options{
Creds: credentials.NewStaticV4(keyid, keysec, ""),
Secure: false,
})
require.NoError(t, err)
const object = "writeback.txt"
want := []byte("hello writeback window")
_, err = client.PutObject(ctx, bucket, object, bytes.NewReader(want), int64(len(want)), minio.PutObjectOptions{})
require.NoError(t, err)
// HEAD must not 404 while the object is still being written back.
info, err := client.StatObject(ctx, bucket, object, minio.StatObjectOptions{})
require.NoError(t, err)
assert.Equal(t, int64(len(want)), info.Size)
// GET must return the body from the VFS cache, not 404.
rc, err := client.GetObject(ctx, bucket, object, minio.GetObjectOptions{})
require.NoError(t, err)
got, err := io.ReadAll(rc)
require.NoError(t, err)
require.NoError(t, rc.Close())
assert.Equal(t, want, got)
}