diff --git a/cmd/serve/s3/backend.go b/cmd/serve/s3/backend.go index 54019c7fb..566871032 100644 --- a/cmd/serve/s3/backend.go +++ b/cmd/serve/s3/backend.go @@ -137,18 +137,24 @@ func (b *s3Backend) HeadObject(ctx context.Context, bucketName, objectName strin return nil, gofakes3.KeyNotFound(objectName) } + // node.DirEntry() is nil while the file is still being uploaded to the + // backing remote (e.g. just after a multipart upload, before the VFS + // writeback completes). In that window the file already exists in the VFS + // and is returned by ListBucket, so serve its metadata from the node + // rather than returning a spurious 404. getFileHashByte already falls back + // to hashing the VFS cache when the backing object is not available yet. entry := node.DirEntry() - if entry == nil { - return nil, gofakes3.KeyNotFound(objectName) - } - - fobj := entry.(fs.Object) size := node.Size() - hash := getFileHashByte(fobj, b.s.etagHashType) + hash := getFileHashByte(node, b.s.etagHashType) + + mimeType := fs.MimeTypeFromName(objectName) + if fobj, ok := entry.(fs.Object); ok { + mimeType = fs.MimeType(context.Background(), fobj) + } meta := map[string]string{ "Last-Modified": formatHeaderTime(node.ModTime()), - "Content-Type": fs.MimeType(context.Background(), fobj), + "Content-Type": mimeType, } if val, ok := b.meta.Load(fp); ok { @@ -186,16 +192,14 @@ func (b *s3Backend) GetObject(ctx context.Context, bucketName, objectName string return nil, gofakes3.KeyNotFound(objectName) } + // As in HeadObject, node.DirEntry() may be nil while the file is still + // being written back to the backing remote. The data is readable from the + // VFS cache via file.Open regardless, so serve it instead of 404ing. entry := node.DirEntry() - if entry == nil { - return nil, gofakes3.KeyNotFound(objectName) - } - - fobj := entry.(fs.Object) file := node.(*vfs.File) size := node.Size() - hash := getFileHashByte(fobj, b.s.etagHashType) + hash := getFileHashByte(node, b.s.etagHashType) in, err := file.Open(os.O_RDONLY) if err != nil { @@ -221,9 +225,14 @@ func (b *s3Backend) GetObject(ctx context.Context, bucketName, objectName string rdr = limitReadCloser(rdr, in.Close, rnge.Length) } + mimeType := fs.MimeTypeFromName(objectName) + if fobj, ok := entry.(fs.Object); ok { + mimeType = fs.MimeType(context.Background(), fobj) + } + meta := map[string]string{ "Last-Modified": formatHeaderTime(node.ModTime()), - "Content-Type": fs.MimeType(context.Background(), fobj), + "Content-Type": mimeType, } if val, ok := b.meta.Load(fp); ok { diff --git a/cmd/serve/s3/writeback_test.go b/cmd/serve/s3/writeback_test.go new file mode 100644 index 000000000..57bbfb547 --- /dev/null +++ b/cmd/serve/s3/writeback_test.go @@ -0,0 +1,80 @@ +// Regression test for #8188: HEAD/GET during the VFS writeback window. + +package s3 + +import ( + "bytes" + "context" + "fmt" + "io" + "net/url" + "testing" + "time" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/rclone/rclone/cmd/serve/proxy" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fstest" + "github.com/rclone/rclone/lib/random" + "github.com/rclone/rclone/vfs/vfscommon" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestHeadGetDuringWriteback uploads an object and immediately HEADs and GETs it +// while it is still in the VFS write-back window — i.e. before the dirty file is +// flushed to the backing remote, so node.DirEntry() is still nil. Previously +// HeadObject/GetObject returned a spurious 404 (KeyNotFound) in that window even +// though ListBucket already reported the object. A long --vfs-write-back keeps +// the window open deterministically. Regression test for #8188. +func TestHeadGetDuringWriteback(t *testing.T) { + fstest.Initialise() + ctx := context.Background() + f, err := fs.NewFs(ctx, t.TempDir()) + require.NoError(t, err) + const bucket = "test" + require.NoError(t, f.Mkdir(ctx, bucket)) + + keyid := random.String(16) + keysec := random.String(16) + opt := Opt + opt.AuthKey = []string{fmt.Sprintf("%s,%s", keyid, keysec)} + opt.HTTP.ListenAddr = []string{endpoint} + + // Cache writes and hold the writeback so DirEntry() stays nil for the HEAD/GET. + vfsOpt := vfscommon.Opt + vfsOpt.CacheMode = vfscommon.CacheModeWrites + vfsOpt.WriteBack = fs.Duration(2 * time.Minute) + + w, err := newServer(ctx, f, &opt, &vfsOpt, &proxy.Opt) + require.NoError(t, err) + go func() { _ = w.Serve() }() + t.Cleanup(func() { _ = w.Shutdown() }) + + u, err := url.Parse(w.server.URLs()[0]) + require.NoError(t, err) + client, err := minio.New(u.Host, &minio.Options{ + Creds: credentials.NewStaticV4(keyid, keysec, ""), + Secure: false, + }) + require.NoError(t, err) + + const object = "writeback.txt" + want := []byte("hello writeback window") + _, err = client.PutObject(ctx, bucket, object, bytes.NewReader(want), int64(len(want)), minio.PutObjectOptions{}) + require.NoError(t, err) + + // HEAD must not 404 while the object is still being written back. + info, err := client.StatObject(ctx, bucket, object, minio.StatObjectOptions{}) + require.NoError(t, err) + assert.Equal(t, int64(len(want)), info.Size) + + // GET must return the body from the VFS cache, not 404. + rc, err := client.GetObject(ctx, bucket, object, minio.GetObjectOptions{}) + require.NoError(t, err) + got, err := io.ReadAll(rc) + require.NoError(t, err) + require.NoError(t, rc.Close()) + assert.Equal(t, want, got) +}