s3: fix Content-MD5 for Object Lock uploads and add GCS quirk

AWS S3 requires Content-MD5 for PutObject with Object Lock parameters.
Since rclone passes a non-seekable io.Reader, the SDK cannot compute
checksums automatically. Buffer the body and compute MD5 manually for
singlepart PutObject and presigned request uploads when Object Lock
parameters are set. Multipart uploads are unaffected as Object Lock
headers go on CreateMultipartUpload which has no body.

Add object_lock_supported provider quirk (default true) to allow
skipping Object Lock integration tests on providers with incomplete
S3 API support. Set to false for GCS, which uses the non-standard
x-goog-bypass-governance-retention header and doesn't implement
PutObjectLegalHold/GetObjectLegalHold.

Add Multipart and Presigned subtests to Object Lock integration tests
to cover all three upload paths.

Fixes #9199
This commit is contained in:
Chris
2026-02-21 15:57:57 +01:00
committed by Nick Craig-Wood
parent e987d4f351
commit a3e1312d9d
5 changed files with 104 additions and 5 deletions

View File

@@ -18,3 +18,7 @@ quirks:
# See: https://issuetracker.google.com/issues/323465186
# So make cutoff very large which it does seem to support
copy_cutoff: 9223372036854775807
# GCS S3 API doesn't fully support Object Lock:
# - Uses x-goog-bypass-governance-retention instead of x-amz-bypass-governance-retention
# - PutObjectLegalHold / GetObjectLegalHold not implemented
object_lock_supported: false

View File

@@ -33,6 +33,7 @@ type Quirks struct {
UseXID *bool `yaml:"use_x_id,omitempty"`
SignAcceptEncoding *bool `yaml:"sign_accept_encoding,omitempty"`
EtagIsNotMD5 *bool `yaml:"etag_is_not_md5,omitempty"`
ObjectLockSupported *bool `yaml:"object_lock_supported,omitempty"`
CopyCutoff *int64 `yaml:"copy_cutoff,omitempty"`
MaxUploadParts *int `yaml:"max_upload_parts,omitempty"`
MinChunkSize *int64 `yaml:"min_chunk_size,omitempty"`

View File

@@ -4,6 +4,7 @@ package s3
//go:generate go run gen_setfrom.go -o setfrom.go
import (
"bytes"
"context"
"crypto/md5"
"crypto/tls"
@@ -934,6 +935,18 @@ PutObjectRetention and PutObjectLegalHold API calls after the upload completes.
This adds extra API calls per object, so only enable if your provider requires it.`,
Default: false,
Advanced: true,
}, {
Name: "object_lock_supported",
Help: `Whether the provider supports S3 Object Lock.
This should be true, false or left unset to use the default for the provider.
Set to false for providers that don't fully support the S3 Object Lock API
(e.g. GCS which uses non-standard headers for bypass governance retention
and doesn't implement Legal Hold via the S3 API).
`,
Default: fs.Tristate{},
Advanced: true,
},
}}))
}
@@ -1120,6 +1133,7 @@ type Options struct {
BypassGovernanceRetention bool `config:"bypass_governance_retention"`
BucketObjectLockEnabled bool `config:"bucket_object_lock_enabled"`
ObjectLockSetAfterUpload bool `config:"object_lock_set_after_upload"`
ObjectLockSupported fs.Tristate `config:"object_lock_supported"`
}
// Fs represents a remote s3 server
@@ -1726,6 +1740,7 @@ func setQuirks(opt *Options, provider *Provider) {
set(&opt.UseUnsignedPayload, true, provider.Quirks.UseUnsignedPayload)
set(&opt.UseXID, true, provider.Quirks.UseXID)
set(&opt.SignAcceptEncoding, true, provider.Quirks.SignAcceptEncoding)
set(&opt.ObjectLockSupported, true, provider.Quirks.ObjectLockSupported)
}
// setRoot changes the root of the Fs
@@ -4608,9 +4623,32 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
return wantETag, gotETag, versionID, s3cw.ui, nil
}
// bufferForObjectLockMD5 makes sure req carries a Content-MD5 when Object
// Lock parameters are set on the request.
//
// AWS S3 requires Content-MD5 for PutObject with Object Lock params and the
// SDK cannot compute it automatically from a non-seekable io.Reader.
// See: https://github.com/aws/aws-sdk-go-v2/discussions/2960
//
// If none of ObjectLockMode, ObjectLockRetainUntilDate or
// ObjectLockLegalHoldStatus is set, or if req.ContentMD5 has already been
// supplied, in is returned untouched: the body is not read into memory and
// an existing checksum is not overwritten.
//
// Otherwise all of in is read into memory, the base64 encoded MD5 of the
// data is stored in req.ContentMD5 and a reader over the buffered bytes is
// returned in place of in.
//
// An error is returned only if reading in fails.
func bufferForObjectLockMD5(req *s3.PutObjectInput, in io.Reader) (io.Reader, error) {
	hasObjectLock := req.ObjectLockMode != "" ||
		req.ObjectLockRetainUntilDate != nil ||
		req.ObjectLockLegalHoldStatus != ""
	if !hasObjectLock {
		return in, nil
	}
	// A Content-MD5 already on the request satisfies the requirement, so
	// avoid buffering the whole body and clobbering the supplied checksum.
	if req.ContentMD5 != nil && *req.ContentMD5 != "" {
		return in, nil
	}
	buf, err := io.ReadAll(in)
	if err != nil {
		return nil, fmt.Errorf("failed to read body for Content-MD5: %w", err)
	}
	md5sum := md5.Sum(buf)
	md5base64 := base64.StdEncoding.EncodeToString(md5sum[:])
	req.ContentMD5 = &md5base64
	return bytes.NewReader(buf), nil
}
// Upload a single part using PutObject
func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) {
req.Body = io.NopCloser(in)
in, err = bufferForObjectLockMD5(req, in)
if err != nil {
return etag, lastModified, nil, err
}
req.Body = in
var options = []func(*s3.Options){}
if o.fs.opt.UseUnsignedPayload.Value {
options = append(options, s3.WithAPIOptions(
@@ -4640,6 +4678,11 @@ func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjec
// Upload a single part using a presigned request
func (o *Object) uploadSinglepartPresignedRequest(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) {
// Content-MD5 must be set before signing so it's included in the presigned URL.
in, err = bufferForObjectLockMD5(req, in)
if err != nil {
return etag, lastModified, nil, err
}
// Create the presigned request
putReq, err := s3.NewPresignClient(o.fs.c).PresignPutObject(ctx, req, s3.WithPresignExpires(15*time.Minute))
if err != nil {

View File

@@ -499,6 +499,9 @@ func (f *Fs) InternalTestVersions(t *testing.T) {
}
func (f *Fs) InternalTestObjectLock(t *testing.T) {
if !f.opt.ObjectLockSupported.Value {
t.Skip("Object Lock not supported by this provider (quirk object_lock_supported = false)")
}
ctx := context.Background()
// Create a temporary bucket with Object Lock enabled to test on.
@@ -721,6 +724,55 @@ func (f *Fs) InternalTestObjectLock(t *testing.T) {
assert.WithinDuration(t, retainUntilDate, gotRetainDate, time.Second)
assert.Equal(t, "ON", gotMetadata["object-lock-legal-hold-status"])
})
t.Run("Multipart", func(t *testing.T) {
// Force multipart upload by setting a very low cutoff
oldCutoff := f.opt.UploadCutoff
f.opt.UploadCutoff = fs.SizeSuffix(1)
f.opt.ObjectLockMode = "GOVERNANCE"
f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339)
defer func() {
f.opt.UploadCutoff = oldCutoff
f.opt.ObjectLockMode = ""
f.opt.ObjectLockRetainUntilDate = ""
}()
contents := random.String(100)
item := fstest.NewItem("test-object-lock-multipart", contents, fstest.Time("2001-05-06T04:05:06.499999999Z"))
obj := fstests.PutTestContents(ctx, t, f, &item, contents, true)
defer func() {
removeLocked(t, obj)
}()
o := obj.(*Object)
gotMetadata, err := o.Metadata(ctx)
require.NoError(t, err)
assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"])
})
t.Run("Presigned", func(t *testing.T) {
// Use presigned request upload path
f.opt.UsePresignedRequest = true
f.opt.ObjectLockMode = "GOVERNANCE"
f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339)
defer func() {
f.opt.UsePresignedRequest = false
f.opt.ObjectLockMode = ""
f.opt.ObjectLockRetainUntilDate = ""
}()
contents := random.String(100)
item := fstest.NewItem("test-object-lock-presigned", contents, fstest.Time("2001-05-06T04:05:06.499999999Z"))
obj := fstests.PutTestContents(ctx, t, f, &item, contents, true)
defer func() {
removeLocked(t, obj)
}()
o := obj.(*Object)
gotMetadata, err := o.Metadata(ctx)
require.NoError(t, err)
assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"])
})
}
func (f *Fs) InternalTest(t *testing.T) {

View File

@@ -913,10 +913,9 @@ According to AWS's [documentation on S3 Object Lock](https://docs.aws.amazon.com
> If you configure a default retention period on a bucket, requests to upload
objects in such a bucket must include the Content-MD5 header.
As mentioned in the [Modification times and hashes](#modification-times-and-hashes)
section, small files that are not uploaded as multipart, use a different tag, causing
the upload to fail. A simple solution is to set the `--s3-upload-cutoff 0` and force
all the files to be uploaded as multipart.
Rclone automatically computes and includes the Content-MD5 header when
uploading objects with Object Lock parameters set. No special
configuration is needed.
#### Setting Object Lock retention