From a3e1312d9dd67937631c2451c6fa8bb0a2bc82c0 Mon Sep 17 00:00:00 2001 From: Chris <238498929+chris081519-crypto@users.noreply.github.com> Date: Sat, 21 Feb 2026 15:57:57 +0100 Subject: [PATCH] s3: fix Content-MD5 for Object Lock uploads and add GCS quirk AWS S3 requires Content-MD5 for PutObject with Object Lock parameters. Since rclone passes a non-seekable io.Reader, the SDK cannot compute checksums automatically. Buffer the body and compute MD5 manually for singlepart PutObject and presigned request uploads when Object Lock parameters are set. Multipart uploads are unaffected as Object Lock headers go on CreateMultipartUpload which has no body. Add object_lock_supported provider quirk (default true) to allow skipping Object Lock integration tests on providers with incomplete S3 API support. Set to false for GCS which uses non-standard x-goog-bypass-governance-retention header and doesn't implement PutObjectLegalHold/GetObjectLegalHold. Add Multipart and Presigned subtests to Object Lock integration tests to cover all three upload paths. 
Fixes #9199 --- backend/s3/provider/GCS.yaml | 4 +++ backend/s3/providers.go | 1 + backend/s3/s3.go | 45 ++++++++++++++++++++++++++++- backend/s3/s3_internal_test.go | 52 ++++++++++++++++++++++++++++++++++ docs/content/s3.md | 7 ++--- 5 files changed, 104 insertions(+), 5 deletions(-) diff --git a/backend/s3/provider/GCS.yaml b/backend/s3/provider/GCS.yaml index 4ece3251d..28a29de27 100644 --- a/backend/s3/provider/GCS.yaml +++ b/backend/s3/provider/GCS.yaml @@ -18,3 +18,7 @@ quirks: # See: https://issuetracker.google.com/issues/323465186 # So make cutoff very large which it does seem to support copy_cutoff: 9223372036854775807 + # GCS S3 API doesn't fully support Object Lock: + # - Uses x-goog-bypass-governance-retention instead of x-amz-bypass-governance-retention + # - PutObjectLegalHold / GetObjectLegalHold not implemented + object_lock_supported: false diff --git a/backend/s3/providers.go b/backend/s3/providers.go index 4ff6f7696..898e6417e 100644 --- a/backend/s3/providers.go +++ b/backend/s3/providers.go @@ -33,6 +33,7 @@ type Quirks struct { UseXID *bool `yaml:"use_x_id,omitempty"` SignAcceptEncoding *bool `yaml:"sign_accept_encoding,omitempty"` EtagIsNotMD5 *bool `yaml:"etag_is_not_md5,omitempty"` + ObjectLockSupported *bool `yaml:"object_lock_supported,omitempty"` CopyCutoff *int64 `yaml:"copy_cutoff,omitempty"` MaxUploadParts *int `yaml:"max_upload_parts,omitempty"` MinChunkSize *int64 `yaml:"min_chunk_size,omitempty"` diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 80866f63d..5603ad6a1 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -4,6 +4,7 @@ package s3 //go:generate go run gen_setfrom.go -o setfrom.go import ( + "bytes" "context" "crypto/md5" "crypto/tls" @@ -934,6 +935,18 @@ PutObjectRetention and PutObjectLegalHold API calls after the upload completes. 
This adds extra API calls per object, so only enable if your provider requires it.`, Default: false, Advanced: true, + }, { + Name: "object_lock_supported", + Help: `Whether the provider supports S3 Object Lock. + +This should be true, false or left unset to use the default for the provider. + +Set to false for providers that don't fully support the S3 Object Lock API +(e.g. GCS which uses non-standard headers for bypass governance retention +and doesn't implement Legal Hold via the S3 API). +`, + Default: fs.Tristate{}, + Advanced: true, }, }})) } @@ -1120,6 +1133,7 @@ type Options struct { BypassGovernanceRetention bool `config:"bypass_governance_retention"` BucketObjectLockEnabled bool `config:"bucket_object_lock_enabled"` ObjectLockSetAfterUpload bool `config:"object_lock_set_after_upload"` + ObjectLockSupported fs.Tristate `config:"object_lock_supported"` } // Fs represents a remote s3 server @@ -1726,6 +1740,7 @@ func setQuirks(opt *Options, provider *Provider) { set(&opt.UseUnsignedPayload, true, provider.Quirks.UseUnsignedPayload) set(&opt.UseXID, true, provider.Quirks.UseXID) set(&opt.SignAcceptEncoding, true, provider.Quirks.SignAcceptEncoding) + set(&opt.ObjectLockSupported, true, provider.Quirks.ObjectLockSupported) } // setRoot changes the root of the Fs @@ -4608,9 +4623,32 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R return wantETag, gotETag, versionID, s3cw.ui, nil } +// bufferForObjectLockMD5 buffers the body and computes Content-MD5 when +// Object Lock parameters are set on the request. AWS S3 requires Content-MD5 +// for PutObject with Object Lock params, and the SDK cannot compute it +// automatically from a non-seekable io.Reader. 
+// See: https://github.com/aws/aws-sdk-go-v2/discussions/2960 +func bufferForObjectLockMD5(req *s3.PutObjectInput, in io.Reader) (io.Reader, error) { + if req.ObjectLockMode == "" && req.ObjectLockRetainUntilDate == nil && req.ObjectLockLegalHoldStatus == "" { + return in, nil + } + buf, err := io.ReadAll(in) + if err != nil { + return nil, fmt.Errorf("failed to read body for Content-MD5: %w", err) + } + md5sum := md5.Sum(buf) + md5base64 := base64.StdEncoding.EncodeToString(md5sum[:]) + req.ContentMD5 = &md5base64 + return bytes.NewReader(buf), nil +} + // Upload a single part using PutObject func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) { - req.Body = io.NopCloser(in) + in, err = bufferForObjectLockMD5(req, in) + if err != nil { + return etag, lastModified, nil, err + } + req.Body = in var options = []func(*s3.Options){} if o.fs.opt.UseUnsignedPayload.Value { options = append(options, s3.WithAPIOptions( @@ -4640,6 +4678,11 @@ func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjec // Upload a single part using a presigned request func (o *Object) uploadSinglepartPresignedRequest(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) { + // Content-MD5 must be set before signing so it's included in the presigned URL. 
+ in, err = bufferForObjectLockMD5(req, in) + if err != nil { + return etag, lastModified, nil, err + } // Create the presigned request putReq, err := s3.NewPresignClient(o.fs.c).PresignPutObject(ctx, req, s3.WithPresignExpires(15*time.Minute)) if err != nil { diff --git a/backend/s3/s3_internal_test.go b/backend/s3/s3_internal_test.go index c4e6637f5..ebb0ee80e 100644 --- a/backend/s3/s3_internal_test.go +++ b/backend/s3/s3_internal_test.go @@ -499,6 +499,9 @@ func (f *Fs) InternalTestVersions(t *testing.T) { } func (f *Fs) InternalTestObjectLock(t *testing.T) { + if !f.opt.ObjectLockSupported.Value { + t.Skip("Object Lock not supported by this provider (quirk object_lock_supported = false)") + } ctx := context.Background() // Create a temporary bucket with Object Lock enabled to test on. @@ -721,6 +724,55 @@ func (f *Fs) InternalTestObjectLock(t *testing.T) { assert.WithinDuration(t, retainUntilDate, gotRetainDate, time.Second) assert.Equal(t, "ON", gotMetadata["object-lock-legal-hold-status"]) }) + + t.Run("Multipart", func(t *testing.T) { + // Force multipart upload by setting a very low cutoff + oldCutoff := f.opt.UploadCutoff + f.opt.UploadCutoff = fs.SizeSuffix(1) + f.opt.ObjectLockMode = "GOVERNANCE" + f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339) + defer func() { + f.opt.UploadCutoff = oldCutoff + f.opt.ObjectLockMode = "" + f.opt.ObjectLockRetainUntilDate = "" + }() + + contents := random.String(100) + item := fstest.NewItem("test-object-lock-multipart", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + obj := fstests.PutTestContents(ctx, t, f, &item, contents, true) + defer func() { + removeLocked(t, obj) + }() + + o := obj.(*Object) + gotMetadata, err := o.Metadata(ctx) + require.NoError(t, err) + assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"]) + }) + + t.Run("Presigned", func(t *testing.T) { + // Use presigned request upload path + f.opt.UsePresignedRequest = true + f.opt.ObjectLockMode = "GOVERNANCE" + 
f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339) + defer func() { + f.opt.UsePresignedRequest = false + f.opt.ObjectLockMode = "" + f.opt.ObjectLockRetainUntilDate = "" + }() + + contents := random.String(100) + item := fstest.NewItem("test-object-lock-presigned", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + obj := fstests.PutTestContents(ctx, t, f, &item, contents, true) + defer func() { + removeLocked(t, obj) + }() + + o := obj.(*Object) + gotMetadata, err := o.Metadata(ctx) + require.NoError(t, err) + assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"]) + }) } func (f *Fs) InternalTest(t *testing.T) { diff --git a/docs/content/s3.md b/docs/content/s3.md index ef085e99d..836509d95 100644 --- a/docs/content/s3.md +++ b/docs/content/s3.md @@ -913,10 +913,9 @@ According to AWS's [documentation on S3 Object Lock](https://docs.aws.amazon.com > If you configure a default retention period on a bucket, requests to upload objects in such a bucket must include the Content-MD5 header. -As mentioned in the [Modification times and hashes](#modification-times-and-hashes) -section, small files that are not uploaded as multipart, use a different tag, causing -the upload to fail. A simple solution is to set the `--s3-upload-cutoff 0` and force -all the files to be uploaded as multipart. +Rclone automatically computes and includes the Content-MD5 header when +uploading objects with Object Lock parameters set. No special +configuration is needed. #### Setting Object Lock retention