diff --git a/backend/s3/provider/GCS.yaml b/backend/s3/provider/GCS.yaml index 4ece3251d..28a29de27 100644 --- a/backend/s3/provider/GCS.yaml +++ b/backend/s3/provider/GCS.yaml @@ -18,3 +18,7 @@ quirks: # See: https://issuetracker.google.com/issues/323465186 # So make cutoff very large which it does seem to support copy_cutoff: 9223372036854775807 + # GCS S3 API doesn't fully support Object Lock: + # - Uses x-goog-bypass-governance-retention instead of x-amz-bypass-governance-retention + # - PutObjectLegalHold / GetObjectLegalHold not implemented + object_lock_supported: false diff --git a/backend/s3/providers.go b/backend/s3/providers.go index 4ff6f7696..898e6417e 100644 --- a/backend/s3/providers.go +++ b/backend/s3/providers.go @@ -33,6 +33,7 @@ type Quirks struct { UseXID *bool `yaml:"use_x_id,omitempty"` SignAcceptEncoding *bool `yaml:"sign_accept_encoding,omitempty"` EtagIsNotMD5 *bool `yaml:"etag_is_not_md5,omitempty"` + ObjectLockSupported *bool `yaml:"object_lock_supported,omitempty"` CopyCutoff *int64 `yaml:"copy_cutoff,omitempty"` MaxUploadParts *int `yaml:"max_upload_parts,omitempty"` MinChunkSize *int64 `yaml:"min_chunk_size,omitempty"` diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 80866f63d..5603ad6a1 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -4,6 +4,7 @@ package s3 //go:generate go run gen_setfrom.go -o setfrom.go import ( + "bytes" "context" "crypto/md5" "crypto/tls" @@ -934,6 +935,18 @@ PutObjectRetention and PutObjectLegalHold API calls after the upload completes. This adds extra API calls per object, so only enable if your provider requires it.`, Default: false, Advanced: true, + }, { + Name: "object_lock_supported", + Help: `Whether the provider supports S3 Object Lock. + +This should be true, false or left unset to use the default for the provider. + +Set to false for providers that don't fully support the S3 Object Lock API +(e.g. 
GCS which uses non-standard headers for bypass governance retention +and doesn't implement Legal Hold via the S3 API). +`, + Default: fs.Tristate{}, + Advanced: true, }, }})) } @@ -1120,6 +1133,7 @@ type Options struct { BypassGovernanceRetention bool `config:"bypass_governance_retention"` BucketObjectLockEnabled bool `config:"bucket_object_lock_enabled"` ObjectLockSetAfterUpload bool `config:"object_lock_set_after_upload"` + ObjectLockSupported fs.Tristate `config:"object_lock_supported"` } // Fs represents a remote s3 server @@ -1726,6 +1740,7 @@ func setQuirks(opt *Options, provider *Provider) { set(&opt.UseUnsignedPayload, true, provider.Quirks.UseUnsignedPayload) set(&opt.UseXID, true, provider.Quirks.UseXID) set(&opt.SignAcceptEncoding, true, provider.Quirks.SignAcceptEncoding) + set(&opt.ObjectLockSupported, true, provider.Quirks.ObjectLockSupported) } // setRoot changes the root of the Fs @@ -4608,9 +4623,32 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R return wantETag, gotETag, versionID, s3cw.ui, nil } +// bufferForObjectLockMD5 buffers the body and computes Content-MD5 when +// Object Lock parameters are set on the request. AWS S3 requires Content-MD5 +// for PutObject with Object Lock params and cannot compute it automatically +// from a non-seekable io.Reader. 
+// See: https://github.com/aws/aws-sdk-go-v2/discussions/2960 +func bufferForObjectLockMD5(req *s3.PutObjectInput, in io.Reader) (io.Reader, error) { + if req.ContentMD5 != nil || (req.ObjectLockMode == "" && req.ObjectLockRetainUntilDate == nil && req.ObjectLockLegalHoldStatus == "") { // Content-MD5 already supplied by the caller, or no Object Lock params: pass the body through unbuffered + return in, nil + } + buf, err := io.ReadAll(in) + if err != nil { + return nil, fmt.Errorf("failed to read body for Content-MD5: %w", err) + } + md5sum := md5.Sum(buf) + md5base64 := base64.StdEncoding.EncodeToString(md5sum[:]) + req.ContentMD5 = &md5base64 + return bytes.NewReader(buf), nil +} + // Upload a single part using PutObject func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) { - req.Body = io.NopCloser(in) + in, err = bufferForObjectLockMD5(req, in) + if err != nil { + return etag, lastModified, nil, err + } + req.Body = in var options = []func(*s3.Options){} if o.fs.opt.UseUnsignedPayload.Value { options = append(options, s3.WithAPIOptions( @@ -4640,6 +4678,11 @@ func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjec // Upload a single part using a presigned request func (o *Object) uploadSinglepartPresignedRequest(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) { + // Content-MD5 must be set before signing so it's included in the presigned URL. 
+ in, err = bufferForObjectLockMD5(req, in) + if err != nil { + return etag, lastModified, nil, err + } // Create the presigned request putReq, err := s3.NewPresignClient(o.fs.c).PresignPutObject(ctx, req, s3.WithPresignExpires(15*time.Minute)) if err != nil { diff --git a/backend/s3/s3_internal_test.go b/backend/s3/s3_internal_test.go index c4e6637f5..ebb0ee80e 100644 --- a/backend/s3/s3_internal_test.go +++ b/backend/s3/s3_internal_test.go @@ -499,6 +499,9 @@ func (f *Fs) InternalTestVersions(t *testing.T) { } func (f *Fs) InternalTestObjectLock(t *testing.T) { + if !f.opt.ObjectLockSupported.Value { + t.Skip("Object Lock not supported by this provider (quirk object_lock_supported = false)") + } ctx := context.Background() // Create a temporary bucket with Object Lock enabled to test on. @@ -721,6 +724,55 @@ func (f *Fs) InternalTestObjectLock(t *testing.T) { assert.WithinDuration(t, retainUntilDate, gotRetainDate, time.Second) assert.Equal(t, "ON", gotMetadata["object-lock-legal-hold-status"]) }) + + t.Run("Multipart", func(t *testing.T) { + // Force multipart upload by setting a very low cutoff + oldCutoff := f.opt.UploadCutoff + f.opt.UploadCutoff = fs.SizeSuffix(1) + f.opt.ObjectLockMode = "GOVERNANCE" + f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339) + defer func() { + f.opt.UploadCutoff = oldCutoff + f.opt.ObjectLockMode = "" + f.opt.ObjectLockRetainUntilDate = "" + }() + + contents := random.String(100) + item := fstest.NewItem("test-object-lock-multipart", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + obj := fstests.PutTestContents(ctx, t, f, &item, contents, true) + defer func() { + removeLocked(t, obj) + }() + + o := obj.(*Object) + gotMetadata, err := o.Metadata(ctx) + require.NoError(t, err) + assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"]) + }) + + t.Run("Presigned", func(t *testing.T) { + // Use presigned request upload path + f.opt.UsePresignedRequest = true + f.opt.ObjectLockMode = "GOVERNANCE" + 
f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339) + defer func() { + f.opt.UsePresignedRequest = false + f.opt.ObjectLockMode = "" + f.opt.ObjectLockRetainUntilDate = "" + }() + + contents := random.String(100) + item := fstest.NewItem("test-object-lock-presigned", contents, fstest.Time("2001-05-06T04:05:06.499999999Z")) + obj := fstests.PutTestContents(ctx, t, f, &item, contents, true) + defer func() { + removeLocked(t, obj) + }() + + o := obj.(*Object) + gotMetadata, err := o.Metadata(ctx) + require.NoError(t, err) + assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"]) + }) } func (f *Fs) InternalTest(t *testing.T) { diff --git a/docs/content/s3.md b/docs/content/s3.md index ef085e99d..836509d95 100644 --- a/docs/content/s3.md +++ b/docs/content/s3.md @@ -913,10 +913,9 @@ According to AWS's [documentation on S3 Object Lock](https://docs.aws.amazon.com > If you configure a default retention period on a bucket, requests to upload objects in such a bucket must include the Content-MD5 header. -As mentioned in the [Modification times and hashes](#modification-times-and-hashes) -section, small files that are not uploaded as multipart, use a different tag, causing -the upload to fail. A simple solution is to set the `--s3-upload-cutoff 0` and force -all the files to be uploaded as multipart. +Rclone automatically computes and includes the Content-MD5 header when +uploading objects with Object Lock parameters set. No special +configuration is needed. #### Setting Object Lock retention