mirror of
https://github.com/rclone/rclone.git
synced 2026-04-05 08:04:34 -04:00
s3: fix Content-MD5 for Object Lock uploads and add GCS quirk
AWS S3 requires Content-MD5 for PutObject with Object Lock parameters. Since rclone passes a non-seekable io.Reader, the SDK cannot compute checksums automatically. Buffer the body and compute MD5 manually for singlepart PutObject and presigned request uploads when Object Lock parameters are set. Multipart uploads are unaffected as Object Lock headers go on CreateMultipartUpload which has no body. Add object_lock_supported provider quirk (default true) to allow skipping Object Lock integration tests on providers with incomplete S3 API support. Set to false for GCS which uses non-standard x-goog-bypass-governance-retention header and doesn't implement PutObjectLegalHold/GetObjectLegalHold. Add Multipart and Presigned subtests to Object Lock integration tests to cover all three upload paths. Fixes #9199
This commit is contained in:
@@ -18,3 +18,7 @@ quirks:
|
||||
# See: https://issuetracker.google.com/issues/323465186
|
||||
# So make cutoff very large which it does seem to support
|
||||
copy_cutoff: 9223372036854775807
|
||||
# GCS S3 API doesn't fully support Object Lock:
|
||||
# - Uses x-goog-bypass-governance-retention instead of x-amz-bypass-governance-retention
|
||||
# - PutObjectLegalHold / GetObjectLegalHold not implemented
|
||||
object_lock_supported: false
|
||||
|
||||
@@ -33,6 +33,7 @@ type Quirks struct {
|
||||
UseXID *bool `yaml:"use_x_id,omitempty"`
|
||||
SignAcceptEncoding *bool `yaml:"sign_accept_encoding,omitempty"`
|
||||
EtagIsNotMD5 *bool `yaml:"etag_is_not_md5,omitempty"`
|
||||
ObjectLockSupported *bool `yaml:"object_lock_supported,omitempty"`
|
||||
CopyCutoff *int64 `yaml:"copy_cutoff,omitempty"`
|
||||
MaxUploadParts *int `yaml:"max_upload_parts,omitempty"`
|
||||
MinChunkSize *int64 `yaml:"min_chunk_size,omitempty"`
|
||||
|
||||
@@ -4,6 +4,7 @@ package s3
|
||||
//go:generate go run gen_setfrom.go -o setfrom.go
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"crypto/tls"
|
||||
@@ -934,6 +935,18 @@ PutObjectRetention and PutObjectLegalHold API calls after the upload completes.
|
||||
This adds extra API calls per object, so only enable if your provider requires it.`,
|
||||
Default: false,
|
||||
Advanced: true,
|
||||
}, {
|
||||
Name: "object_lock_supported",
|
||||
Help: `Whether the provider supports S3 Object Lock.
|
||||
|
||||
This should be true, false or left unset to use the default for the provider.
|
||||
|
||||
Set to false for providers that don't fully support the S3 Object Lock API
|
||||
(e.g. GCS which uses non-standard headers for bypass governance retention
|
||||
and doesn't implement Legal Hold via the S3 API).
|
||||
`,
|
||||
Default: fs.Tristate{},
|
||||
Advanced: true,
|
||||
},
|
||||
}}))
|
||||
}
|
||||
@@ -1120,6 +1133,7 @@ type Options struct {
|
||||
BypassGovernanceRetention bool `config:"bypass_governance_retention"`
|
||||
BucketObjectLockEnabled bool `config:"bucket_object_lock_enabled"`
|
||||
ObjectLockSetAfterUpload bool `config:"object_lock_set_after_upload"`
|
||||
ObjectLockSupported fs.Tristate `config:"object_lock_supported"`
|
||||
}
|
||||
|
||||
// Fs represents a remote s3 server
|
||||
@@ -1726,6 +1740,7 @@ func setQuirks(opt *Options, provider *Provider) {
|
||||
set(&opt.UseUnsignedPayload, true, provider.Quirks.UseUnsignedPayload)
|
||||
set(&opt.UseXID, true, provider.Quirks.UseXID)
|
||||
set(&opt.SignAcceptEncoding, true, provider.Quirks.SignAcceptEncoding)
|
||||
set(&opt.ObjectLockSupported, true, provider.Quirks.ObjectLockSupported)
|
||||
}
|
||||
|
||||
// setRoot changes the root of the Fs
|
||||
@@ -4608,9 +4623,32 @@ func (o *Object) uploadMultipart(ctx context.Context, src fs.ObjectInfo, in io.R
|
||||
return wantETag, gotETag, versionID, s3cw.ui, nil
|
||||
}
|
||||
|
||||
// bufferForObjectLockMD5 buffers the body and computes Content-MD5 when
|
||||
// Object Lock parameters are set on the request. AWS S3 requires Content-MD5
|
||||
// for PutObject with Object Lock params and cannot compute it automatically
|
||||
// from a non-seekable io.Reader.
|
||||
// See: https://github.com/aws/aws-sdk-go-v2/discussions/2960
|
||||
func bufferForObjectLockMD5(req *s3.PutObjectInput, in io.Reader) (io.Reader, error) {
|
||||
if req.ObjectLockMode == "" && req.ObjectLockRetainUntilDate == nil && req.ObjectLockLegalHoldStatus == "" {
|
||||
return in, nil
|
||||
}
|
||||
buf, err := io.ReadAll(in)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read body for Content-MD5: %w", err)
|
||||
}
|
||||
md5sum := md5.Sum(buf)
|
||||
md5base64 := base64.StdEncoding.EncodeToString(md5sum[:])
|
||||
req.ContentMD5 = &md5base64
|
||||
return bytes.NewReader(buf), nil
|
||||
}
|
||||
|
||||
// Upload a single part using PutObject
|
||||
func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) {
|
||||
req.Body = io.NopCloser(in)
|
||||
in, err = bufferForObjectLockMD5(req, in)
|
||||
if err != nil {
|
||||
return etag, lastModified, nil, err
|
||||
}
|
||||
req.Body = in
|
||||
var options = []func(*s3.Options){}
|
||||
if o.fs.opt.UseUnsignedPayload.Value {
|
||||
options = append(options, s3.WithAPIOptions(
|
||||
@@ -4640,6 +4678,11 @@ func (o *Object) uploadSinglepartPutObject(ctx context.Context, req *s3.PutObjec
|
||||
|
||||
// Upload a single part using a presigned request
|
||||
func (o *Object) uploadSinglepartPresignedRequest(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (etag string, lastModified time.Time, versionID *string, err error) {
|
||||
// Content-MD5 must be set before signing so it's included in the presigned URL.
|
||||
in, err = bufferForObjectLockMD5(req, in)
|
||||
if err != nil {
|
||||
return etag, lastModified, nil, err
|
||||
}
|
||||
// Create the presigned request
|
||||
putReq, err := s3.NewPresignClient(o.fs.c).PresignPutObject(ctx, req, s3.WithPresignExpires(15*time.Minute))
|
||||
if err != nil {
|
||||
|
||||
@@ -499,6 +499,9 @@ func (f *Fs) InternalTestVersions(t *testing.T) {
|
||||
}
|
||||
|
||||
func (f *Fs) InternalTestObjectLock(t *testing.T) {
|
||||
if !f.opt.ObjectLockSupported.Value {
|
||||
t.Skip("Object Lock not supported by this provider (quirk object_lock_supported = false)")
|
||||
}
|
||||
ctx := context.Background()
|
||||
|
||||
// Create a temporary bucket with Object Lock enabled to test on.
|
||||
@@ -721,6 +724,55 @@ func (f *Fs) InternalTestObjectLock(t *testing.T) {
|
||||
assert.WithinDuration(t, retainUntilDate, gotRetainDate, time.Second)
|
||||
assert.Equal(t, "ON", gotMetadata["object-lock-legal-hold-status"])
|
||||
})
|
||||
|
||||
t.Run("Multipart", func(t *testing.T) {
|
||||
// Force multipart upload by setting a very low cutoff
|
||||
oldCutoff := f.opt.UploadCutoff
|
||||
f.opt.UploadCutoff = fs.SizeSuffix(1)
|
||||
f.opt.ObjectLockMode = "GOVERNANCE"
|
||||
f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339)
|
||||
defer func() {
|
||||
f.opt.UploadCutoff = oldCutoff
|
||||
f.opt.ObjectLockMode = ""
|
||||
f.opt.ObjectLockRetainUntilDate = ""
|
||||
}()
|
||||
|
||||
contents := random.String(100)
|
||||
item := fstest.NewItem("test-object-lock-multipart", contents, fstest.Time("2001-05-06T04:05:06.499999999Z"))
|
||||
obj := fstests.PutTestContents(ctx, t, f, &item, contents, true)
|
||||
defer func() {
|
||||
removeLocked(t, obj)
|
||||
}()
|
||||
|
||||
o := obj.(*Object)
|
||||
gotMetadata, err := o.Metadata(ctx)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"])
|
||||
})
|
||||
|
||||
t.Run("Presigned", func(t *testing.T) {
|
||||
// Use presigned request upload path
|
||||
f.opt.UsePresignedRequest = true
|
||||
f.opt.ObjectLockMode = "GOVERNANCE"
|
||||
f.opt.ObjectLockRetainUntilDate = retainUntilDate.Format(time.RFC3339)
|
||||
defer func() {
|
||||
f.opt.UsePresignedRequest = false
|
||||
f.opt.ObjectLockMode = ""
|
||||
f.opt.ObjectLockRetainUntilDate = ""
|
||||
}()
|
||||
|
||||
contents := random.String(100)
|
||||
item := fstest.NewItem("test-object-lock-presigned", contents, fstest.Time("2001-05-06T04:05:06.499999999Z"))
|
||||
obj := fstests.PutTestContents(ctx, t, f, &item, contents, true)
|
||||
defer func() {
|
||||
removeLocked(t, obj)
|
||||
}()
|
||||
|
||||
o := obj.(*Object)
|
||||
gotMetadata, err := o.Metadata(ctx)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "GOVERNANCE", gotMetadata["object-lock-mode"])
|
||||
})
|
||||
}
|
||||
|
||||
func (f *Fs) InternalTest(t *testing.T) {
|
||||
|
||||
@@ -913,10 +913,9 @@ According to AWS's [documentation on S3 Object Lock](https://docs.aws.amazon.com
|
||||
> If you configure a default retention period on a bucket, requests to upload
|
||||
objects in such a bucket must include the Content-MD5 header.
|
||||
|
||||
As mentioned in the [Modification times and hashes](#modification-times-and-hashes)
|
||||
section, small files that are not uploaded as multipart use a different tag, causing
|
||||
the upload to fail. A simple solution is to set `--s3-upload-cutoff 0` and force
|
||||
all the files to be uploaded as multipart.
|
||||
Rclone automatically computes and includes the Content-MD5 header when
|
||||
uploading objects with Object Lock parameters set. No special
|
||||
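configuration is needed. Note that for single-part uploads rclone may need to
buffer the whole object in memory in order to compute the MD5.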
|
||||
|
||||
#### Setting Object Lock retention
|
||||
|
||||
|
||||
Reference in New Issue
Block a user