diff --git a/cmd/copyurl/copyurl.go b/cmd/copyurl/copyurl.go index c15f4bed4..56e51ff03 100644 --- a/cmd/copyurl/copyurl.go +++ b/cmd/copyurl/copyurl.go @@ -13,7 +13,9 @@ import ( "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/config/flags" "github.com/rclone/rclone/fs/operations" + "github.com/rclone/rclone/lib/errcount" "github.com/spf13/cobra" + "golang.org/x/sync/errgroup" ) var ( @@ -33,7 +35,7 @@ func init() { flags.BoolVarP(cmdFlags, &printFilename, "print-filename", "p", printFilename, "Print the resulting name from --auto-filename", "") flags.BoolVarP(cmdFlags, &noClobber, "no-clobber", "", noClobber, "Prevent overwriting file with same name", "") flags.BoolVarP(cmdFlags, &stdout, "stdout", "", stdout, "Write the output to stdout rather than a file", "") - flags.BoolVarP(cmdFlags, &urls, "urls", "", stdout, "Use a .csv file of links to process multiple urls", "") + flags.BoolVarP(cmdFlags, &urls, "urls", "", urls, "Use a CSV file of links to process multiple URLs", "") } var commandDefinition = &cobra.Command{ @@ -57,9 +59,16 @@ destination if there is one with the same name. Setting |--stdout| or making the output file name |-| will cause the output to be written to standard output. -Setting |--urls| allows you to input a .csv file of urls in format: -URL,FILENAME. If a filename is missing one will be autogenerated for the file, -and it will otherwise continue as normal. +Setting |--urls| allows you to input a CSV file of URLs in format: URL, +FILENAME. If |--urls| is in use then replace the URL in the arguments with the +file containing the URLs, e.g.: +|||sh +rclone copyurl --urls myurls.csv remote:dir +||| +Missing filenames will be autogenerated equivalent to using |--auto-filename|. +Note that |--stdout| and |--print-filename| are incompatible with |--urls|. +This will do |--transfers| copies in parallel. Note that if |--auto-filename| +is desired for all URLs then a file with only URLs and no filename can be used. 
### Troubleshooting @@ -75,102 +84,95 @@ If you can't get |rclone copyurl| to work then here are some things you can try: "groups": "Important", }, RunE: func(command *cobra.Command, args []string) (err error) { - - //Check they gave all the args correctly cmd.CheckArgs(1, 2, command, args) - //Create list to store the list of URLS - var urlList [][]string - - if urls { - //Read the .csv file provided if --urls is enabled - File, error := os.Open(args[0]) - if error != nil { - return errors.New("failed to open .csv file") - } - reader := csv.NewReader(File) - reader.FieldsPerRecord = -1 - data, error := reader.ReadAll() - if error != nil { - return errors.New("failed reading .csv file") - } - //Save the list of urls to the urlList variable - urlList = data - - } else { - //If its the non multi-url command, save only that, without attempting to read a file - urlList = make([][]string, 1) - urlList[0] = args - } - - //Save per-file variables - dstFileNames := make([]string, len(urlList)) - fsdsts := make([]fs.Fs, len(urlList)) - autonames := make([]bool, len(urlList)) - - //Save the original input folder, prevents it being overwritten when adding onto the end - root := args[1] - - if !stdout { - if len(args) < 2 { - return errors.New("need 2 arguments if not using --stdout") - } - if args[1] == "-" { - stdout = true - } else { - - //Go over each of the URLs need transferring - for i := range urlList { - //Save autoFilenames - if autoFilename { - autonames[i] = true - } else if urls { - - //If we are using multiple URLs, and they dont provide a name for one, set it to autogenerate one for it. - //This is because itd suck having it cancel midway through if you forgot to add the name of one. - - autonames[i] = (len(urlList[i]) == 1 || urlList[i][1] == "") - } - - if autonames[i] { - //If it is autonaming, generate the name. - //Set args[1] to the root, resetting it to the main directory. 
(Clears filenames from it) - args[1] = root - fsdsts[i] = cmd.NewFsDir(args[1:]) - } else { - //If using multiple URLs, set it to the root and the name. - //Because again, args[1] gets appended to each round, so it has to be reset. - if len(urlList) > 1 { - args[1] = root + urlList[i][1] - } - //Otherwise your clear to go, as in the regular format, - //args[1] already contains the destination and filename for a single file - fsdsts[i], dstFileNames[i] = cmd.NewFsDstFile(args[1:]) - } - } - } - } cmd.Run(true, true, command, func() error { - //Go over each URL, to upload them - for i, url := range urlList { - //Genereate a new fs.Object for the file - var dst fs.Object - if stdout { - err = operations.CopyURLToWriter(context.Background(), url[0], os.Stdout) - } else { - //CopyURLs, with each individual link's information - dst, err = operations.CopyURL(context.Background(), fsdsts[i], dstFileNames[i], url[0], autonames[i], headerFilename, noClobber) - if printFilename && err == nil && dst != nil { - fmt.Println(dst.Remote()) - } - } - if err != nil { - println(err) - return err - } + if !urls { + return run(args) } - return nil + return runURLS(args) }) return nil }, } + +var copyURL = operations.CopyURL // for testing + +// runURLS processes a .csv file of urls and filenames +func runURLS(args []string) (err error) { + if stdout { + return errors.New("can't use --stdout with --urls") + } + if printFilename { + return errors.New("can't use --print-filename with --urls") + } + dstFs := cmd.NewFsDir(args[1:]) + + f, err := os.Open(args[0]) + if err != nil { + return fmt.Errorf("failed to open .csv file: %w", err) + } + defer fs.CheckClose(f, &err) + reader := csv.NewReader(f) + reader.FieldsPerRecord = -1 + urlList, err := reader.ReadAll() + if err != nil { + return fmt.Errorf("failed reading .csv file: %w", err) + } + + ec := errcount.New() + g, gCtx := errgroup.WithContext(context.Background()) + ci := fs.GetConfig(gCtx) + g.SetLimit(ci.Transfers) + + for _, 
urlEntry := range urlList { + if len(urlEntry) == 0 { + continue + } + g.Go(func() error { + url := urlEntry[0] + var filename string + if len(urlEntry) > 1 { + filename = urlEntry[1] + } + _, err := copyURL(gCtx, dstFs, filename, url, filename == "", headerFilename, noClobber) + if err != nil { + fs.Errorf(filename, "failed to copy URL %q: %v", url, err) + ec.Add(err) + } + return nil + }) + } + ec.Add(g.Wait()) + return ec.Err("not all URLs copied successfully") +} + +// run runs the command for a single URL +func run(args []string) error { + var err error + var dstFileName string + var fsdst fs.Fs + if !stdout { + if len(args) < 2 { + return errors.New("need 2 arguments if not using --stdout") + } + if args[1] == "-" { + stdout = true + } else if autoFilename { + fsdst = cmd.NewFsDir(args[1:]) + } else { + fsdst, dstFileName = cmd.NewFsDstFile(args[1:]) + } + } + + var dst fs.Object + if stdout { + err = operations.CopyURLToWriter(context.Background(), args[0], os.Stdout) + } else { + dst, err = copyURL(context.Background(), fsdst, dstFileName, args[0], autoFilename, headerFilename, noClobber) + if printFilename && err == nil && dst != nil { + fmt.Println(dst.Remote()) + } + } + return err +} diff --git a/cmd/copyurl/copyurl_test.go b/cmd/copyurl/copyurl_test.go new file mode 100644 index 000000000..1c4e32234 --- /dev/null +++ b/cmd/copyurl/copyurl_test.go @@ -0,0 +1,157 @@ +package copyurl + +import ( + "context" + "errors" + "os" + "path/filepath" + "sync" + "sync/atomic" + "testing" + + _ "github.com/rclone/rclone/backend/local" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/operations" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func resetGlobals() { + autoFilename = false + headerFilename = false + printFilename = false + stdout = false + noClobber = false + urls = false + copyURL = operations.CopyURL +} + +func TestRun_RequiresTwoArgsWhenNotStdout(t *testing.T) { + t.Cleanup(resetGlobals) + 
resetGlobals() + + err := run([]string{"https://example.com/foo"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "need 2 arguments if not using --stdout") +} + +func TestRun_CallsCopyURL_WithExplicitFilename_Success(t *testing.T) { + t.Cleanup(resetGlobals) + resetGlobals() + + tmp := t.TempDir() + dstPath := filepath.Join(tmp, "out.txt") + + var called int32 + + copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) { + atomic.AddInt32(&called, 1) + assert.Equal(t, "https://example.com/file", url) + assert.Equal(t, "out.txt", dstFileName) + assert.False(t, auto) + assert.False(t, header) + assert.False(t, noclobber) + return nil, nil + } + + err := run([]string{"https://example.com/file", dstPath}) + require.NoError(t, err) + assert.Equal(t, int32(1), atomic.LoadInt32(&called)) +} + +func TestRun_CallsCopyURL_WithAutoFilename_AndPropagatesError(t *testing.T) { + t.Cleanup(resetGlobals) + resetGlobals() + + tmp := t.TempDir() + autoFilename = true + + want := errors.New("boom") + var called int32 + + copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) { + atomic.AddInt32(&called, 1) + assert.Equal(t, "", dstFileName) // auto filename -> empty + assert.True(t, auto) + return nil, want + } + + err := run([]string{"https://example.com/auto/name", tmp}) + require.Error(t, err) + assert.Equal(t, want, err) + assert.Equal(t, int32(1), atomic.LoadInt32(&called)) +} + +func TestRunURLS_ErrorsWithStdoutAndWithPrintFilename(t *testing.T) { + t.Cleanup(resetGlobals) + resetGlobals() + + stdout = true + err := runURLS([]string{"dummy.csv", "destDir"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "can't use --stdout with --urls") + + resetGlobals() + printFilename = true + err = runURLS([]string{"dummy.csv", "destDir"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "can't use --print-filename with 
--urls") +} + +func TestRunURLS_ProcessesCSV_ParallelCalls_AndAggregatesError(t *testing.T) { + t.Cleanup(resetGlobals) + resetGlobals() + + tmp := t.TempDir() + csvPath := filepath.Join(tmp, "urls.csv") + csvContent := []byte( + "https://example.com/a,aaa.txt\n" + // success + "https://example.com/b\n" + // auto filename + "https://example.com/c,ccc.txt\n") // error + require.NoError(t, os.WriteFile(csvPath, csvContent, 0o600)) + + // destination dir (local backend) + dest := t.TempDir() + + // mock copyURL: succeed for /a and /b, fail for /c + + var calls int32 + var mu sync.Mutex + var seen []string + + copyURL = func(_ctx context.Context, _dst fs.Fs, dstFileName, url string, auto, header, noclobber bool) (fs.Object, error) { + atomic.AddInt32(&calls, 1) + mu.Lock() + seen = append(seen, url+"|"+dstFileName) + mu.Unlock() + + switch { + case url == "https://example.com/a": + require.Equal(t, "aaa.txt", dstFileName) + return nil, nil + case url == "https://example.com/b": + require.Equal(t, "", dstFileName) // auto-name path + return nil, nil + case url == "https://example.com/c": + return nil, errors.New("network down") + default: + return nil, nil + } + } + + err := runURLS([]string{csvPath, dest}) + require.Error(t, err) + assert.Contains(t, err.Error(), "not all URLs copied successfully") + // 3 lines => 3 calls + assert.Equal(t, int32(3), atomic.LoadInt32(&calls)) + + // sanity: all expected URLs were seen + assert.ElementsMatch(t, + []string{ + "https://example.com/a|aaa.txt", + "https://example.com/b|", + "https://example.com/c|ccc.txt", + }, + seen, + ) +}