mirror of
https://github.com/rclone/rclone.git
synced 2026-05-12 10:03:35 -04:00
http: don't list parent directory when pointing at a single file
When an HTTP URL points to a single file, listing the parent directory is unnecessary and may fail entirely on servers that disable directory listings but still serve HEAD/GET on the file. Remember the file name in the Fs and short-circuit List to return just that one object. See: https://forum.rclone.org/t/how-to-combine-on-the-fly-http-archive-remote-to-list-crc32s-in-a-http-hosted-zip/53761
This commit is contained in:
@@ -156,6 +156,7 @@ type Fs struct {
|
||||
endpoint *url.URL
|
||||
endpointURL string // endpoint as a string
|
||||
httpClient *http.Client
|
||||
fileName string // set if we are pointing to a file
|
||||
}
|
||||
|
||||
// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading)
|
||||
@@ -297,6 +298,7 @@ func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err
|
||||
|
||||
if isFile {
|
||||
// Correct root if definitely pointing to a file
|
||||
f.fileName = path.Base(f.root)
|
||||
f.root = path.Dir(f.root)
|
||||
if f.root == "." || f.root == "/" {
|
||||
f.root = ""
|
||||
@@ -564,6 +566,17 @@ func (f *Fs) readDir(ctx context.Context, dir string) (names []string, err error
|
||||
// This should return ErrDirNotFound if the directory isn't
|
||||
// found.
|
||||
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
|
||||
// pointed at a single file: only that file is visible
|
||||
if f.fileName != "" {
|
||||
if dir != "" {
|
||||
return nil, fs.ErrorDirNotFound
|
||||
}
|
||||
obj, err := f.NewObject(ctx, f.fileName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs.DirEntries{obj}, nil
|
||||
}
|
||||
if !strings.HasSuffix(dir, "/") && dir != "" {
|
||||
dir += "/"
|
||||
}
|
||||
|
||||
@@ -331,7 +331,19 @@ func TestIsAFileRoot(t *testing.T) {
|
||||
f, err := NewFs(context.Background(), remoteName, "one%.txt", m)
|
||||
assert.Equal(t, err, fs.ErrorIsFile)
|
||||
|
||||
testListRoot(t, f, false)
|
||||
entries, err := f.List(context.Background(), "")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Equal(t, 1, len(entries))
|
||||
|
||||
e := entries[0]
|
||||
assert.Equal(t, "one%.txt", e.Remote())
|
||||
assert.Equal(t, int64(5+lineEndSize), e.Size())
|
||||
_, ok := e.(*Object)
|
||||
assert.True(t, ok)
|
||||
|
||||
_, err = f.List(context.Background(), "anysub")
|
||||
assert.Equal(t, fs.ErrorDirNotFound, err)
|
||||
}
|
||||
|
||||
func TestIsAFileSubDir(t *testing.T) {
|
||||
|
||||
@@ -114,6 +114,33 @@ rclone sync --interactive remote:directory /home/local/directory
|
||||
|
||||
This remote is read only - you can't upload files to an HTTP server.
|
||||
|
||||
### Servers without directory listings
|
||||
|
||||
Rclone normally needs the HTTP server to return a parseable directory
|
||||
listing in order to discover files. However, if the path points
|
||||
directly at a single file (i.e. it does not end with `/` and the
|
||||
initial HEAD request reports it as a file), rclone will skip the
|
||||
parent directory listing entirely and access the file directly.
|
||||
|
||||
This means rclone can be used to download individual files from HTTP
|
||||
servers that have directory listings disabled, as long as you know the
|
||||
exact URL of each file. For example, given a server that serves
|
||||
`https://example.com/path/file.txt` but returns an error or an
|
||||
unparseable response for `https://example.com/path/`:
|
||||
|
||||
```console
|
||||
rclone copy --http-url https://example.com :http:path/file.txt /tmp/
|
||||
```
|
||||
|
||||
You can use this as a remote in other rclone commands too:
|
||||
|
||||
```console
|
||||
rclone hashsum crc32 --http-url "https://getsamplefiles.com" :archive::http:download/zip/sample-1.zip
|
||||
```
|
||||
|
||||
If you just want to download one or more files by URL, then
|
||||
using [copyurl](/commands/rclone_copyurl/) is more efficient.
|
||||
|
||||
### Modification times
|
||||
|
||||
Most HTTP servers store time accurate to 1 second.
|
||||
|
||||
Reference in New Issue
Block a user