http: don't list parent directory when pointing at a single file

When an HTTP URL points to a single file, listing the parent
directory is unnecessary and may fail entirely on servers that
disable directory listings but still serve HEAD/GET on the file.

Remember the file name in the Fs and short-circuit List to return
just that one object.

See: https://forum.rclone.org/t/how-to-combine-on-the-fly-http-archive-remote-to-list-crc32s-in-a-http-hosted-zip/53761
This commit is contained in:
Nick Craig-Wood
2026-05-09 10:10:33 +01:00
parent 0157a1fb1e
commit c00756810a
3 changed files with 53 additions and 1 deletions

View File

@@ -156,6 +156,7 @@ type Fs struct {
endpoint *url.URL
endpointURL string // endpoint as a string
httpClient *http.Client
fileName string // set if we are pointing to a file
}
// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading)
@@ -297,6 +298,7 @@ func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err
if isFile {
// Correct root if definitely pointing to a file
f.fileName = path.Base(f.root)
f.root = path.Dir(f.root)
if f.root == "." || f.root == "/" {
f.root = ""
@@ -564,6 +566,17 @@ func (f *Fs) readDir(ctx context.Context, dir string) (names []string, err error
// This should return ErrDirNotFound if the directory isn't
// found.
func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
// pointed at a single file: only that file is visible
if f.fileName != "" {
if dir != "" {
return nil, fs.ErrorDirNotFound
}
obj, err := f.NewObject(ctx, f.fileName)
if err != nil {
return nil, err
}
return fs.DirEntries{obj}, nil
}
if !strings.HasSuffix(dir, "/") && dir != "" {
dir += "/"
}

View File

@@ -331,7 +331,19 @@ func TestIsAFileRoot(t *testing.T) {
f, err := NewFs(context.Background(), remoteName, "one%.txt", m)
assert.Equal(t, err, fs.ErrorIsFile)
testListRoot(t, f, false)
entries, err := f.List(context.Background(), "")
require.NoError(t, err)
require.Equal(t, 1, len(entries))
e := entries[0]
assert.Equal(t, "one%.txt", e.Remote())
assert.Equal(t, int64(5+lineEndSize), e.Size())
_, ok := e.(*Object)
assert.True(t, ok)
_, err = f.List(context.Background(), "anysub")
assert.Equal(t, fs.ErrorDirNotFound, err)
}
func TestIsAFileSubDir(t *testing.T) {

View File

@@ -114,6 +114,33 @@ rclone sync --interactive remote:directory /home/local/directory
This remote is read only - you can't upload files to an HTTP server.
### Servers without directory listings
Rclone normally needs the HTTP server to return a parseable directory
listing in order to discover files. However, if the path points
directly at a single file (i.e. it does not end with `/` and the
initial HEAD request reports it as a file), rclone will skip the
parent directory listing entirely and access the file directly.
This means rclone can be used to download individual files on HTTP
servers that have directory listings disabled, as long as you know the
exact URL of each file. For example, given a server that serves
`https://example.com/path/file.txt` but returns an error or an
unparseable response for `https://example.com/path/`:
```console
rclone copy --http-url https://example.com :http:path/file.txt /tmp/
```
You can also use such a single-file path as the underlying remote for
another backend, for example the archive backend reading a zip file
served over HTTP:
```console
rclone hashsum crc32 --http-url "https://getsamplefiles.com" :archive::http:download/zip/sample-1.zip
```
If you just want to download one or more files by URL, then
using [copyurl](/commands/rclone_copyurl/) is more efficient.
### Modification times
Most HTTP servers store time accurate to 1 second.