Files
kopia/fs/virtualfs/virtualfs.go
Jarek Kowalski c8d1b221e2 refactor(repository): added fs.DirectoryIterator (#3365)
* refactor(repository): added fs.DirectoryIterator

This significantly reduces the number of small allocations while
taking snapshots of many files, which leads to faster snapshots.

```
$ runbench --kopia-exe ~/go/bin/kopia \
   --compare-to-exe ~/go/bin/kopia-baseline --min-duration 30s \
   ./snapshot-linux-parallel-4.sh
DIFF duration: current:5.1 baseline:5.8 change:-13.0 %
DIFF repo_size: current:1081614127.6 baseline:1081615302.8 change:-0.0 %
DIFF num_files: current:60.0 baseline:60.0 change:0%
DIFF avg_heap_objects: current:4802666.0 baseline:4905741.8 change:-2.1 %
DIFF avg_heap_bytes: current:737397275.2 baseline:715263289.6 change:+3.1 %
DIFF avg_ram: current:215.0 baseline:211.5 change:+1.6 %
DIFF max_ram: current:294.8 baseline:311.4 change:-5.3 %
DIFF avg_cpu: current:167.3 baseline:145.3 change:+15.1 %
DIFF max_cpu: current:227.2 baseline:251.0 change:-9.5 %
```

* changed `Next()` API

* mechanical move of the iterator to its own file

* clarified comment

* pr feedback

* mechanical move of all localfs dependencies on os.FileInfo to a separate file

* Update fs/entry.go

Co-authored-by: ashmrtn <3891298+ashmrtn@users.noreply.github.com>

* Update fs/entry_dir_iterator.go

Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>

* doc: clarified valid results from Next()

---------

Co-authored-by: ashmrtn <3891298+ashmrtn@users.noreply.github.com>
Co-authored-by: Julio Lopez <1953782+julio-lopez@users.noreply.github.com>
2023-10-05 02:45:44 +00:00

196 lines
4.4 KiB
Go

// Package virtualfs implements an in-memory abstraction of fs.Directory and fs.StreamingFile.
package virtualfs
import (
"context"
"errors"
"io"
"os"
"sync"
"time"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/internal/clock"
)
// defaultPermissions is the permission mask (rwxrwxrwx, i.e. 0o777) applied to
// the entries built by the constructors in this file.
const defaultPermissions os.FileMode = os.ModePerm
// virtualEntry holds the metadata of an in-memory directory entry. Each field
// is exposed through the accessor method of the corresponding name (Name, Mode,
// Size, ModTime, Owner, Device).
type virtualEntry struct {
	name    string
	mode    os.FileMode
	size    int64
	modTime time.Time
	owner   fs.OwnerInfo
	device  fs.DeviceInfo
}
// Name returns the name the entry was created with.
func (e *virtualEntry) Name() string {
	return e.name
}
// IsDir reports whether the directory bit is set in the entry's mode.
func (e *virtualEntry) IsDir() bool {
	return e.mode&os.ModeDir != 0
}
// Mode returns the entry's file mode (type and permission bits).
func (e *virtualEntry) Mode() os.FileMode {
	return e.mode
}
// ModTime returns the modification time stored in the entry.
func (e *virtualEntry) ModTime() time.Time {
	return e.modTime
}
// Size returns the size stored in the entry.
func (e *virtualEntry) Size() int64 {
	return e.size
}
// Sys always returns nil: a virtual entry carries no underlying data source.
func (e *virtualEntry) Sys() interface{} {
	return nil
}
// Owner returns the ownership information stored in the entry.
func (e *virtualEntry) Owner() fs.OwnerInfo {
	return e.owner
}
// Device returns the device information stored in the entry.
func (e *virtualEntry) Device() fs.DeviceInfo {
	return e.device
}
// LocalFilesystemPath always returns the empty string for a virtual entry.
func (e *virtualEntry) LocalFilesystemPath() string {
	return ""
}
// Close is a no-op.
func (e *virtualEntry) Close() {
	// Intentionally empty: nothing is released here.
}
// staticDirectory is an fs.Directory backed by a fixed, in-memory slice of
// entries. The embedded virtualEntry supplies the directory's own metadata.
type staticDirectory struct {
	virtualEntry

	entries []fs.Entry
}
// Child looks up the entry called name in this directory by delegating to
// fs.IterateEntriesAndFindChild; its result and error are returned unchanged.
func (sd *staticDirectory) Child(ctx context.Context, name string) (fs.Entry, error) {
	return fs.IterateEntriesAndFindChild(ctx, sd, name) //nolint:wrapcheck
}
// Iterate returns a new static iterator over a private copy of the directory's
// entries, so every call starts from the beginning and the iterator does not
// share its slice with the directory. The returned error is always nil.
func (sd *staticDirectory) Iterate(ctx context.Context) (fs.DirectoryIterator, error) {
	snapshot := make([]fs.Entry, len(sd.entries))
	copy(snapshot, sd.entries)

	return fs.StaticIterator(snapshot, nil), nil
}
// SupportsMultipleIterations always returns true for a static directory.
func (sd *staticDirectory) SupportsMultipleIterations() bool {
	return true
}
// NewStaticDirectory returns a virtual directory called name that contains
// the given entries. The directory's mode is defaultPermissions with the
// directory bit set. The entries slice is stored as-is (not copied).
func NewStaticDirectory(name string, entries []fs.Entry) fs.Directory {
	meta := virtualEntry{
		name: name,
		mode: os.ModeDir | defaultPermissions,
	}

	return &staticDirectory{virtualEntry: meta, entries: entries}
}
// streamingDirectory is an fs.Directory that wraps a single caller-supplied
// iterator. The iterator is handed out by Iterate and then cleared, so it can
// be consumed at most once.
type streamingDirectory struct {
	virtualEntry

	mu sync.Mutex
	// +checklocks:mu
	iter fs.DirectoryIterator
}
// errChildNotSupported is the error returned by streamingDirectory.Child.
var errChildNotSupported = errors.New("streamingDirectory.Child not supported")

// Child is not supported on streaming directories: it ignores its arguments
// and always returns errChildNotSupported.
func (sd *streamingDirectory) Child(ctx context.Context, _ string) (fs.Entry, error) {
	return nil, errChildNotSupported
}
// errIteratorAlreadyUsed is returned by streamingDirectory.Iterate when no
// iterator is available to hand out.
var errIteratorAlreadyUsed = errors.New("cannot use streaming directory iterator more than once") // +checklocksignore: mu

// Iterate hands the wrapped iterator to the caller and clears it from the
// directory, all while holding mu. When no iterator is left (for example on a
// second call) it returns errIteratorAlreadyUsed.
func (sd *streamingDirectory) Iterate(ctx context.Context) (fs.DirectoryIterator, error) {
	sd.mu.Lock()
	defer sd.mu.Unlock()

	pending := sd.iter
	if pending == nil {
		return nil, errIteratorAlreadyUsed
	}

	// Ownership moves to the caller; forget the iterator so it is not reused.
	sd.iter = nil

	return pending, nil
}
// SupportsMultipleIterations always returns false for a streaming directory.
func (sd *streamingDirectory) SupportsMultipleIterations() bool {
	return false
}
// NewStreamingDirectory returns a directory called name whose first call to
// Iterate() yields the provided iterator. The directory's mode is
// defaultPermissions with the directory bit set.
func NewStreamingDirectory(name string, iter fs.DirectoryIterator) fs.Directory {
	meta := virtualEntry{
		name: name,
		mode: os.ModeDir | defaultPermissions,
	}

	return &streamingDirectory{virtualEntry: meta, iter: iter}
}
// virtualFile is an fs.StreamingFile whose content comes from a single
// io.ReadCloser. The embedded virtualEntry supplies the file's metadata.
type virtualFile struct {
	virtualEntry

	reader io.ReadCloser
}
// errReaderAlreadyUsed is returned by virtualFile.GetReader when the file no
// longer holds a reader.
var errReaderAlreadyUsed = errors.New("cannot use streaming file reader more than once")

// GetReader hands out the streaming file's reader.
// The reader is detached from the file on the first call, so every later call
// returns errReaderAlreadyUsed.
// Note: no locking is performed here; callers are responsible for concurrency safety.
func (vf *virtualFile) GetReader(ctx context.Context) (io.ReadCloser, error) {
	// Detach the reader first so that it can only ever be handed out once.
	rc := vf.reader
	vf.reader = nil

	if rc == nil {
		return nil, errReaderAlreadyUsed
	}

	return rc, nil
}
// StreamingFileFromReader returns a streaming file with the given name and
// reader, using the time reported by clock.Now() as its modification time.
func StreamingFileFromReader(name string, reader io.ReadCloser) fs.StreamingFile {
	now := clock.Now()

	return StreamingFileWithModTimeFromReader(name, now, reader)
}
// StreamingFileWithModTimeFromReader returns a streaming file with the given
// name and reader whose modification time is t. The file's mode is
// defaultPermissions (no type bits set).
func StreamingFileWithModTimeFromReader(name string, t time.Time, reader io.ReadCloser) fs.StreamingFile {
	meta := virtualEntry{
		name:    name,
		mode:    defaultPermissions,
		modTime: t,
	}

	return &virtualFile{virtualEntry: meta, reader: reader}
}
// Compile-time checks that the types in this file implement the fs interfaces.
var (
	_ fs.Directory     = (*staticDirectory)(nil)
	_ fs.Directory     = (*streamingDirectory)(nil)
	_ fs.StreamingFile = (*virtualFile)(nil)
	_ fs.Entry         = (*virtualEntry)(nil)
)