diff --git a/repo/content/committed_content_index_disk_cache.go b/repo/content/committed_content_index_disk_cache.go index f1a0a8a36..5a09b916b 100644 --- a/repo/content/committed_content_index_disk_cache.go +++ b/repo/content/committed_content_index_disk_cache.go @@ -7,7 +7,6 @@ "strings" "time" - "github.com/edsrzf/mmap-go" "github.com/pkg/errors" "github.com/kopia/kopia/internal/blobparam" @@ -52,54 +51,6 @@ func (c *diskCommittedContentIndexCache) openIndex(ctx context.Context, indexBlo return ndx, nil } -// mmapOpenWithRetry attempts mmap.Open() with exponential back-off to work around rare issue specific to Windows where -// we can't open the file right after it has been written. -func (c *diskCommittedContentIndexCache) mmapOpenWithRetry(ctx context.Context, path string) (mmap.MMap, func() error, error) { - const ( - maxRetries = 8 - startingDelay = 10 * time.Millisecond - ) - - // retry milliseconds: 10, 20, 40, 80, 160, 320, 640, 1280, total ~2.5s - f, err := os.Open(path) //nolint:gosec - nextDelay := startingDelay - - retryCount := 0 - for err != nil && retryCount < maxRetries { - retryCount++ - contentlog.Log2(ctx, c.log, "retry #%v unable to mmap.Open()", - logparam.Int("retryCount", retryCount), - logparam.Error("err", err)) - time.Sleep(nextDelay) - nextDelay *= 2 - - f, err = os.Open(path) //nolint:gosec - } - - if err != nil { - return nil, nil, errors.Wrap(err, "unable to open file despite retries") - } - - mm, err := mmap.Map(f, mmap.RDONLY, 0) - if err != nil { - f.Close() //nolint:errcheck - - return nil, nil, errors.Wrap(err, "mmap error") - } - - return mm, func() error { - if err2 := mm.Unmap(); err2 != nil { - return errors.Wrapf(err2, "error unmapping index %v", path) - } - - if err2 := f.Close(); err2 != nil { - return errors.Wrapf(err2, "error closing index %v", path) - } - - return nil - }, nil -} - func (c *diskCommittedContentIndexCache) hasIndexBlobID(_ context.Context, indexBlobID blob.ID) (bool, error) { _, err := os.Stat(c.indexBlobPath(indexBlobID)) if err == nil { diff --git a/repo/content/committed_content_index_disk_cache_unix.go b/repo/content/committed_content_index_disk_cache_unix.go new file mode 100644 index 000000000..700f057b2 --- /dev/null +++ b/repo/content/committed_content_index_disk_cache_unix.go @@ -0,0 +1,49 @@ +//go:build !windows + +package content + +import ( + "context" + "os" + + "github.com/edsrzf/mmap-go" + "github.com/pkg/errors" +) + +// Unix semantics: Close the file descriptor immediately after a successful mmap so the +// process does not retain FDs for all mapped index files. The mapping remains valid until +// Unmap is called. +func (c *diskCommittedContentIndexCache) mmapOpenWithRetry(_ context.Context, path string) (mmap.MMap, func() error, error) { + f, err := os.Open(path) //nolint:gosec + if err != nil { + return nil, nil, errors.Wrap(err, "unable to open file despite retries") + } + + mm, err := mmap.Map(f, mmap.RDONLY, 0) + if err != nil { + _ = f.Close() + return nil, nil, errors.Wrap(err, "mmap error") + } + + // On Unix, it's safe to close the FD now; the mapping remains valid. + if err := f.Close(); err != nil { + // If close fails, still return mapping, but report error on closer to surface the issue later. + closeErr := errors.Wrapf(err, "error closing index %v after mmap", path) + + return mm, func() error { + if err2 := mm.Unmap(); err2 != nil { + return errors.Wrapf(err2, "error unmapping index %v (also had close error: %v)", path, closeErr) + } + + return closeErr + }, nil + } + + return mm, func() error { + if err2 := mm.Unmap(); err2 != nil { + return errors.Wrapf(err2, "error unmapping index %v", path) + } + + return nil + }, nil +} diff --git a/repo/content/committed_content_index_disk_cache_windows.go b/repo/content/committed_content_index_disk_cache_windows.go new file mode 100644 index 000000000..2a0337a2f --- /dev/null +++ b/repo/content/committed_content_index_disk_cache_windows.go @@ -0,0 +1,62 @@ +//go:build windows + +package content + +import ( + "context" + "os" + "time" + + "github.com/edsrzf/mmap-go" + "github.com/pkg/errors" + + "github.com/kopia/kopia/internal/contentlog" + "github.com/kopia/kopia/internal/contentlog/logparam" +) + +// mmapOpenWithRetry attempts mmap.Open() with exponential back-off to work around a rare issue +// where Windows can't open the file right after it has been written. +// +// Windows semantics: keep the file descriptor open until Unmap due to OS requirements. +func (c *diskCommittedContentIndexCache) mmapOpenWithRetry(ctx context.Context, path string) (mmap.MMap, func() error, error) { + const ( + maxRetries = 8 + startingDelay = 10 * time.Millisecond + ) + + // retry milliseconds: 10, 20, 40, 80, 160, 320, 640, 1280, total ~2.5s + f, err := os.Open(path) //nolint:gosec + nextDelay := startingDelay + + retryCount := 0 + for err != nil && retryCount < maxRetries { + retryCount++ + contentlog.Log2(ctx, c.log, "retry unable to mmap.Open()", + logparam.Int("retryCount", retryCount), + logparam.Error("err", err)) + time.Sleep(nextDelay) + nextDelay *= 2 + + f, err = os.Open(path) //nolint:gosec + } + + if err != nil { + return nil, nil, errors.Wrap(err, "unable to open file despite retries") + } + + mm, err := mmap.Map(f, mmap.RDONLY, 0) + if err != nil { + _ = f.Close() + return nil, nil, errors.Wrap(err, "mmap error") + } + + return mm, func() error { + if err2 := mm.Unmap(); err2 != nil { + return errors.Wrapf(err2, "error unmapping index %v", path) + } + if err2 := f.Close(); err2 != nil { + return errors.Wrapf(err2, "error closing index %v", path) + } + return nil + }, nil +} diff --git a/repo/content/committed_content_index_fd_linux_test.go b/repo/content/committed_content_index_fd_linux_test.go new file mode 100644 index 000000000..14fd8d4bf --- /dev/null +++ b/repo/content/committed_content_index_fd_linux_test.go @@ -0,0 +1,79 @@ +//go:build linux + +package content + +import ( + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/kopia/kopia/internal/faketime" + "github.com/kopia/kopia/internal/repodiag" + "github.com/kopia/kopia/internal/testlogging" + "github.com/kopia/kopia/internal/testutil" + "github.com/kopia/kopia/repo/blob" + "github.com/kopia/kopia/repo/content/index" +) + +// countFDsLinux returns the number of open file descriptors for the current process on Linux. +func countFDsLinux(t *testing.T) int { + t.Helper() + + entries, err := os.ReadDir("/proc/self/fd") + require.NoError(t, err, "unable to read /proc/self/fd") + + return len(entries) +} + +// Test that opening many indexes on Linux does not retain a file descriptor per index. +func TestCommittedContentIndexCache_Disk_FDsNotGrowingOnOpen_Linux(t *testing.T) { + // Do not run in parallel to avoid fd count noise from other tests. + var lm *repodiag.LogManager + + ctx := testlogging.Context(t) + ft := faketime.NewClockTimeWithOffset(0) + cache := &diskCommittedContentIndexCache{ + testutil.TempDirectory(t), + ft.NowFunc(), + func() int { return 3 }, + lm.NewLogger("test"), + DefaultIndexCacheSweepAge, + } + + const indexCount = 200 + + // Prepare N small indexes in the cache directory. + for i := range indexCount { + b := index.Builder{ + mustParseID(t, fmt.Sprintf("c%03d", i)): Info{PackBlobID: blob.ID(fmt.Sprintf("p%03d", i)), ContentID: mustParseID(t, fmt.Sprintf("c%03d", i))}, + } + require.NoError(t, cache.addContentToCache(ctx, blob.ID(fmt.Sprintf("ndx%03d", i)), mustBuildIndex(t, b))) + } + + before := countFDsLinux(t) + + var opened []index.Index + + // Open all indexes and keep them open to maximize pressure. + for i := range indexCount { + ndx, err := cache.openIndex(ctx, blob.ID(fmt.Sprintf("ndx%03d", i))) + require.NoError(t, err) + + opened = append(opened, ndx) + } + + after := countFDsLinux(t) + + // Despite keeping many mappings alive, the FD count should not grow proportionally. + // Allow some slack for incidental FDs opened by runtime or test harness. + const maxDelta = 32 + + require.LessOrEqualf(t, after-before, maxDelta, "fd count grew too much after opening %d indexes", indexCount) + + // Cleanup + for _, ndx := range opened { + require.NoError(t, ndx.Close()) + } +}