mirror of
https://github.com/kopia/kopia.git
synced 2026-03-27 02:21:59 -04:00
feat(general): reduce number of open file descriptors for indexes on Unix (#4866)
Close file after mmap on Unix to reduce open file descriptors.
On Unix-like platforms, close the file descriptor immediately after a successful
mmap.Map of .sndx index cache files. This keeps the mapping valid until Unmap
(per POSIX semantics) and significantly reduces steady-state FD usage when many
indexes are open, helping avoid EMFILE ("too many open files").
- Split mmapOpenWithRetry into platform-specific implementations:
- committed_content_index_disk_cache_unix.go (!windows):
- Map RDONLY, close FD immediately.
- Return closer that only unmaps.
- committed_content_index_disk_cache_windows.go (windows):
- Keep FD open until Unmap.
- Return closer that unmaps and closes FD.
- Remove old mmapOpenWithRetry and mmap import from
repo/content/committed_content_index_disk_cache.go.
- Add Linux-only unit test verifying FD count does not grow proportionally:
repo/content/committed_content_index_fd_linux_test.go
- Creates N small indexes, opens them all, checks /proc/self/fd delta stays low.
Notes:
- Behavior unchanged on Windows due to OS semantics.
- Mapping failures close the FD to avoid leaks.
- Unlink semantics remain correct; mappings stay valid until Unmap.
* remove windows only retry logic under unix
This commit is contained in:
@@ -7,7 +7,6 @@
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/edsrzf/mmap-go"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/kopia/kopia/internal/blobparam"
|
||||
@@ -52,54 +51,6 @@ func (c *diskCommittedContentIndexCache) openIndex(ctx context.Context, indexBlo
|
||||
return ndx, nil
|
||||
}
|
||||
|
||||
// mmapOpenWithRetry attempts mmap.Open() with exponential back-off to work around rare issue specific to Windows where
|
||||
// we can't open the file right after it has been written.
|
||||
func (c *diskCommittedContentIndexCache) mmapOpenWithRetry(ctx context.Context, path string) (mmap.MMap, func() error, error) {
|
||||
const (
|
||||
maxRetries = 8
|
||||
startingDelay = 10 * time.Millisecond
|
||||
)
|
||||
|
||||
// retry milliseconds: 10, 20, 40, 80, 160, 320, 640, 1280, total ~2.5s
|
||||
f, err := os.Open(path) //nolint:gosec
|
||||
nextDelay := startingDelay
|
||||
|
||||
retryCount := 0
|
||||
for err != nil && retryCount < maxRetries {
|
||||
retryCount++
|
||||
contentlog.Log2(ctx, c.log, "retry #%v unable to mmap.Open()",
|
||||
logparam.Int("retryCount", retryCount),
|
||||
logparam.Error("err", err))
|
||||
time.Sleep(nextDelay)
|
||||
nextDelay *= 2
|
||||
|
||||
f, err = os.Open(path) //nolint:gosec
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrap(err, "unable to open file despite retries")
|
||||
}
|
||||
|
||||
mm, err := mmap.Map(f, mmap.RDONLY, 0)
|
||||
if err != nil {
|
||||
f.Close() //nolint:errcheck
|
||||
|
||||
return nil, nil, errors.Wrap(err, "mmap error")
|
||||
}
|
||||
|
||||
return mm, func() error {
|
||||
if err2 := mm.Unmap(); err2 != nil {
|
||||
return errors.Wrapf(err2, "error unmapping index %v", path)
|
||||
}
|
||||
|
||||
if err2 := f.Close(); err2 != nil {
|
||||
return errors.Wrapf(err2, "error closing index %v", path)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *diskCommittedContentIndexCache) hasIndexBlobID(_ context.Context, indexBlobID blob.ID) (bool, error) {
|
||||
_, err := os.Stat(c.indexBlobPath(indexBlobID))
|
||||
if err == nil {
|
||||
|
||||
49
repo/content/committed_content_index_disk_cache_unix.go
Normal file
49
repo/content/committed_content_index_disk_cache_unix.go
Normal file
@@ -0,0 +1,49 @@
|
||||
//go:build !windows
|
||||
|
||||
package content
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"github.com/edsrzf/mmap-go"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// Unix semantics: Close the file descriptor immediately after a successful mmap so the
|
||||
// process does not retain FDs for all mapped index files. The mapping remains valid until
|
||||
// Unmap is called.
|
||||
func (c *diskCommittedContentIndexCache) mmapOpenWithRetry(_ context.Context, path string) (mmap.MMap, func() error, error) {
|
||||
f, err := os.Open(path) //nolint:gosec
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrap(err, "unable to open file despite retries")
|
||||
}
|
||||
|
||||
mm, err := mmap.Map(f, mmap.RDONLY, 0)
|
||||
if err != nil {
|
||||
_ = f.Close()
|
||||
return nil, nil, errors.Wrap(err, "mmap error")
|
||||
}
|
||||
|
||||
// On Unix, it's safe to close the FD now; the mapping remains valid.
|
||||
if err := f.Close(); err != nil {
|
||||
// If close fails, still return mapping, but report error on closer to surface the issue later.
|
||||
closeErr := errors.Wrapf(err, "error closing index %v after mmap", path)
|
||||
|
||||
return mm, func() error {
|
||||
if err2 := mm.Unmap(); err2 != nil {
|
||||
return errors.Wrapf(err2, "error unmapping index %v (also had close error: %v)", path, closeErr)
|
||||
}
|
||||
|
||||
return closeErr
|
||||
}, nil
|
||||
}
|
||||
|
||||
return mm, func() error {
|
||||
if err2 := mm.Unmap(); err2 != nil {
|
||||
return errors.Wrapf(err2, "error unmapping index %v", path)
|
||||
}
|
||||
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
62
repo/content/committed_content_index_disk_cache_windows.go
Normal file
62
repo/content/committed_content_index_disk_cache_windows.go
Normal file
@@ -0,0 +1,62 @@
|
||||
//go:build windows
|
||||
|
||||
package content
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/edsrzf/mmap-go"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/kopia/kopia/internal/contentlog"
|
||||
"github.com/kopia/kopia/internal/contentlog/logparam"
|
||||
)
|
||||
|
||||
// mmapOpenWithRetry attempts mmap.Open() with exponential back-off to work around a rare issue
|
||||
// where Windows can't open the file right after it has been written.
|
||||
//
|
||||
// Windows semantics: keep the file descriptor open until Unmap due to OS requirements.
|
||||
func (c *diskCommittedContentIndexCache) mmapOpenWithRetry(ctx context.Context, path string) (mmap.MMap, func() error, error) {
|
||||
const (
|
||||
maxRetries = 8
|
||||
startingDelay = 10 * time.Millisecond
|
||||
)
|
||||
|
||||
// retry milliseconds: 10, 20, 40, 80, 160, 320, 640, 1280, total ~2.5s
|
||||
f, err := os.Open(path) //nolint:gosec
|
||||
nextDelay := startingDelay
|
||||
|
||||
retryCount := 0
|
||||
for err != nil && retryCount < maxRetries {
|
||||
retryCount++
|
||||
contentlog.Log2(ctx, c.log, "retry unable to mmap.Open()",
|
||||
logparam.Int("retryCount", retryCount),
|
||||
logparam.Error("err", err))
|
||||
time.Sleep(nextDelay)
|
||||
nextDelay *= 2
|
||||
|
||||
f, err = os.Open(path) //nolint:gosec
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrap(err, "unable to open file despite retries")
|
||||
}
|
||||
|
||||
mm, err := mmap.Map(f, mmap.RDONLY, 0)
|
||||
if err != nil {
|
||||
_ = f.Close()
|
||||
return nil, nil, errors.Wrap(err, "mmap error")
|
||||
}
|
||||
|
||||
return mm, func() error {
|
||||
if err2 := mm.Unmap(); err2 != nil {
|
||||
return errors.Wrapf(err2, "error unmapping index %v", path)
|
||||
}
|
||||
if err2 := f.Close(); err2 != nil {
|
||||
return errors.Wrapf(err2, "error closing index %v", path)
|
||||
}
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
79
repo/content/committed_content_index_fd_linux_test.go
Normal file
79
repo/content/committed_content_index_fd_linux_test.go
Normal file
@@ -0,0 +1,79 @@
|
||||
//go:build linux
|
||||
|
||||
package content
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/kopia/kopia/internal/faketime"
|
||||
"github.com/kopia/kopia/internal/repodiag"
|
||||
"github.com/kopia/kopia/internal/testlogging"
|
||||
"github.com/kopia/kopia/internal/testutil"
|
||||
"github.com/kopia/kopia/repo/blob"
|
||||
"github.com/kopia/kopia/repo/content/index"
|
||||
)
|
||||
|
||||
// countFDsLinux returns the number of open file descriptors for the current process on Linux.
|
||||
func countFDsLinux(t *testing.T) int {
|
||||
t.Helper()
|
||||
|
||||
entries, err := os.ReadDir("/proc/self/fd")
|
||||
require.NoError(t, err, "unable to read /proc/self/fd")
|
||||
|
||||
return len(entries)
|
||||
}
|
||||
|
||||
// Test that opening many indexes on Linux does not retain a file descriptor per index.
|
||||
func TestCommittedContentIndexCache_Disk_FDsNotGrowingOnOpen_Linux(t *testing.T) {
|
||||
// Do not run in parallel to avoid fd count noise from other tests.
|
||||
var lm *repodiag.LogManager
|
||||
|
||||
ctx := testlogging.Context(t)
|
||||
ft := faketime.NewClockTimeWithOffset(0)
|
||||
cache := &diskCommittedContentIndexCache{
|
||||
testutil.TempDirectory(t),
|
||||
ft.NowFunc(),
|
||||
func() int { return 3 },
|
||||
lm.NewLogger("test"),
|
||||
DefaultIndexCacheSweepAge,
|
||||
}
|
||||
|
||||
const indexCount = 200
|
||||
|
||||
// Prepare N small indexes in the cache directory.
|
||||
for i := range indexCount {
|
||||
b := index.Builder{
|
||||
mustParseID(t, fmt.Sprintf("c%03d", i)): Info{PackBlobID: blob.ID(fmt.Sprintf("p%03d", i)), ContentID: mustParseID(t, fmt.Sprintf("c%03d", i))},
|
||||
}
|
||||
require.NoError(t, cache.addContentToCache(ctx, blob.ID(fmt.Sprintf("ndx%03d", i)), mustBuildIndex(t, b)))
|
||||
}
|
||||
|
||||
before := countFDsLinux(t)
|
||||
|
||||
var opened []index.Index
|
||||
|
||||
// Open all indexes and keep them open to maximize pressure.
|
||||
for i := range indexCount {
|
||||
ndx, err := cache.openIndex(ctx, blob.ID(fmt.Sprintf("ndx%03d", i)))
|
||||
require.NoError(t, err)
|
||||
|
||||
opened = append(opened, ndx)
|
||||
}
|
||||
|
||||
after := countFDsLinux(t)
|
||||
|
||||
// Despite keeping many mappings alive, the FD count should not grow proportionally.
|
||||
// Allow some slack for incidental FDs opened by runtime or test harness.
|
||||
const maxDelta = 32
|
||||
|
||||
require.LessOrEqualf(t, after-before, maxDelta, "fd count grew too much after opening %d indexes", indexCount)
|
||||
|
||||
// Cleanup
|
||||
for _, ndx := range opened {
|
||||
require.NoError(t, ndx.Close())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user