logging: added log rotation and improved predictability of log sweep (#1562)

* logging: added log rotation and improved predictability of log sweep

With this change logs will be rotated every 50 MB, which prevents
accumulation of giant files while server is running.

This change also guarantees that the log sweep completes at least once
before each invocation of Kopia finishes. Previously the sweep ran in a
goroutine that was never monitored for completion, so it could be cut
short when the process exited.

Flags can be used to override default behaviors:

* `--max-log-file-segment-size` - overrides the maximum size of a single log file segment (50 MB by default)
* `--no-wait-for-log-sweep` - disables waiting for full log sweep

Fixes #1561

* logging: added --log-dir-max-total-size-mb flag

This limits the total size of all logs in a directory to 1 GB by default.
This commit is contained in:
Jarek Kowalski
2021-12-03 16:43:46 -08:00
committed by GitHub
parent 920341cb68
commit 2cb05f7501
3 changed files with 259 additions and 38 deletions

View File

@@ -122,7 +122,8 @@ type App struct {
disableInternalLog bool
AdvancedCommands string
currentAction string
currentAction string
onExitCallbacks []func()
// subcommands
blob commandBlob
@@ -170,6 +171,18 @@ func (c *App) SetLoggerFactory(loggerForModule logging.LoggerFactory) {
c.loggerFactory = loggerForModule
}
// RegisterOnExit registers the provided function to run before app exits.
// Callbacks accumulate in onExitCallbacks and are invoked by runOnExit in
// the order in which they were registered.
func (c *App) RegisterOnExit(f func()) {
	c.onExitCallbacks = append(c.onExitCallbacks, f)
}
// runOnExit invokes every callback registered via RegisterOnExit, in the
// order in which they were added.
func (c *App) runOnExit() {
	for i := range c.onExitCallbacks {
		c.onExitCallbacks[i]()
	}
}
func (c *App) passwordPersistenceStrategy() passwordpersist.Strategy {
if !c.persistCredentials {
return passwordpersist.None
@@ -405,7 +418,7 @@ func (c *App) maybeRepositoryAction(act func(ctx context.Context, rep repo.Repos
return func(kpc *kingpin.ParseContext) error {
ctx0 := c.rootContext()
if err := c.pf.withProfiling(func() error {
err := c.pf.withProfiling(func() error {
ctx, finishMemoryTracking := c.mt.startMemoryTracking(ctx0)
defer finishMemoryTracking()
@@ -461,7 +474,11 @@ func (c *App) maybeRepositoryAction(act func(ctx context.Context, rep repo.Repos
}
return err
}); err != nil {
})
c.runOnExit()
if err != nil {
// print error in red
log(ctx0).Errorf("%v", err.Error())
c.osExit(1)

View File

@@ -14,6 +14,7 @@
"github.com/alecthomas/kingpin"
"github.com/fatih/color"
"github.com/pkg/errors"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
@@ -30,21 +31,25 @@
var logLevels = []string{"debug", "info", "warning", "error"}
type loggingFlags struct {
logFile string
contentLogFile string
logDir string
logDirMaxFiles int
logDirMaxAge time.Duration
contentLogDirMaxFiles int
contentLogDirMaxAge time.Duration
logLevel string
fileLogLevel string
fileLogLocalTimezone bool
jsonLogFile bool
jsonLogConsole bool
forceColor bool
disableColor bool
consoleLogTimestamps bool
logFile string
contentLogFile string
logDir string
logDirMaxFiles int
logDirMaxAge time.Duration
logDirMaxTotalSizeMB float64
contentLogDirMaxFiles int
contentLogDirMaxAge time.Duration
contentLogDirMaxTotalSizeMB float64
logFileMaxSegmentSize int
logLevel string
fileLogLevel string
fileLogLocalTimezone bool
jsonLogFile bool
jsonLogConsole bool
forceColor bool
disableColor bool
consoleLogTimestamps bool
waitForLogSweep bool
cliApp *cli.App
}
@@ -56,8 +61,12 @@ func (c *loggingFlags) setup(cliApp *cli.App, app *kingpin.Application) {
app.Flag("log-dir", "Directory where log files should be written.").Envar("KOPIA_LOG_DIR").Default(ospath.LogsDir()).StringVar(&c.logDir)
app.Flag("log-dir-max-files", "Maximum number of log files to retain").Envar("KOPIA_LOG_DIR_MAX_FILES").Default("1000").Hidden().IntVar(&c.logDirMaxFiles)
app.Flag("log-dir-max-age", "Maximum age of log files to retain").Envar("KOPIA_LOG_DIR_MAX_AGE").Hidden().Default("720h").DurationVar(&c.logDirMaxAge)
app.Flag("log-dir-max-total-size-mb", "Maximum total size of log files to retain").Envar("KOPIA_LOG_DIR_MAX_SIZE_MB").Hidden().Default("1000").Float64Var(&c.logDirMaxTotalSizeMB)
app.Flag("max-log-file-segment-size", "Maximum size of a single log file segment").Envar("KOPIA_LOG_FILE_MAX_SEGMENT_SIZE").Default("50000000").Hidden().IntVar(&c.logFileMaxSegmentSize)
app.Flag("wait-for-log-sweep", "Wait for log sweep before program exit").Default("true").Hidden().BoolVar(&c.waitForLogSweep)
app.Flag("content-log-dir-max-files", "Maximum number of content log files to retain").Envar("KOPIA_CONTENT_LOG_DIR_MAX_FILES").Default("5000").Hidden().IntVar(&c.contentLogDirMaxFiles)
app.Flag("content-log-dir-max-age", "Maximum age of content log files to retain").Envar("KOPIA_CONTENT_LOG_DIR_MAX_AGE").Default("720h").Hidden().DurationVar(&c.contentLogDirMaxAge)
app.Flag("content-log-dir-max-total-size-mb", "Maximum total size of log files to retain").Envar("KOPIA_CONTENT_LOG_DIR_MAX_SIZE_MB").Hidden().Default("1000").Float64Var(&c.contentLogDirMaxTotalSizeMB)
app.Flag("log-level", "Console log level").Default("info").EnumVar(&c.logLevel, logLevels...)
app.Flag("json-log-console", "JSON log file").Hidden().BoolVar(&c.jsonLogConsole)
app.Flag("json-log-file", "JSON log file").Hidden().BoolVar(&c.jsonLogFile)
@@ -177,7 +186,7 @@ func (c *loggingFlags) setupConsoleCore() zapcore.Core {
)
}
func (c *loggingFlags) setupLogFileBasedLogger(now time.Time, subdir, suffix, logFileOverride string, maxFiles int, maxAge time.Duration) zapcore.WriteSyncer {
func (c *loggingFlags) setupLogFileBasedLogger(now time.Time, subdir, suffix, logFileOverride string, maxFiles int, maxSizeMB float64, maxAge time.Duration) zapcore.WriteSyncer {
var logFileName, symlinkName string
if logFileOverride != "" {
@@ -202,16 +211,42 @@ func (c *loggingFlags) setupLogFileBasedLogger(now time.Time, subdir, suffix, lo
fmt.Fprintln(os.Stderr, "Unable to create logs directory:", err)
}
sweepLogWG := &sync.WaitGroup{}
doSweep := func() {}
// do not scrub directory if custom log file has been provided.
if logFileOverride == "" && shouldSweepLog(maxFiles, maxAge) {
go sweepLogDir(context.TODO(), logDir, maxFiles, maxAge)
doSweep = func() {
sweepLogDir(context.TODO(), logDir, maxFiles, maxSizeMB, maxAge)
}
}
return &onDemandFile{
odf := &onDemandFile{
logDir: logDir,
logFileBaseName: logFileBaseName,
symlinkName: symlinkName,
maxSegmentSize: c.logFileMaxSegmentSize,
startSweep: func() {
sweepLogWG.Add(1)
go func() {
defer sweepLogWG.Done()
doSweep()
}()
},
}
if c.waitForLogSweep {
// wait for log sweep at the end
c.cliApp.RegisterOnExit(odf.closeSegmentAndSweep)
c.cliApp.RegisterOnExit(sweepLogWG.Wait)
} else {
// old behavior: start log sweep in parallel to program but don't wait at the end.
odf.startSweep()
}
return odf
}
func (c *loggingFlags) setupLogFileCore(now time.Time, suffix string) zapcore.Core {
@@ -227,7 +262,7 @@ func (c *loggingFlags) setupLogFileCore(now time.Time, suffix string) zapcore.Co
EncodeDuration: zapcore.StringDurationEncoder,
ConsoleSeparator: " ",
}, c.jsonLogFile),
c.setupLogFileBasedLogger(now, "cli-logs", suffix, c.logFile, c.logDirMaxFiles, c.logDirMaxAge),
c.setupLogFileBasedLogger(now, "cli-logs", suffix, c.logFile, c.logDirMaxFiles, c.logDirMaxTotalSizeMB, c.logDirMaxAge),
logLevelFromFlag(c.fileLogLevel),
)
}
@@ -250,7 +285,7 @@ func (c *loggingFlags) setupContentLogFileBackend(now time.Time, suffix string)
EncodeDuration: zapcore.StringDurationEncoder,
ConsoleSeparator: " ",
}),
c.setupLogFileBasedLogger(now, "content-logs", suffix, c.contentLogFile, c.contentLogDirMaxFiles, c.contentLogDirMaxAge),
c.setupLogFileBasedLogger(now, "content-logs", suffix, c.contentLogFile, c.contentLogDirMaxFiles, c.contentLogDirMaxTotalSizeMB, c.contentLogDirMaxAge),
zap.DebugLevel)
}
@@ -258,7 +293,7 @@ func shouldSweepLog(maxFiles int, maxAge time.Duration) bool {
return maxFiles > 0 || maxAge > 0
}
func sweepLogDir(ctx context.Context, dirname string, maxCount int, maxAge time.Duration) {
func sweepLogDir(ctx context.Context, dirname string, maxCount int, maxSizeMB float64, maxAge time.Duration) {
var timeCutoff time.Time
if maxAge > 0 {
timeCutoff = clock.Now().Add(-maxAge)
@@ -268,6 +303,8 @@ func sweepLogDir(ctx context.Context, dirname string, maxCount int, maxAge time.
maxCount = math.MaxInt32
}
maxTotalSizeBytes := int64(maxSizeMB * 1e6)
entries, err := os.ReadDir(dirname)
if err != nil {
log(ctx).Errorf("unable to read log directory: %v", err)
@@ -296,6 +333,7 @@ func sweepLogDir(ctx context.Context, dirname string, maxCount int, maxAge time.
})
cnt := 0
totalSize := int64(0)
for _, fi := range fileInfos {
if !strings.HasPrefix(fi.Name(), logFileNamePrefix) {
@@ -308,7 +346,9 @@ func sweepLogDir(ctx context.Context, dirname string, maxCount int, maxAge time.
cnt++
if cnt > maxCount || fi.ModTime().Before(timeCutoff) {
totalSize += fi.Size()
if cnt > maxCount || totalSize > maxTotalSizeBytes || fi.ModTime().Before(timeCutoff) {
if err = os.Remove(filepath.Join(dirname, fi.Name())); err != nil && !os.IsNotExist(err) {
log(ctx).Errorf("unable to remove log file: %v", err)
}
@@ -332,13 +372,19 @@ func logLevelFromFlag(levelString string) zapcore.LevelEnabler {
}
type onDemandFile struct {
segmentCounter int // number of segments written
currentSegmentSize int // number of bytes written to current segment
maxSegmentSize int
currentSegmentFilename string
logDir string
logFileBaseName string
symlinkName string
f *os.File
startSweep func()
once sync.Once
mu sync.Mutex
f *os.File
}
func (w *onDemandFile) Sync() error {
@@ -350,28 +396,70 @@ func (w *onDemandFile) Sync() error {
return w.f.Sync()
}
// closeSegmentAndSweep acquires the writer lock and delegates to
// closeSegmentAndSweepLocked, which closes the currently-open log segment
// (if any) and starts a sweep of the log directory.
func (w *onDemandFile) closeSegmentAndSweep() {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.closeSegmentAndSweepLocked()
}
// closeSegmentAndSweepLocked closes the currently-open segment file (if
// any) and triggers a sweep of the log directory. Callers must hold w.mu.
func (w *onDemandFile) closeSegmentAndSweepLocked() {
	if f := w.f; f != nil {
		w.f = nil

		if err := f.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "warning: unable to close log segment: %v", err)
		}
	}

	w.startSweep()
}
func (w *onDemandFile) Write(b []byte) (int, error) {
w.once.Do(func() {
lf := filepath.Join(w.logDir, w.logFileBaseName)
w.mu.Lock()
defer w.mu.Unlock()
// close current file if we'd overflow on next write.
if w.f != nil && w.currentSegmentSize+len(b) > w.maxSegmentSize {
w.closeSegmentAndSweepLocked()
}
// open file if we don't have it yet
if w.f == nil {
var baseName, ext string
p := strings.LastIndex(w.logFileBaseName, ".")
if p < 0 {
ext = ""
baseName = w.logFileBaseName
} else {
ext = w.logFileBaseName[p:]
baseName = w.logFileBaseName[0:p]
}
w.currentSegmentFilename = fmt.Sprintf("%s.%d%s", baseName, w.segmentCounter, ext)
w.segmentCounter++
w.currentSegmentSize = 0
lf := filepath.Join(w.logDir, w.currentSegmentFilename)
f, err := os.Create(lf)
if err != nil {
fmt.Fprintf(os.Stderr, "unable to open log file: %v\n", err)
return
return 0, errors.Wrap(err, "unable to open log file")
}
w.f = f
if w.symlinkName != "" {
symlink := filepath.Join(w.logDir, w.symlinkName)
_ = os.Remove(symlink) // best-effort remove
_ = os.Symlink(w.logFileBaseName, symlink) // best-effort symlink
_ = os.Remove(symlink) // best-effort remove
_ = os.Symlink(w.currentSegmentFilename, symlink) // best-effort symlink
}
})
if w.f == nil {
return 0, nil
}
n, err := w.f.Write(b)
w.currentSegmentSize += n
// nolint:wrapcheck
return w.f.Write(b)
return n, err
}

View File

@@ -2,6 +2,7 @@
import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
@@ -14,6 +15,7 @@
"github.com/kopia/kopia/internal/clock"
"github.com/kopia/kopia/internal/logfile"
"github.com/kopia/kopia/internal/testutil"
"github.com/kopia/kopia/tests/testdirtree"
"github.com/kopia/kopia/tests/testenv"
)
@@ -95,6 +97,98 @@ func TestLoggingFlags(t *testing.T) {
require.Empty(t, stderr)
}
// TestLogFileRotation verifies that logs are split into segments no larger
// than --max-log-file-segment-size and that --log-dir-max-files /
// --content-log-dir-max-files cap the number of retained files.
func TestLogFileRotation(t *testing.T) {
	runner := testenv.NewInProcRunner(t)
	runner.CustomizeApp = logfile.Attach

	env := testenv.NewCLITest(t, testenv.RepoFormatNotImportant, runner)
	env.RunAndExpectSuccess(t, "repo", "create", "filesystem", "--path", env.RepoDir)

	srcDir := testutil.TempDirectory(t)
	logDir := testutil.TempDirectory(t)

	env.RunAndExpectSuccess(t, "snap", "create", srcDir,
		"--file-log-local-tz", "--log-level=error", "--file-log-level=debug",
		"--max-log-file-segment-size=1000", "--log-dir", logDir, "--log-dir-max-files=3", "--content-log-dir-max-files=4")

	// expected number of regular files remaining in each log subdirectory
	wantCounts := map[string]int{
		"cli-logs":     3,
		"content-logs": 4,
	}

	for subdir, wantCount := range wantCounts {
		subdirPath := filepath.Join(logDir, subdir)
		wantCount := wantCount // capture loop variable for the subtest closure

		t.Run(subdir, func(t *testing.T) {
			entries, err := os.ReadDir(subdirPath)
			require.NoError(t, err)

			regularCount := 0

			for _, ent := range entries {
				fi, err := ent.Info()
				require.NoError(t, err)

				t.Logf("%v %v", fi.Name(), fi.Size())

				if fi.Mode().IsRegular() {
					regularCount++
				}

				// each segment must stay near the 1000-byte limit.
				require.LessOrEqual(t, fi.Size(), int64(3000), fi.Name())
			}

			require.Equal(t, wantCount, regularCount)
		})
	}
}
// TestLogFileMaxTotalSize verifies that --log-dir-max-total-size-mb and
// --content-log-dir-max-total-size-mb limit the combined size of retained
// log files in their respective directories.
func TestLogFileMaxTotalSize(t *testing.T) {
	runner := testenv.NewInProcRunner(t)
	runner.CustomizeApp = logfile.Attach

	env := testenv.NewCLITest(t, testenv.RepoFormatNotImportant, runner)
	env.RunAndExpectSuccess(t, "repo", "create", "filesystem", "--path", env.RepoDir)

	srcDir := testutil.TempDirectory(t)
	tmpLogDir := testutil.TempDirectory(t)

	// 3-level directory tree with up to 10 subdirectories and up to 100
	// files (of up to 10 bytes each) per directory - enough work to
	// generate multiple log segments.
	testdirtree.CreateDirectoryTree(srcDir, testdirtree.MaybeSimplifyFilesystem(testdirtree.DirectoryTreeOptions{
		Depth:                  3,
		MaxSubdirsPerDirectory: 10,
		MaxFilesPerDirectory:   100,
		MaxFileSize:            10,
	}), &testdirtree.DirectoryTreeCounters{})

	env.RunAndExpectSuccess(t, "snap", "create", srcDir,
		"--file-log-local-tz", "--log-level=error", "--file-log-level=debug",
		"--max-log-file-segment-size=1000", "--log-dir", tmpLogDir)

	// flag that limits total log size for each log subdirectory
	subdirFlags := map[string]string{
		"cli-logs":     "--log-dir-max-total-size-mb",
		"content-logs": "--content-log-dir-max-total-size-mb",
	}

	for subdir, flag := range subdirFlags {
		logSubdir := filepath.Join(tmpLogDir, subdir)
		flag := flag // capture loop variable for the subtest closure

		t.Run(subdir, func(t *testing.T) {
			// with a very generous limit nothing should be swept.
			env.RunAndExpectSuccess(t, "snap", "ls", "--file-log-level=debug", "--log-dir", tmpLogDir, flag+"=40000")
			size1 := getTotalDirSize(t, logSubdir)
			size1MB := float64(size1) / 1e6

			// halving the limit should sweep the directory down to
			// between 1/4 and 1/2 of its previous size.
			env.RunAndExpectSuccess(t, "snap", "ls", "--file-log-level=debug", "--log-dir", tmpLogDir, fmt.Sprintf("%s=%v", flag, size1MB/2))
			size2 := getTotalDirSize(t, logSubdir)
			require.Less(t, size2, size1/2)
			require.Greater(t, size2, size1/4)
		})
	}
}
func verifyFileLogFormat(t *testing.T, fname string, re *regexp.Regexp) {
t.Helper()
@@ -115,3 +209,25 @@ func isUTC() bool {
return offset == 0
}
// getTotalDirSize returns the combined size (in bytes) of all regular
// files directly inside the provided directory, logging each entry.
func getTotalDirSize(t *testing.T, dir string) int {
	t.Helper()

	entries, err := os.ReadDir(dir)
	require.NoError(t, err)

	total := 0

	for _, ent := range entries {
		fi, err := ent.Info()
		require.NoError(t, err)

		t.Logf("%v %v", fi.Name(), fi.Size())

		if fi.Mode().IsRegular() {
			total += int(fi.Size())
		}
	}

	return total
}