mirror of
https://github.com/kopia/kopia.git
synced 2026-03-25 17:41:11 -04:00
This is a breaking change to users who might be using Kopia as a library.
### Log Format
```json
{"t":"<timestamp-rfc-3339-microseconds>", "span:T1":"V1", "span:T2":"V2", "n":"<source>", "m":"<message>", /*parameters*/}
```
Where each record is associated with one or more spans that describe its scope:
* `"span:client": "<hash-of-username@hostname>"`
* `"span:repo": "<random>"` - random identifier of a repository connection (from `repo.Open`)
* `"span:maintenance": "<random>"` - random identifier of a maintenance session
* `"span:upload": "<hash-of-username@host:/path>"` - uniquely identifies upload session of a given directory
* `"span:checkpoint": "<random>"` - encapsulates each checkpoint operation during Upload
* `"span:server-session": "<random>"` - single client connection to the server
* `"span:flush": "<random>"` - encapsulates each Flush session
* `"span:maintenance": "<random>"` - encapsulates each maintenance operation
* `"span:loadIndex" : "<random>"` - encapsulates index loading operation
* `"span:emr" : "<random>"` - encapsulates epoch manager refresh
* `"span:writePack": "<pack-blob-ID>"` - encapsulates pack blob preparation and writing
(plus additional minor spans for various phases of the maintenance).
Notable points:
- Used internal zero allocation JSON writer for reduced memory usage.
- renamed `--disable-internal-log` to `--disable-repository-log` (controls saving blobs to repository)
- added `--disable-content-log` (controls writing of `content-log` files)
- all storage operations are also logged in a structural way and associated with the corresponding spans.
- all content IDs are logged in a truncated format (only the first N bytes, which are usually enough to be unique) to improve compressibility of logs (blob IDs are frequently repeated but content IDs usually appear just once).
This format should make it possible to recreate the journey of any single content throughout pack blobs, indexes and compaction events.
193 lines
4.5 KiB
Go
193 lines
4.5 KiB
Go
package server
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
|
|
"github.com/kopia/kopia/internal/clock"
|
|
"github.com/kopia/kopia/notification"
|
|
"github.com/kopia/kopia/notification/notifydata"
|
|
"github.com/kopia/kopia/notification/notifytemplate"
|
|
"github.com/kopia/kopia/repo"
|
|
"github.com/kopia/kopia/repo/maintenance"
|
|
)
|
|
|
|
// srvMaintenance runs repository maintenance in the background on behalf of
// the server: it owns a goroutine that waits for trigger signals, runs
// maintenance tasks, and caches the next scheduled maintenance time.
type srvMaintenance struct {
	triggerChan chan struct{}      // buffered (size 1) signal channel; trigger() drops sends when one is already pending
	closed      chan struct{}      // closed by stop() to terminate the background goroutine
	cancelCtx   context.CancelFunc // cancels the context used by any in-flight maintenance run
	srv         maintenanceManagerServerInterface
	wg          sync.WaitGroup // tracks the background goroutine so stop() can wait for its exit
	dr          repo.DirectRepository

	// minimum delay before attempting maintenance again after a failed run.
	minMaintenanceInterval time.Duration // +checklocksignore

	mu sync.Mutex
	//+checklocks:mu
	cachedNextMaintenanceTime time.Time // zero while a run is in progress or until refreshed
	//+checklocks:mu
	nextMaintenanceNoEarlierThan time.Time // throttle floor set by afterFailedRun()
}
|
|
|
|
// maintenanceManagerServerInterface is the subset of server behavior that
// srvMaintenance depends on.
type maintenanceManagerServerInterface interface {
	// runMaintenanceTask performs a single maintenance run against the repository.
	runMaintenanceTask(ctx context.Context, dr repo.DirectRepository) error
	// refreshScheduler asks the server scheduler to re-read upcoming task times.
	refreshScheduler(reason string)
	// enableErrorNotifications reports whether failed runs should emit notifications.
	enableErrorNotifications() bool
	// notificationTemplateOptions returns options used to render notification templates.
	notificationTemplateOptions() notifytemplate.Options
}
|
|
|
|
// trigger requests an immediate maintenance run. If a trigger is already
// pending, this one is silently dropped (the pending run will happen anyway).
func (s *srvMaintenance) trigger() {
	// clear the cached next-maintenance time so nothing is scheduled
	// until the run completes and the cache is refreshed.
	s.beforeRun()

	// non-blocking send into the 1-element buffered channel; the default
	// branch coalesces duplicate triggers.
	select {
	case s.triggerChan <- struct{}{}:
	default:
	}
}
|
|
|
|
// stop cancels any in-flight maintenance run and shuts down the background
// goroutine, blocking until it has fully exited.
func (s *srvMaintenance) stop(ctx context.Context) {
	// cancel context for any running maintenance
	s.cancelCtx()

	// stop the goroutine and wait for it
	close(s.closed)
	s.wg.Wait()

	userLog(ctx).Debug("maintenance manager stopped")
}
|
|
|
|
func (s *srvMaintenance) beforeRun() {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
// make sure we're not scheduling next maintenance until we refresh
|
|
s.cachedNextMaintenanceTime = time.Time{}
|
|
}
|
|
|
|
func (s *srvMaintenance) afterFailedRun() {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
// make sure we don't run maintenance too often
|
|
s.nextMaintenanceNoEarlierThan = clock.Now().Add(s.minMaintenanceInterval)
|
|
}
|
|
|
|
func (s *srvMaintenance) refresh(ctx context.Context, notify bool) {
|
|
if notify {
|
|
defer s.srv.refreshScheduler("maintenance schedule changed")
|
|
}
|
|
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if err := s.refreshLocked(ctx); err != nil {
|
|
userLog(ctx).Debugw("unable to refresh maintenance manager", "err", err)
|
|
}
|
|
}
|
|
|
|
func (s *srvMaintenance) refreshLocked(ctx context.Context) error {
|
|
nmt, err := maintenance.TimeToAttemptNextMaintenance(ctx, s.dr)
|
|
if err != nil {
|
|
return errors.Wrap(err, "unable to get next maintenance time")
|
|
}
|
|
|
|
if nmt.Before(s.nextMaintenanceNoEarlierThan) {
|
|
nmt = s.nextMaintenanceNoEarlierThan
|
|
}
|
|
|
|
s.cachedNextMaintenanceTime = nmt
|
|
|
|
return nil
|
|
}
|
|
|
|
func (s *srvMaintenance) nextMaintenanceTime() time.Time {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
return s.cachedNextMaintenanceTime
|
|
}
|
|
|
|
// maybeStartMaintenanceManager starts a periodic, background srvMaintenance task. Returns nil if no task was created.
func maybeStartMaintenanceManager(
	ctx context.Context,
	rep repo.Repository,
	srv maintenanceManagerServerInterface,
	minMaintenanceInterval time.Duration,
) *srvMaintenance {
	// Check whether maintenance can be run and avoid unnecessarily starting a task that
	// would fail later. Don't start a task when the repo is either:
	// - not direct; or
	// - read only.
	// Note: the repo owner is not checked here since the repo owner can be externally
	// changed while the server is running. The server would pick up the new owner
	// the next time a maintenance task executes.
	dr, ok := rep.(repo.DirectRepository)
	if !ok {
		return nil
	}

	if rep.ClientOptions().ReadOnly {
		userLog(ctx).Warnln("the repository connection is read-only, maintenance tasks will not be performed on this repository")

		return nil
	}

	// mctx is canceled by stop() (via m.cancelCtx) to abort any in-flight run.
	mctx, cancel := context.WithCancel(ctx)

	m := srvMaintenance{
		triggerChan:            make(chan struct{}, 1), // buffer of 1 lets trigger() coalesce pending requests
		closed:                 make(chan struct{}),
		srv:                    srv,
		cancelCtx:              cancel,
		minMaintenanceInterval: minMaintenanceInterval,
		dr:                     dr,
	}

	m.wg.Add(1)

	userLog(ctx).Debug("starting maintenance manager")

	// prime the cached next-maintenance time before the goroutine starts;
	// notify=false because the scheduler does not know about us yet.
	m.refresh(ctx, false)

	go func() {
		defer m.wg.Done()

		for {
			select {
			case <-m.triggerChan:
				userLog(ctx).Debug("starting maintenance task")

				// clear the cached next-maintenance time for the
				// duration of the run.
				m.beforeRun()

				t0 := clock.Now()

				if err := srv.runMaintenanceTask(mctx, dr); err != nil {
					userLog(ctx).Debugw("maintenance task failed", "err", err)
					// apply the minimum retry interval so failures
					// do not retrigger in a tight loop.
					m.afterFailedRun()

					if srv.enableErrorNotifications() {
						notification.Send(ctx,
							rep,
							"generic-error",
							notifydata.NewErrorInfo("Maintenance", "Scheduled Maintenance", t0, clock.Now(), err),
							notification.SeverityError,
							srv.notificationTemplateOptions(),
						)
					}
				}

				// recompute the cached next time and notify the scheduler.
				m.refresh(mctx, true)

			case <-m.closed:
				userLog(ctx).Debug("stopping maintenance manager")
				return
			}
		}
	}()

	return &m
}
|