feat(server): improve scheduler algorithm to run missed snapshots (#3323)

* Improve RunMissed algorithm to work better with Cron and to give more predictable results for time-of-day rules

* Add a RunMissed test for multiple times-of-day

* add variable to improve code-readability

* Fix test after rebase
This commit is contained in:
PhracturedBlue
2023-10-04 19:31:28 -07:00
committed by GitHub
parent 45c1f0950f
commit 66027721e0
3 changed files with 83 additions and 15 deletions

View File

@@ -23,7 +23,7 @@ func (c *policySchedulingFlags) setup(cmd *kingpin.CmdClause) {
cmd.Flag("snapshot-interval", "Interval between snapshots").DurationListVar(&c.policySetInterval)
cmd.Flag("snapshot-time", "Comma-separated times of day when to take snapshot (HH:mm,HH:mm,...) or 'inherit' to remove override").StringsVar(&c.policySetTimesOfDay)
cmd.Flag("snapshot-time-crontab", "Semicolon-separated crontab-compatible expressions (or 'inherit')").StringVar(&c.policySetCron)
cmd.Flag("run-missed", "Run missed time-of-day snapshots ('true', 'false', 'inherit')").EnumVar(&c.policySetRunMissed, booleanEnumValues...)
cmd.Flag("run-missed", "Run missed time-of-day or cron snapshots ('true', 'false', 'inherit')").EnumVar(&c.policySetRunMissed, booleanEnumValues...)
cmd.Flag("manual", "Only create snapshots manually").BoolVar(&c.policySetManual)
}

View File

@@ -94,8 +94,6 @@ func (p *SchedulingPolicy) NextSnapshotTime(previousSnapshotTime, now time.Time)
return time.Time{}, false
}
const oneDay = 24 * time.Hour
var (
nextSnapshotTime time.Time
ok bool
@@ -117,8 +115,35 @@ func (p *SchedulingPolicy) NextSnapshotTime(previousSnapshotTime, now time.Time)
}
}
if todSnapshot, todOk := p.getNextTimeOfDaySnapshot(now); todOk && (!ok || todSnapshot.Before(nextSnapshotTime)) {
nextSnapshotTime = todSnapshot
ok = true
}
if cronSnapshot, cronOk := p.getNextCronSnapshot(now); cronOk && (!ok || cronSnapshot.Before(nextSnapshotTime)) {
nextSnapshotTime = cronSnapshot
ok = true
}
if ok && p.checkMissedSnapshot(now, previousSnapshotTime, nextSnapshotTime) {
// if RunMissed is set and last run was missed, and next run is at least 30 mins from now, then run now
nextSnapshotTime = now
ok = true
}
return nextSnapshotTime, ok
}
// Get next ToD snapshot.
func (p *SchedulingPolicy) getNextTimeOfDaySnapshot(now time.Time) (time.Time, bool) {
const oneDay = 24 * time.Hour
var nextSnapshotTime time.Time
ok := false
nowLocalTime := now.Local()
for _, tod := range p.TimesOfDay {
nowLocalTime := now.Local()
localSnapshotTime := time.Date(nowLocalTime.Year(), nowLocalTime.Month(), nowLocalTime.Day(), tod.Hour, tod.Minute, 0, 0, time.Local)
if now.After(localSnapshotTime) {
@@ -131,6 +156,15 @@ func (p *SchedulingPolicy) NextSnapshotTime(previousSnapshotTime, now time.Time)
}
}
return nextSnapshotTime, ok
}
// Get next Cron snapshot.
func (p *SchedulingPolicy) getNextCronSnapshot(now time.Time) (time.Time, bool) {
var nextSnapshotTime time.Time
ok := false
for _, e := range p.Cron {
ce, err := cronexpr.Parse(stripCronComment(e))
if err != nil {
@@ -150,26 +184,37 @@ func (p *SchedulingPolicy) NextSnapshotTime(previousSnapshotTime, now time.Time)
}
}
if ok && p.checkMissedSnapshot(now, previousSnapshotTime, nextSnapshotTime) {
// if RunMissed is set and last run was missed, and next run is at least 30 mins from now, then run now
nextSnapshotTime = now
ok = true
}
return nextSnapshotTime, ok
}
// Check if a previous snapshot was missed and should be started now.
func (p *SchedulingPolicy) checkMissedSnapshot(now, previousSnapshotTime, nextSnapshotTime time.Time) bool {
const oneDay = 24 * time.Hour
const halfhour = 30 * time.Minute
momentAfterSnapshot := previousSnapshotTime.Add(time.Second)
if !p.RunMissed.OrDefault(false) {
return false
}
return (len(p.TimesOfDay) > 0 || len(p.Cron) > 0) && previousSnapshotTime.Add(oneDay-halfhour).Before(now) && nextSnapshotTime.After(now.Add(halfhour))
nextSnapshot := nextSnapshotTime
// We add a second to ensure that the next possible snapshot is > the last snaphot
todSnapshot, todOk := p.getNextTimeOfDaySnapshot(momentAfterSnapshot)
cronSnapshot, cronOk := p.getNextCronSnapshot(momentAfterSnapshot)
if !todOk && !cronOk {
return false
}
if todOk && todSnapshot.Before(nextSnapshot) {
nextSnapshot = todSnapshot
}
if cronOk && cronSnapshot.Before(nextSnapshot) {
nextSnapshot = cronSnapshot
}
return nextSnapshot.Before(now) && nextSnapshotTime.After(now.Add(halfhour))
}
// Merge applies default values from the provided policy.

View File

@@ -233,6 +233,17 @@ func TestNextSnapshotTime(t *testing.T) {
wantTime: time.Date(2020, time.January, 3, 11, 55, 0, 0, time.Local),
wantOK: true,
},
{
name: "Run immediately because one of the TimeOfDays was missed",
pol: policy.SchedulingPolicy{
TimesOfDay: []policy.TimeOfDay{{11, 1}, {4, 1}},
RunMissed: policy.NewOptionalBool(true),
},
now: time.Date(2020, time.January, 2, 10, 0, 0, 0, time.Local),
previousSnapshotTime: time.Date(2020, time.January, 1, 11, 1, 0, 0, time.Local),
wantTime: time.Date(2020, time.January, 2, 10, 0, 0, 0, time.Local),
wantOK: true,
},
{
name: "Don't run immediately even though RunMissed is set because last run was not missed",
pol: policy.SchedulingPolicy{
@@ -251,8 +262,20 @@ func TestNextSnapshotTime(t *testing.T) {
RunMissed: policy.NewOptionalBool(true),
},
now: time.Date(2020, time.January, 2, 11, 0, 0, 0, time.Local),
previousSnapshotTime: time.Date(2020, time.January, 2, 10, 0, 0, 0, time.Local),
wantTime: time.Date(2020, time.January, 3, 10, 0, 0, 0, time.Local),
previousSnapshotTime: time.Date(2020, time.January, 1, 11, 55, 0, 0, time.Local),
wantTime: time.Date(2020, time.January, 2, 11, 0, 0, 0, time.Local),
wantOK: true,
},
{
name: "Run immediately because Cron was missed",
pol: policy.SchedulingPolicy{
TimesOfDay: []policy.TimeOfDay{{11, 55}},
Cron: []string{"0 * * * *"}, // Every hour
RunMissed: policy.NewOptionalBool(true),
},
now: time.Date(2020, time.January, 2, 11, 0, 0, 0, time.Local),
previousSnapshotTime: time.Date(2020, time.January, 1, 11, 55, 0, 0, time.Local),
wantTime: time.Date(2020, time.January, 2, 11, 0, 0, 0, time.Local),
wantOK: true,
},
}