mirror of
https://github.com/tailscale/tailscale.git
synced 2026-06-24 07:52:47 -04:00
Previously, testwrapper only retried tests explicitly annotated with flakytest.Mark. Authors don't pre-emptively mark tests that haven't flaked yet, so the first flake of a brand-new test failed CI even when a re-run would have passed. testwrapper now retries every failing test within a per-test wall-clock budget (default: 5 minute per-attempt timeout capped at 1.5x the first failure duration, 10 minute total). A test that fails and then passes on retry is reported as flaky; a test that never passes within the budget remains a real failure (exit non-zero). For flakeapp's existing log scraping, the wire format is preserved: the "flakytest failures JSON:" line is now emitted only for tests that ultimately flaked (passed on retry). Unmarked tests get a fake issue URL of the form https://github.com/{owner}/{repo}/issues/UNKNOWN where owner/repo is detected from GITHUB_REPOSITORY, the local git remote, or falls back to tailscale/tailscale. A new "permanent test failures JSON:" line is emitted for tests that never passed; flakeapp ignores it for now (a follow-up can teach it to record real failures separately). flakytest.Mark stays as an opt-in API: still useful for tracking a known-flaky test against a real issue and for TS_SKIP_FLAKY_TESTS. Updates tailscale/corp#38960 Change-Id: I56dfc9b023486d239f60793a53e9690578ce8017 Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
102 lines
3.2 KiB
Go
102 lines
3.2 KiB
Go
// Copyright (c) Tailscale Inc & contributors
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
// Package flakytest contains test helpers for marking a test as flaky.
|
|
//
|
|
// Marking a test with [Mark] is not required for cmd/testwrapper to retry
|
|
// failed tests; the wrapper retries any failure within a per-test time
|
|
// budget and reports a test as flaky if it ever passes on retry. Mark is
|
|
// useful for tracking a known-flaky test against a GitHub issue and for the
|
|
// TS_SKIP_FLAKY_TESTS skip behavior used to keep CI green when a flake is
|
|
// being investigated.
|
|
package flakytest
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"regexp"
|
|
"strconv"
|
|
"sync"
|
|
"testing"
|
|
|
|
"tailscale.com/util/mak"
|
|
)
|
|
|
|
const (
	// FlakyTestLogMessage is the sentinel prefix that Mark writes to stderr,
	// followed by ": " and the tracking issue URL, when the test binary is
	// running with FlakeAttemptEnv set. cmd/testwrapper looks for it to
	// recognize tests that were marked as known-flaky.
	FlakyTestLogMessage = "flakytest: this is a known flaky test"

	// FlakeAttemptEnv names the environment variable that cmd/testwrapper
	// sets when a test is being (re)tried. Its value is the attempt number,
	// starting at 1.
	FlakeAttemptEnv = "TS_TESTWRAPPER_ATTEMPT"
)
|
|
|
|
// issueRegexp matches a complete GitHub issue URL of the form
// https://github.com/<owner>/<repo>/issues/<number>, anchored at both ends.
// Mark uses it to validate its issue argument.
var issueRegexp = regexp.MustCompile(`\Ahttps://github\.com/[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+/issues/\d+\z`)
|
|
|
|
// rootFlakesMu is held by Mark while it records a test name in rootFlakes.
var rootFlakesMu sync.Mutex

// rootFlakes is the set of test names (as returned by t.Name) for which Mark
// ran to completion, i.e. the test was marked and not skipped. Its zero value
// is a nil map; Mark populates it via mak.Set.
var rootFlakes map[string]bool
|
|
|
|
// Mark sets the current test as a flaky test, such that if it fails, it will
|
|
// be retried a few times on failure. issue must be a GitHub issue that tracks
|
|
// the status of the flaky test being marked, of the format:
|
|
//
|
|
// https://github.com/tailscale/myRepo-H3re/issues/12345
|
|
func Mark(t testing.TB, issue string) {
|
|
if !issueRegexp.MatchString(issue) {
|
|
t.Fatalf("bad issue format: %q", issue)
|
|
}
|
|
if _, ok := os.LookupEnv(FlakeAttemptEnv); ok {
|
|
// We're being run under cmd/testwrapper so send our sentinel message
|
|
// to stderr. (We avoid doing this when the env is absent to avoid
|
|
// spamming people running tests without the wrapper)
|
|
fmt.Fprintf(os.Stderr, "%s: %s\n", FlakyTestLogMessage, issue)
|
|
}
|
|
t.Attr("flaky-test-issue-url", issue)
|
|
|
|
// The Attr method above also emits human-readable output, so this t.Logf
|
|
// is somewhat redundant, but we keep it for compatibility with
|
|
// old test runs, so cmd/testwrapper doesn't need to be modified.
|
|
// TODO(bradfitz): switch testwrapper to look for Action "attr"
|
|
// instead:
|
|
// "Action":"attr","Package":"tailscale.com/cmd/testwrapper/flakytest","Test":"TestMarked_Root","Key":"flaky-test-issue-url","Value":"https://github.com/tailscale/tailscale/issues/0"}
|
|
// And then remove this Logf a month or so after that.
|
|
t.Logf("flakytest: issue tracking this flaky test: %s", issue)
|
|
|
|
if boolEnv("TS_SKIP_FLAKY_TESTS") {
|
|
t.Skipf("skipping due to TS_SKIP_FLAKY_TESTS")
|
|
}
|
|
|
|
// Record the root test name as flakey.
|
|
rootFlakesMu.Lock()
|
|
defer rootFlakesMu.Unlock()
|
|
mak.Set(&rootFlakes, t.Name(), true)
|
|
}
|
|
|
|
// Marked reports whether the current test or one of its parents was marked flaky.
|
|
func Marked(t testing.TB) bool {
|
|
n := t.Name()
|
|
for {
|
|
if rootFlakes[n] {
|
|
return true
|
|
}
|
|
n = path.Dir(n)
|
|
if n == "." || n == "/" {
|
|
break
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// boolEnv reports whether the environment variable named k holds a value
// that strconv.ParseBool parses as true. An unset, empty, or unparsable
// value yields false.
func boolEnv(k string) bool {
	raw := os.Getenv(k)
	if raw == "" {
		return false
	}
	// An unparsable value is deliberately treated the same as false rather
	// than reported as an error.
	enabled, err := strconv.ParseBool(raw)
	return err == nil && enabled
}
|