From 3cc7b8530c7fd0dd069ebb5ec9863a8cf1e0deee Mon Sep 17 00:00:00 2001
From: Mike O'Driscoll
Date: Fri, 6 Mar 2026 09:54:25 -0500
Subject: [PATCH] prober: fix queuing delay probe txRecords overflow under high DERP server load (#18803)

The txRecords buffer had two compounding bugs that caused the overflow
guard to fire on every send tick under high DERP server load, spamming
logs at the full send rate (e.g. 100x/second).

First, int(packetTimeout.Seconds()) truncates fractional-second
timeouts, under-allocating the buffer. Second, the capacity was sized to
exactly the theoretical maximum number of in-flight records with no
headroom, and the expiry check used strict > rather than >=, so records
at exactly the timeout boundary were never evicted by applyTimeouts,
leaving len==cap on the very next tick.

Fixes tailscale/corp#37696

Signed-off-by: Mike O'Driscoll
---
 prober/derp.go | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/prober/derp.go b/prober/derp.go
index 73ea02cf5..dadda6fce 100644
--- a/prober/derp.go
+++ b/prober/derp.go
@@ -17,6 +17,7 @@ "io" "log" "maps" + "math" "net" "net/http" "net/netip" @@ -423,7 +424,7 @@ type txRecord struct { // for packets up to their timeout. As records age out of the front of this // list, if the associated packet arrives, we won't have a txRecord for it // and will consider it to have timed out. 
- txRecords := make([]txRecord, 0, packetsPerSecond*int(packetTimeout.Seconds())) + txRecords := make([]txRecord, 0, int(math.Ceil(float64(packetsPerSecond)*packetTimeout.Seconds()))+1) var txRecordsMu sync.Mutex // applyTimeouts walks over txRecords and expires any records that are older @@ -435,7 +436,7 @@ type txRecord struct { now := time.Now() recs := txRecords[:0] for _, r := range txRecords { - if now.Sub(r.at) > packetTimeout { + if now.Sub(r.at) >= packetTimeout { packetsDropped.Add(1) } else { recs = append(recs, r) @@ -451,9 +452,7 @@ type txRecord struct { pkt := make([]byte, 260) // the same size as a CallMeMaybe packet observed on a Tailscale client. crand.Read(pkt) - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { t := time.NewTicker(time.Second / time.Duration(packetsPerSecond)) defer t.Stop() @@ -481,13 +480,11 @@ type txRecord struct { } } } - }() + }) // Receive the packets. recvFinishedC := make(chan error, 1) - wg.Add(1) - go func() { - defer wg.Done() + wg.Go(func() { defer close(recvFinishedC) // to break out of 'select' below. fromDERPPubKey := fromc.SelfPublicKey() for { @@ -531,7 +528,7 @@ type txRecord struct { // Loop. } } - }() + }) select { case <-ctx.Done():