From c76113ac754d64e7cf5fec475e3613d24f7ac28f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claus=20Lensb=C3=B8l?= Date: Wed, 1 Apr 2026 17:20:03 -0400 Subject: [PATCH] wgengine/magicsock: send out disco keys over TSMP periodically (#19212) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of sending out disco keys via TSMP once, send them out in intervals of 60+ seconds. The trigger is still CallMeMaybe and the keys will not be sent if no direct connection needs to be established. This fixes a case where a node can have stale keys but have communicated with the other peer before, leading to an infinite DERP state. Updates #12639 Signed-off-by: Claus Lensbøl --- wgengine/magicsock/endpoint.go | 7 ++++- wgengine/magicsock/magicsock.go | 6 ++-- wgengine/magicsock/magicsock_test.go | 44 ++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index f322ebaeb..b8d3b96be 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -40,6 +40,11 @@ var mtuProbePingSizesV4 []int var mtuProbePingSizesV6 []int +// discoKeyAdvertisementInterval tells how often a disco update via TSMP can +// happen. The update is triggered via enqueueCallMeMaybe, and thus it will +// only be sent if the magicsock is in a state to send out CallMeMaybe. 
+const discoKeyAdvertisementInterval = time.Second * 60 + func init() { for _, m := range tstun.WireMTUsToProbe { mtuProbePingSizesV4 = append(mtuProbePingSizesV4, pktLenToPingSize(m, false)) @@ -80,7 +85,7 @@ type endpoint struct { lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints lastUDPRelayPathDiscovery mono.Time // last time we ran UDP relay path discovery - sentDiscoKeyAdvertisement bool // whether we sent a TSMPDiscoAdvertisement or not to this endpoint + lastDiscoKeyAdvertisement mono.Time // last time we sent a TSMPDiscoAdvertisement to this endpoint derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients) bestAddr addrQuality // best non-DERP path; zero if none; mutate via setBestAddrLocked() diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 5c4a385f7..5938a3096 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1220,7 +1220,7 @@ func (c *Conn) RotateDiscoKey() { connCtx := c.connCtx for _, endpoint := range c.peerMap.byEpAddr { endpoint.ep.mu.Lock() - endpoint.ep.sentDiscoKeyAdvertisement = false + endpoint.ep.lastDiscoKeyAdvertisement = 0 endpoint.ep.mu.Unlock() } c.mu.Unlock() @@ -4335,8 +4335,8 @@ func (c *Conn) maybeSendTSMPDiscoAdvert(de *endpoint) { de.mu.Lock() defer de.mu.Unlock() - if !de.sentDiscoKeyAdvertisement { - de.sentDiscoKeyAdvertisement = true + if mono.Now().Sub(de.lastDiscoKeyAdvertisement) > discoKeyAdvertisementInterval { + de.lastDiscoKeyAdvertisement = mono.Now() c.tsmpDiscoKeyAvailablePub.Publish(NewDiscoKeyAvailable{ NodeFirstAddr: de.nodeAddr, NodeID: de.nodeID, diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index f05ee2c6c..10218dfb9 100644 --- a/wgengine/magicsock/magicsock_test.go +++ 
b/wgengine/magicsock/magicsock_test.go @@ -4409,3 +4409,47 @@ func TestReceiveTSMPDiscoKeyAdvertisement(t *testing.T) { t.Errorf("New disco key %s, does not match %s", newDiscoKey.ShortString(), ep.disco.Load().short) } } + +func TestSendingTSMPDiscoTimer(t *testing.T) { + t.Setenv("TS_USE_CACHED_NETMAP", "1") + conn := newTestConn(t) + tw := eventbustest.NewWatcher(t, conn.eventBus) + t.Cleanup(func() { conn.Close() }) + + peerKey := key.NewNode().Public() + ep := &endpoint{ + nodeID: 1, + publicKey: peerKey, + nodeAddr: netip.MustParseAddr("100.64.0.1"), + } + discoKey := key.NewDisco().Public() + ep.disco.Store(&endpointDisco{ + key: discoKey, + short: discoKey.ShortString(), + }) + ep.c = conn + conn.mu.Lock() + nodeView := (&tailcfg.Node{ + Key: ep.publicKey, + Addresses: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.1/32"), + }, + }).View() + conn.peers = views.SliceOf([]tailcfg.NodeView{nodeView}) + conn.mu.Unlock() + + conn.peerMap.upsertEndpoint(ep, key.DiscoPublic{}) + + if ep.discoShort() != discoKey.ShortString() { + t.Errorf("Original disco key %s, does not match %s", discoKey.ShortString(), ep.discoShort()) + } + + conn.maybeSendTSMPDiscoAdvert(ep) + conn.maybeSendTSMPDiscoAdvert(ep) + eventbustest.ExpectExactly(tw, eventbustest.Type[NewDiscoKeyAvailable]()) + ep.mu.Lock() + ep.lastDiscoKeyAdvertisement = 0 + ep.mu.Unlock() + conn.maybeSendTSMPDiscoAdvert(ep) + eventbustest.Expect(tw, eventbustest.Type[NewDiscoKeyAvailable]()) +}