From be2f554dd3dc5d1943394c373ff6e6c61be006b3 Mon Sep 17 00:00:00 2001 From: Jordan Whited Date: Wed, 17 Jun 2026 18:01:25 -0700 Subject: [PATCH] control/controlknobs,wgengine/magicsock: disable TSMP disco advert if netmap caching is disabled Updates #20081 Signed-off-by: Jordan Whited --- control/controlknobs/controlknobs.go | 7 +++++ wgengine/magicsock/magicsock.go | 17 +++++++++-- wgengine/magicsock/magicsock_test.go | 44 ++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/control/controlknobs/controlknobs.go b/control/controlknobs/controlknobs.go index 93c10f26e..2f9b5143d 100644 --- a/control/controlknobs/controlknobs.go +++ b/control/controlknobs/controlknobs.go @@ -135,6 +135,11 @@ type Knobs struct { // underlay UDP packet TX path on Linux. Applies to magicsock and peer relay // UDP sockets. See [tailcfg.NodeAttrNeverGSOEqualTail]. NeverGSOEqualTail atomic.Bool + + // CacheNetworkMaps is whether the node should persistently cache network + // maps and use them to establish peer connectivity on start, if doing so + // is supported by the client and storage is available. + CacheNetworkMaps atomic.Bool } // UpdateFromNodeAttributes updates k (if non-nil) based on the provided self @@ -170,6 +175,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { disableTUNUDPGRO = has(tailcfg.NodeAttrDisableTUNUDPGRO) disableTUNTCPGRO = has(tailcfg.NodeAttrDisableTUNTCPGRO) neverGSOEqualTail = has(tailcfg.NodeAttrNeverGSOEqualTail) + cacheNetworkMaps = has(tailcfg.NodeAttrCacheNetworkMaps) ) if has(tailcfg.NodeAttrOneCGNATEnable) { @@ -203,6 +209,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { k.DisableTUNUDPGRO.Store(disableTUNUDPGRO) k.DisableTUNTCPGRO.Store(disableTUNTCPGRO) k.NeverGSOEqualTail.Store(neverGSOEqualTail) + k.CacheNetworkMaps.Store(cacheNetworkMaps) } // AsDebugJSON returns k as something that can be marshalled with json.Marshal diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 398803faf..d5c85d1f6 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -4526,8 +4526,10 @@ type NewDiscoKeyAvailable struct { // maybeSendTSMPDiscoAdvert conditionally emits an event indicating that we // should send our DiscoKey to the first node address of the magicksock endpoint. -// The event is only emitted if we are not already communicating directly and -// more than 60 seconds has passed since the last DiscoKey was sent. +// +// The event is suppressed if we are already communicating directly or +// less than 60 seconds has passed since the last DiscoKey was sent, or netmap +// caching is disabled on this node. // // We do not need the Conn to be locked, but the endpoint should be. func (c *Conn) maybeSendTSMPDiscoAdvert(de *endpoint) { @@ -4535,6 +4537,17 @@ func (c *Conn) maybeSendTSMPDiscoAdvert(de *endpoint) { return } + // Disable TSMP disco advert by default, unless network map caching is + // enabled for the local node. Caching network maps on the remote node is + // what really matters in terms of handling a TSMP disco advert and applying + // it in a useful way, but the TSMP disco advert implementation as it exists + // here has pathological behaviors. Therefore, it should be disabled for + // almost all tailnets, and we lean on the network map caching control knob + // for this purpose. See #20081. + if c.controlKnobs == nil || !c.controlKnobs.CacheNetworkMaps.Load() { + return + } + de.mu.Lock() defer de.mu.Unlock() diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index af8f6dea1..6d617398e 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -4515,6 +4515,10 @@ func TestSendingTSMPDiscoTimer(t *testing.T) { tw := eventbustest.NewWatcher(t, conn.eventBus) t.Cleanup(func() { conn.Close() }) + // maybeSendTSMPDiscoAdvert only advertises when netmap caching is enabled. + conn.controlKnobs = new(controlknobs.Knobs) + conn.controlKnobs.CacheNetworkMaps.Store(true) + peerKey := key.NewNode().Public() ep := &endpoint{ nodeID: 1, @@ -4576,3 +4580,43 @@ func TestSendingTSMPDiscoTimer(t *testing.T) { t.Errorf("expected only one event, got: %s", err) } } + +// TestSendingTSMPDiscoCachingDisabled verifies that maybeSendTSMPDiscoAdvert +// early-returns (sends no advert) when netmap caching is not enabled via the +// CacheNetworkMaps control knob, including when no knobs are present at all. +func TestSendingTSMPDiscoCachingDisabled(t *testing.T) { + tests := []struct { + name string + knobs *controlknobs.Knobs + }{ + {name: "no-knobs", knobs: nil}, + // Knobs present but CacheNetworkMaps left at its false default. + {name: "caching-disabled", knobs: new(controlknobs.Knobs)}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + conn := newTestConn(t) + t.Cleanup(func() { conn.Close() }) + conn.controlKnobs = tt.knobs + + ep := &endpoint{ + nodeID: 1, + publicKey: key.NewNode().Public(), + nodeAddr: netip.MustParseAddr("100.64.0.1"), + } + ep.c = conn + + // A fresh endpoint with a zero lastDiscoKeyAdvertisement and no + // direct bestAddr would otherwise advertise; the only thing + // suppressing it here is the disabled caching knob. On early + // return the timestamp is left untouched (zero). + conn.maybeSendTSMPDiscoAdvert(ep) + + ep.mu.Lock() + defer ep.mu.Unlock() + if !ep.lastDiscoKeyAdvertisement.IsZero() { + t.Errorf("lastDiscoKeyAdvertisement = %v; want zero (advert should have been suppressed)", ep.lastDiscoKeyAdvertisement) + } + }) + } +}