control/controlclient: take mapsession and release lock early in sub (#19192)

The disco key subscriber could deadlock in a scenario where a self node
update came through the control path into the mapSession after the disco
key subscriber had taken the lock, but before it had pushed the netmap
change, as both the subscriber and onSelfNodeChanged need the
controlclient lock.

The subscriber can safely take a reference to the mapSession and use it
after releasing the lock, as the change queue has its own lock for
inserting records and also checks whether the queue has been closed
before inserting.

Updates #12639

Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
Claus Lensbøl
2026-03-31 12:47:13 -04:00
committed by GitHub
parent 61ac021c5d
commit 4334dfa7d5

View File

@@ -361,11 +361,16 @@ func NewDirect(opts Options) (*Direct, error) {
c.controlTimePub = eventbus.Publish[ControlTime](c.busClient)
discoKeyPub := eventbus.Publish[events.PeerDiscoKeyUpdate](c.busClient)
eventbus.SubscribeFunc(c.busClient, func(update events.DiscoKeyAdvertisement) {
c.mu.Lock()
defer c.mu.Unlock()
c.logf("controlclient direct: got TSMP disco key advertisement from %v via eventbus", update.Src)
if c.streamingMapSession != nil {
nm := c.streamingMapSession.netmap()
var nm *netmap.NetworkMap
c.mu.Lock()
sess := c.streamingMapSession
if sess != nil {
nm = c.streamingMapSession.netmap()
}
c.mu.Unlock()
if sess != nil {
peer, ok := nm.PeerByTailscaleIP(update.Src)
if !ok {
return
@@ -375,7 +380,7 @@ func NewDirect(opts Options) (*Direct, error) {
// If we update without error, return. If the err indicates that the
// mapSession has gone away, we want to fall back to pushing the key
// further down the chain.
if err := c.streamingMapSession.updateDiscoForNode(
if err := sess.updateDiscoForNode(
peer.ID(), peer.Key(), update.Key, time.Now(), false); err == nil ||
!errors.Is(err, ErrChangeQueueClosed) {
return