wgengine, ipn/ipnlocal: route PeerForIP through LocalBackend's live data

userspaceEngine.PeerForIP read from e.netMap.Peers and
e.lastCfgFull.Peers, both of which go stale when peers arrive via
netmap deltas (which skip Engine.SetNetworkMap and Engine.Reconfig).
Every PeerForIP caller (Engine.Ping, the TSMP disco-key handler,
pendopen diagnostics, tsdial.Dialer.UseNetstackForIP, and
LocalBackend.GetPeerEndpointChanges) would report "no matching peer"
for freshly-added peers.

Fix it the same way SetPeerByIPPacketFunc fixed the outbound packet
hot path: have LocalBackend install a callback that reads the live
nodeBackend. nb.NodeByAddr is built from both SelfNode and Peers
(updateNodeByAddrLocked), so a single lookup covers the common case
with IsSelf set when the matched node ID is SelfNode's. The subnet-
route / exit-node-default-route slow path goes through a new
Engine.PeerKeyForIP that exposes the engine's AllowedIPs BART table
(the same table the outbound packet hot path already consults, with
exit-node selection honored), and resolves the matched key back to a
NodeView via the live nodeBackend.

Updates #12542

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
Change-Id: I0d4b0d8997c8e796b7367c46b49b61d4fdc717b0
This commit is contained in:
Brad Fitzpatrick
2026-06-23 19:37:47 +00:00
committed by Brad Fitzpatrick
parent e9ae398199
commit d4f2917c1b
5 changed files with 392 additions and 58 deletions

View File

@@ -654,6 +654,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo
nb.ready()
e.SetPeerByIPPacketFunc(b.lookupPeerByIP)
e.SetPeerForIPFunc(b.peerForIP)
e.SetPeerSessionStateFunc(b.onPeerWireGuardState)
e.SetNetLogNodeSource(netLogNodeSource{b})
e.SetWGPeerLookup(b.lookupPeerWireGuardString)
@@ -5837,6 +5838,43 @@ func (b *LocalBackend) lookupPeerByIP(ip netip.Addr) (key.NodePublic, bool) {
return peer.Key(), true
}
// peerForIP is the [wgengine.Engine.SetPeerForIPFunc] callback. It returns
// which peer is responsible for a given IP address. Despite the name, it
// can also return the self node (with IsSelf set). It handles both
// Tailscale IPs (returning the owning peer or self) and non-Tailscale
// addresses like subnet-routed IPs or exit-node global internet IPs
// (returning whichever peer would route that traffic).
func (b *LocalBackend) peerForIP(ip netip.Addr) (_ wgengine.PeerForIP, ok bool) {
nb := b.currentNode()
if tsaddr.IsTailscaleIP(ip) {
if nid, ok := nb.NodeByAddr(ip); ok {
if n, ok := nb.NodeByID(nid); ok {
self := nb.Self()
return wgengine.PeerForIP{
Node: n,
IsSelf: self.Valid() && self.ID() == nid,
Route: netip.PrefixFrom(ip, ip.BitLen()),
}, true
}
}
}
pk, route, ok := b.e.PeerKeyForIP(ip)
if !ok {
return wgengine.PeerForIP{}, false
}
nid, ok := nb.NodeByKey(pk)
if !ok {
return wgengine.PeerForIP{}, false
}
n, ok := nb.NodeByID(nid)
if !ok {
return wgengine.PeerForIP{}, false
}
return wgengine.PeerForIP{Node: n, Route: route}, true
}
// onPeerWireGuardState is called by wireguard-go, through wgengine, for
// serialized WireGuard session state transitions. wireguard-go is holding locks
// while calling this, so this must stay cheap, must not acquire b.mu, and must

View File

@@ -9177,3 +9177,281 @@ func TestResetAuthClearsMachineKey(t *testing.T) {
t.Fatalf("ReadState after clear: got err %v, want ErrStateNotExist", err)
}
}
func TestEnginePeerForIPAdjustsForPrefs(t *testing.T) {
// Build a netmap with:
// - self node at 100.64.0.1
// - exitA (node 1): exit node at 100.64.0.2
// - exitB (node 2): exit node at 100.64.0.3
// - subnetBig (node 3): subnet router for 10.0.0.0/16 at 100.64.0.4
// - subnetSmall (node 4): subnet router for 10.0.0.0/24 at 100.64.0.5
hi := (&tailcfg.Hostinfo{}).View()
selfNode := (&tailcfg.Node{
ID: 10,
StableID: "self",
Key: makeNodeKeyFromID(10),
DiscoKey: makeDiscoKeyFromID(10),
Name: "self",
Hostinfo: hi,
Cap: 26,
Addresses: []netip.Prefix{
netip.MustParsePrefix("100.64.0.1/32"),
},
MachineAuthorized: true,
}).View()
exitA := (&tailcfg.Node{
ID: 1,
StableID: "exitA",
Key: makeNodeKeyFromID(1),
DiscoKey: makeDiscoKeyFromID(1),
Name: "exitA",
Hostinfo: hi,
Cap: 26,
Addresses: []netip.Prefix{
netip.MustParsePrefix("100.64.0.2/32"),
},
AllowedIPs: append([]netip.Prefix{netip.MustParsePrefix("100.64.0.2/32")}, tsaddr.ExitRoutes()...),
MachineAuthorized: true,
HomeDERP: 1,
}).View()
exitB := (&tailcfg.Node{
ID: 2,
StableID: "exitB",
Key: makeNodeKeyFromID(2),
DiscoKey: makeDiscoKeyFromID(2),
Name: "exitB",
Hostinfo: hi,
Cap: 26,
Addresses: []netip.Prefix{
netip.MustParsePrefix("100.64.0.3/32"),
},
AllowedIPs: append([]netip.Prefix{netip.MustParsePrefix("100.64.0.3/32")}, tsaddr.ExitRoutes()...),
MachineAuthorized: true,
HomeDERP: 2,
}).View()
subnetBig := (&tailcfg.Node{
ID: 3,
StableID: "subnetBig",
Key: makeNodeKeyFromID(3),
DiscoKey: makeDiscoKeyFromID(3),
Name: "subnetBig",
Hostinfo: hi,
Cap: 26,
Addresses: []netip.Prefix{
netip.MustParsePrefix("100.64.0.4/32"),
},
AllowedIPs: []netip.Prefix{
netip.MustParsePrefix("100.64.0.4/32"),
netip.MustParsePrefix("10.0.0.0/16"),
},
PrimaryRoutes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/16")},
MachineAuthorized: true,
HomeDERP: 1,
}).View()
subnetSmall := (&tailcfg.Node{
ID: 4,
StableID: "subnetSmall",
Key: makeNodeKeyFromID(4),
DiscoKey: makeDiscoKeyFromID(4),
Name: "subnetSmall",
Hostinfo: hi,
Cap: 26,
Addresses: []netip.Prefix{
netip.MustParsePrefix("100.64.0.5/32"),
},
AllowedIPs: []netip.Prefix{
netip.MustParsePrefix("100.64.0.5/32"),
netip.MustParsePrefix("10.0.0.0/24"),
},
PrimaryRoutes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/24")},
MachineAuthorized: true,
HomeDERP: 1,
}).View()
nm := buildNetmapWithPeers(selfNode, exitA, exitB, subnetBig, subnetSmall)
var eng wgengine.Engine
var curT *testing.T // active subtest, for test helpers
wantPeer := func(ip string, n tailcfg.NodeView) {
t := curT
t.Helper()
pip, ok := eng.PeerForIP(netip.MustParseAddr(ip))
if !ok {
t.Fatalf("PeerForIP(%s): ok=false, want true", ip)
}
if pip.IsSelf {
t.Fatalf("PeerForIP(%s): IsSelf=true, want false", ip)
}
if pip.Node.Key() != n.Key() {
t.Fatalf("PeerForIP(%s): key=%v, want %v", ip, pip.Node.Key(), n.Key())
}
}
wantNotPeer := func(ip string) {
t := curT
t.Helper()
if _, ok := eng.PeerForIP(netip.MustParseAddr(ip)); ok {
t.Fatalf("PeerForIP(%s): ok=true, want false", ip)
}
}
wantKey := func(ip string, n tailcfg.NodeView) {
t := curT
t.Helper()
pk, _, ok := eng.PeerKeyForIP(netip.MustParseAddr(ip))
if !ok {
t.Fatalf("PeerKeyForIP(%s): ok=false, want true", ip)
}
if pk != n.Key() {
t.Fatalf("PeerKeyForIP(%s): key=%v, want %v", ip, pk, n.Key())
}
}
wantNotKey := func(ip string) {
t := curT
t.Helper()
if _, _, ok := eng.PeerKeyForIP(netip.MustParseAddr(ip)); ok {
t.Fatalf("PeerKeyForIP(%s): ok=true, want false", ip)
}
}
wantSelf := func(ip string) {
t := curT
t.Helper()
pip, ok := eng.PeerForIP(netip.MustParseAddr(ip))
if !ok {
t.Fatalf("PeerForIP(%s): ok=false, want true", ip)
}
if !pip.IsSelf {
t.Fatalf("PeerForIP(%s): IsSelf=false, want true", ip)
}
}
tests := []struct {
name string
prefs ipn.Prefs
check func()
}{
{
name: "no_routes_no_exit",
prefs: ipn.Prefs{
RouteAll: false,
ExitNodeID: "",
},
check: func() {
wantSelf("100.64.0.1")
wantPeer("100.64.0.2", exitA)
wantPeer("100.64.0.3", exitB)
wantPeer("100.64.0.4", subnetBig)
wantPeer("100.64.0.5", subnetSmall)
wantNotPeer("10.0.0.5")
wantNotPeer("10.0.1.5")
wantNotPeer("8.8.8.8")
wantNotKey("10.0.0.5")
wantNotKey("8.8.8.8")
},
},
{
name: "accept_routes_on",
prefs: ipn.Prefs{
RouteAll: true,
ExitNodeID: "",
},
check: func() {
// 10.0.0.5 is in both /16 and /24; longest prefix match picks subnetSmall.
wantPeer("10.0.0.5", subnetSmall)
wantKey("10.0.0.5", subnetSmall)
// 10.0.1.5 is in /16 only; goes to subnetBig.
wantPeer("10.0.1.5", subnetBig)
wantKey("10.0.1.5", subnetBig)
wantNotPeer("8.8.8.8")
},
},
{
name: "exit_node_A",
prefs: ipn.Prefs{
RouteAll: true,
ExitNodeID: "exitA",
},
check: func() {
wantPeer("8.8.8.8", exitA)
wantKey("8.8.8.8", exitA)
wantPeer("10.0.0.5", subnetSmall)
wantPeer("10.0.1.5", subnetBig)
},
},
{
name: "exit_node_B",
prefs: ipn.Prefs{
RouteAll: true,
ExitNodeID: "exitB",
},
check: func() {
wantPeer("8.8.8.8", exitB)
wantKey("8.8.8.8", exitB)
wantPeer("10.0.0.5", subnetSmall)
wantPeer("10.0.1.5", subnetBig)
},
},
{
name: "exit_node_off_routes_on",
prefs: ipn.Prefs{
RouteAll: true,
ExitNodeID: "",
},
check: func() {
wantNotPeer("8.8.8.8")
wantNotKey("8.8.8.8")
wantPeer("10.0.0.5", subnetSmall)
wantPeer("10.0.1.5", subnetBig)
},
},
{
name: "accept_routes_off",
prefs: ipn.Prefs{
RouteAll: false,
ExitNodeID: "",
},
check: func() {
wantNotPeer("10.0.0.5")
wantNotKey("10.0.0.5")
wantPeer("100.64.0.4", subnetBig)
wantPeer("100.64.0.5", subnetSmall)
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
lb := newTestLocalBackend(t)
nk := key.NewNode()
nmCopy := new(*nm)
nmCopy.NodeKey = nk.Public()
lb.mu.Lock()
err := lb.pm.SetPrefs((&ipn.Prefs{
ControlURL: "https://localhost:1/",
WantRunning: true,
RouteAll: tt.prefs.RouteAll,
ExitNodeID: tt.prefs.ExitNodeID,
Persist: &persist.Persist{PrivateNodeKey: nk},
}).View(), ipn.NetworkProfile{})
lb.mu.Unlock()
if err != nil {
t.Fatal(err)
}
lb.SetControlClientStatus(lb.cc, controlclient.Status{
NetMap: nmCopy,
LoggedIn: true,
})
eng = lb.sys.Engine.Get()
curT = t
tt.check()
})
}
}

View File

@@ -1983,6 +1983,10 @@ func (e *mockEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb
func (e *mockEngine) InstallCaptureHook(packet.CaptureCallback) {}
func (e *mockEngine) SetPeerByIPPacketFunc(func(netip.Addr) (_ key.NodePublic, ok bool)) {}
func (e *mockEngine) SetPeerForIPFunc(func(netip.Addr) (_ wgengine.PeerForIP, ok bool)) {}
func (e *mockEngine) PeerKeyForIP(netip.Addr) (_ key.NodePublic, _ netip.Prefix, ok bool) {
return key.NodePublic{}, netip.Prefix{}, false
}
func (e *mockEngine) SetPeerSessionStateFunc(func(key.NodePublic, wgengine.PeerWireGuardState)) {
}
func (e *mockEngine) SetNetLogNodeSource(netlog.NodeSource) {}

View File

@@ -35,7 +35,6 @@
"tailscale.com/net/netmon"
"tailscale.com/net/packet"
"tailscale.com/net/sockstats"
"tailscale.com/net/tsaddr"
"tailscale.com/net/tsdial"
"tailscale.com/net/tstun"
"tailscale.com/syncs"
@@ -124,6 +123,11 @@ type userspaceEngine struct {
// the per-packet wgdev callback without locking.
peerByIPRoute atomic.Pointer[bart.Table[key.NodePublic]]
// peerForIP, if non-nil, is the callback installed via
// [userspaceEngine.SetPeerForIPFunc]. PeerForIP delegates to it
// for the cold-path control lookups (Ping, TSMP, pendopen, etc).
peerForIP atomic.Pointer[func(netip.Addr) (_ PeerForIP, ok bool)]
lastCfgFull wgcfg.Config
lastRouter *router.Config
lastDNSConfig dns.ConfigView // or invalid if none
@@ -1571,65 +1575,39 @@ func (e *userspaceEngine) ProbeLocks() {
e.wgLock.Unlock()
}
// PeerForIP returns the Node in the wireguard config
// that's responsible for handling the given IP address.
//
// If none is found in the wireguard config but one is found in
// the netmap, it's described in an error.
//
// peerForIP acquires both e.mu and e.wgLock, but neither at the same
// time.
func (e *userspaceEngine) PeerForIP(ip netip.Addr) (ret PeerForIP, ok bool) {
e.mu.Lock()
nm := e.netMap
e.mu.Unlock()
// SetPeerForIPFunc installs the callback used by [userspaceEngine.PeerForIP].
// See [Engine.SetPeerForIPFunc].
func (e *userspaceEngine) SetPeerForIPFunc(fn func(netip.Addr) (PeerForIP, bool)) {
if fn == nil {
e.peerForIP.Store(nil)
return
}
e.peerForIP.Store(&fn)
}
if nm == nil {
// PeerKeyForIP looks up ip in the engine's AllowedIPs table
// ([userspaceEngine.peerByIPRoute]). See [Engine.PeerKeyForIP].
func (e *userspaceEngine) PeerKeyForIP(ip netip.Addr) (pk key.NodePublic, route netip.Prefix, ok bool) {
if !ip.IsValid() {
return pk, route, false
}
rt := e.peerByIPRoute.Load()
if rt == nil {
return pk, route, false
}
route, pk, ok = rt.LookupPrefixLPM(netip.PrefixFrom(ip, ip.BitLen()))
return pk, route, ok
}
// PeerForIP returns the node responsible for handling the given IP.
// It delegates to the callback installed via [SetPeerForIPFunc]; engines
// without an installed callback return (zero, false).
func (e *userspaceEngine) PeerForIP(ip netip.Addr) (ret PeerForIP, ok bool) {
if !ip.IsValid() {
return ret, false
}
// Check for exact matches before looking for subnet matches.
// TODO(bradfitz): add maps for these. on NetworkMap?
for _, p := range nm.Peers {
for i := range p.Addresses().Len() {
a := p.Addresses().At(i)
if a.Addr() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) {
return PeerForIP{Node: p, Route: a}, true
}
}
}
addrs := nm.GetAddresses()
for i := range addrs.Len() {
if a := addrs.At(i); a.Addr() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) {
return PeerForIP{Node: nm.SelfNode, IsSelf: true, Route: a}, true
}
}
e.wgLock.Lock()
defer e.wgLock.Unlock()
// TODO(bradfitz): this is O(n peers). Add ART to netaddr?
var best netip.Prefix
var bestKey key.NodePublic
for _, p := range e.lastCfgFull.Peers {
for _, cidr := range p.AllowedIPs {
if !cidr.Contains(ip) {
continue
}
if !best.IsValid() || cidr.Bits() > best.Bits() {
best = cidr
bestKey = p.PublicKey
}
}
}
// And another pass. Probably better than allocating a map per peerForIP
// call. But TODO(bradfitz): add a lookup map to netmap.NetworkMap.
if !bestKey.IsZero() {
for _, p := range nm.Peers {
if p.Key() == bestKey {
return PeerForIP{Node: p, Route: best}, true
}
}
if fn := e.peerForIP.Load(); fn != nil {
return (*fn)(ip)
}
return ret, false
}

View File

@@ -100,9 +100,45 @@ type Engine interface {
ResetAndStop() (*Status, error)
// PeerForIP returns the node to which the provided IP routes,
// if any. If none is found, (nil, false) is returned.
// if any. If none is found, (zero, false) is returned.
//
// Despite the name, it can return the self node (with
// PeerForIP.IsSelf set). It handles Tailscale IPs, subnet-routed
// IPs, and exit-node global internet IPs, returning whichever
// node would handle that traffic.
//
// This is the cold path used by Ping, TSMP, pendopen diagnostics,
// and debug endpoints. It uses the same underlying data structures
// as the wireguard-go outbound packet path
// ([Engine.SetPeerByIPPacketFunc]), but is slower because it
// returns richer data (a full NodeView, the matched route prefix,
// and the IsSelf flag) requiring extra lookups.
//
// In production, the lookup is implemented by LocalBackend and
// plumbed in via [Engine.SetPeerForIPFunc]; the engine itself holds
// no peer-lookup state on this path.
PeerForIP(netip.Addr) (_ PeerForIP, ok bool)
// SetPeerForIPFunc installs a callback used by [Engine.PeerForIP].
// It parallels [Engine.SetPeerByIPPacketFunc] but serves the
// cold-path control lookups (Ping, TSMP, pendopen diagnostics,
// [tsdial.Dialer.UseNetstackForIP], debug endpoints).
//
// If fn is nil, PeerForIP returns (zero, false) for every IP.
//
// LocalBackend installs a func backed by the live nodeBackend for
// exact-match and self addresses, with [Engine.PeerKeyForIP]
// supplying the subnet-route / exit-node fallback.
SetPeerForIPFunc(fn func(netip.Addr) (_ PeerForIP, ok bool))
// PeerKeyForIP returns the peer's NodePublic and the matched prefix
// for the longest-prefix match of ip in the engine's AllowedIPs
// table (the wireguard config most recently installed via
// [Engine.Reconfig]). Exit-node selection is honored: an unselected
// exit node's 0.0.0.0/0 is not matched. It is the same table the
// outbound packet hot path consults via [Engine.SetPeerByIPPacketFunc].
PeerKeyForIP(netip.Addr) (_ key.NodePublic, _ netip.Prefix, ok bool)
// GetFilter returns the current packet filter, if any.
GetFilter() *filter.Filter