From d4f2917c1bfd27529ec7997d08a8f1aa9ea903f2 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Tue, 23 Jun 2026 19:37:47 +0000 Subject: [PATCH] wgengine, ipn/ipnlocal: route PeerForIP through LocalBackend's live data userspaceEngine.PeerForIP read from e.netMap.Peers and e.lastCfgFull.Peers, both of which go stale when peers arrive via netmap deltas (which skip Engine.SetNetworkMap and Engine.Reconfig). Every PeerForIP caller (Engine.Ping, the TSMP disco-key handler, pendopen diagnostics, tsdial.Dialer.UseNetstackForIP, and LocalBackend.GetPeerEndpointChanges) would report "no matching peer" for freshly-added peers. Fix it the same way SetPeerByIPPacketFunc fixed the outbound packet hot path: have LocalBackend install a callback that reads the live nodeBackend. nb.NodeByAddr is built from both SelfNode and Peers (updateNodeByAddrLocked), so a single lookup covers the common case with IsSelf set when the matched node ID is SelfNode's. The subnet- route / exit-node-default-route slow path goes through a new Engine.PeerKeyForIP that exposes the engine's AllowedIPs BART table (the same table the outbound packet hot path already consults, with exit-node selection honored), and resolves the matched key back to a NodeView via the live nodeBackend. Updates #12542 Signed-off-by: Brad Fitzpatrick Change-Id: I0d4b0d8997c8e796b7367c46b49b61d4fdc717b0 --- ipn/ipnlocal/local.go | 38 +++++ ipn/ipnlocal/local_test.go | 278 +++++++++++++++++++++++++++++++++++++ ipn/ipnlocal/state_test.go | 4 + wgengine/userspace.go | 92 +++++------- wgengine/wgengine.go | 38 ++++- 5 files changed, 392 insertions(+), 58 deletions(-) diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 2598eea38..b2b13fbc4 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -654,6 +654,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo nb.ready() e.SetPeerByIPPacketFunc(b.lookupPeerByIP) + e.SetPeerForIPFunc(b.peerForIP) e.SetPeerSessionStateFunc(b.onPeerWireGuardState) e.SetNetLogNodeSource(netLogNodeSource{b}) e.SetWGPeerLookup(b.lookupPeerWireGuardString) @@ -5837,6 +5838,43 @@ func (b *LocalBackend) lookupPeerByIP(ip netip.Addr) (key.NodePublic, bool) { return peer.Key(), true } +// peerForIP is the [wgengine.Engine.SetPeerForIPFunc] callback. It returns +// which peer is responsible for a given IP address. Despite the name, it +// can also return the self node (with IsSelf set). It handles both +// Tailscale IPs (returning the owning peer or self) and non-Tailscale +// addresses like subnet-routed IPs or exit-node global internet IPs +// (returning whichever peer would route that traffic). +func (b *LocalBackend) peerForIP(ip netip.Addr) (_ wgengine.PeerForIP, ok bool) { + nb := b.currentNode() + + if tsaddr.IsTailscaleIP(ip) { + if nid, ok := nb.NodeByAddr(ip); ok { + if n, ok := nb.NodeByID(nid); ok { + self := nb.Self() + return wgengine.PeerForIP{ + Node: n, + IsSelf: self.Valid() && self.ID() == nid, + Route: netip.PrefixFrom(ip, ip.BitLen()), + }, true + } + } + } + + pk, route, ok := b.e.PeerKeyForIP(ip) + if !ok { + return wgengine.PeerForIP{}, false + } + nid, ok := nb.NodeByKey(pk) + if !ok { + return wgengine.PeerForIP{}, false + } + n, ok := nb.NodeByID(nid) + if !ok { + return wgengine.PeerForIP{}, false + } + return wgengine.PeerForIP{Node: n, Route: route}, true +} + // onPeerWireGuardState is called by wireguard-go, through wgengine, for // serialized WireGuard session state transitions. wireguard-go is holding locks // while calling this, so this must stay cheap, must not acquire b.mu, and must diff --git a/ipn/ipnlocal/local_test.go b/ipn/ipnlocal/local_test.go index 79d49249e..dbd0769d7 100644 --- a/ipn/ipnlocal/local_test.go +++ b/ipn/ipnlocal/local_test.go @@ -9177,3 +9177,281 @@ func TestResetAuthClearsMachineKey(t *testing.T) { t.Fatalf("ReadState after clear: got err %v, want ErrStateNotExist", err) } } + +func TestEnginePeerForIPAdjustsForPrefs(t *testing.T) { + // Build a netmap with: + // - self node at 100.64.0.1 + // - exitA (node 1): exit node at 100.64.0.2 + // - exitB (node 2): exit node at 100.64.0.3 + // - subnetBig (node 3): subnet router for 10.0.0.0/16 at 100.64.0.4 + // - subnetSmall (node 4): subnet router for 10.0.0.0/24 at 100.64.0.5 + hi := (&tailcfg.Hostinfo{}).View() + + selfNode := (&tailcfg.Node{ + ID: 10, + StableID: "self", + Key: makeNodeKeyFromID(10), + DiscoKey: makeDiscoKeyFromID(10), + Name: "self", + Hostinfo: hi, + Cap: 26, + Addresses: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.1/32"), + }, + MachineAuthorized: true, + }).View() + + exitA := (&tailcfg.Node{ + ID: 1, + StableID: "exitA", + Key: makeNodeKeyFromID(1), + DiscoKey: makeDiscoKeyFromID(1), + Name: "exitA", + Hostinfo: hi, + Cap: 26, + Addresses: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.2/32"), + }, + AllowedIPs: append([]netip.Prefix{netip.MustParsePrefix("100.64.0.2/32")}, tsaddr.ExitRoutes()...), + MachineAuthorized: true, + HomeDERP: 1, + }).View() + + exitB := (&tailcfg.Node{ + ID: 2, + StableID: "exitB", + Key: makeNodeKeyFromID(2), + DiscoKey: makeDiscoKeyFromID(2), + Name: "exitB", + Hostinfo: hi, + Cap: 26, + Addresses: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.3/32"), + }, + AllowedIPs: append([]netip.Prefix{netip.MustParsePrefix("100.64.0.3/32")}, tsaddr.ExitRoutes()...), + MachineAuthorized: true, + HomeDERP: 2, + }).View() + + subnetBig := (&tailcfg.Node{ + ID: 3, + StableID: "subnetBig", + Key: makeNodeKeyFromID(3), + DiscoKey: makeDiscoKeyFromID(3), + Name: "subnetBig", + Hostinfo: hi, + Cap: 26, + Addresses: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.4/32"), + }, + AllowedIPs: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.4/32"), + netip.MustParsePrefix("10.0.0.0/16"), + }, + PrimaryRoutes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/16")}, + MachineAuthorized: true, + HomeDERP: 1, + }).View() + + subnetSmall := (&tailcfg.Node{ + ID: 4, + StableID: "subnetSmall", + Key: makeNodeKeyFromID(4), + DiscoKey: makeDiscoKeyFromID(4), + Name: "subnetSmall", + Hostinfo: hi, + Cap: 26, + Addresses: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.5/32"), + }, + AllowedIPs: []netip.Prefix{ + netip.MustParsePrefix("100.64.0.5/32"), + netip.MustParsePrefix("10.0.0.0/24"), + }, + PrimaryRoutes: []netip.Prefix{netip.MustParsePrefix("10.0.0.0/24")}, + MachineAuthorized: true, + HomeDERP: 1, + }).View() + + nm := buildNetmapWithPeers(selfNode, exitA, exitB, subnetBig, subnetSmall) + + var eng wgengine.Engine + var curT *testing.T // active subtest, for test helpers + + wantPeer := func(ip string, n tailcfg.NodeView) { + t := curT + t.Helper() + pip, ok := eng.PeerForIP(netip.MustParseAddr(ip)) + if !ok { + t.Fatalf("PeerForIP(%s): ok=false, want true", ip) + } + if pip.IsSelf { + t.Fatalf("PeerForIP(%s): IsSelf=true, want false", ip) + } + if pip.Node.Key() != n.Key() { + t.Fatalf("PeerForIP(%s): key=%v, want %v", ip, pip.Node.Key(), n.Key()) + } + } + wantNotPeer := func(ip string) { + t := curT + t.Helper() + if _, ok := eng.PeerForIP(netip.MustParseAddr(ip)); ok { + t.Fatalf("PeerForIP(%s): ok=true, want false", ip) + } + } + wantKey := func(ip string, n tailcfg.NodeView) { + t := curT + t.Helper() + pk, _, ok := eng.PeerKeyForIP(netip.MustParseAddr(ip)) + if !ok { + t.Fatalf("PeerKeyForIP(%s): ok=false, want true", ip) + } + if pk != n.Key() { + t.Fatalf("PeerKeyForIP(%s): key=%v, want %v", ip, pk, n.Key()) + } + } + wantNotKey := func(ip string) { + t := curT + t.Helper() + if _, _, ok := eng.PeerKeyForIP(netip.MustParseAddr(ip)); ok { + t.Fatalf("PeerKeyForIP(%s): ok=true, want false", ip) + } + } + wantSelf := func(ip string) { + t := curT + t.Helper() + pip, ok := eng.PeerForIP(netip.MustParseAddr(ip)) + if !ok { + t.Fatalf("PeerForIP(%s): ok=false, want true", ip) + } + if !pip.IsSelf { + t.Fatalf("PeerForIP(%s): IsSelf=false, want true", ip) + } + } + + tests := []struct { + name string + prefs ipn.Prefs + check func() + }{ + { + name: "no_routes_no_exit", + prefs: ipn.Prefs{ + RouteAll: false, + ExitNodeID: "", + }, + check: func() { + wantSelf("100.64.0.1") + wantPeer("100.64.0.2", exitA) + wantPeer("100.64.0.3", exitB) + wantPeer("100.64.0.4", subnetBig) + wantPeer("100.64.0.5", subnetSmall) + wantNotPeer("10.0.0.5") + wantNotPeer("10.0.1.5") + wantNotPeer("8.8.8.8") + wantNotKey("10.0.0.5") + wantNotKey("8.8.8.8") + }, + }, + { + name: "accept_routes_on", + prefs: ipn.Prefs{ + RouteAll: true, + ExitNodeID: "", + }, + check: func() { + // 10.0.0.5 is in both /16 and /24; longest prefix match picks subnetSmall. + wantPeer("10.0.0.5", subnetSmall) + wantKey("10.0.0.5", subnetSmall) + // 10.0.1.5 is in /16 only; goes to subnetBig. + wantPeer("10.0.1.5", subnetBig) + wantKey("10.0.1.5", subnetBig) + wantNotPeer("8.8.8.8") + }, + }, + { + name: "exit_node_A", + prefs: ipn.Prefs{ + RouteAll: true, + ExitNodeID: "exitA", + }, + check: func() { + wantPeer("8.8.8.8", exitA) + wantKey("8.8.8.8", exitA) + wantPeer("10.0.0.5", subnetSmall) + wantPeer("10.0.1.5", subnetBig) + }, + }, + { + name: "exit_node_B", + prefs: ipn.Prefs{ + RouteAll: true, + ExitNodeID: "exitB", + }, + check: func() { + wantPeer("8.8.8.8", exitB) + wantKey("8.8.8.8", exitB) + wantPeer("10.0.0.5", subnetSmall) + wantPeer("10.0.1.5", subnetBig) + }, + }, + { + name: "exit_node_off_routes_on", + prefs: ipn.Prefs{ + RouteAll: true, + ExitNodeID: "", + }, + check: func() { + wantNotPeer("8.8.8.8") + wantNotKey("8.8.8.8") + wantPeer("10.0.0.5", subnetSmall) + wantPeer("10.0.1.5", subnetBig) + }, + }, + { + name: "accept_routes_off", + prefs: ipn.Prefs{ + RouteAll: false, + ExitNodeID: "", + }, + check: func() { + wantNotPeer("10.0.0.5") + wantNotKey("10.0.0.5") + wantPeer("100.64.0.4", subnetBig) + wantPeer("100.64.0.5", subnetSmall) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + lb := newTestLocalBackend(t) + + nk := key.NewNode() + nmCopy := new(*nm) + nmCopy.NodeKey = nk.Public() + + lb.mu.Lock() + err := lb.pm.SetPrefs((&ipn.Prefs{ + ControlURL: "https://localhost:1/", + WantRunning: true, + RouteAll: tt.prefs.RouteAll, + ExitNodeID: tt.prefs.ExitNodeID, + Persist: &persist.Persist{PrivateNodeKey: nk}, + }).View(), ipn.NetworkProfile{}) + lb.mu.Unlock() + if err != nil { + t.Fatal(err) + } + + lb.SetControlClientStatus(lb.cc, controlclient.Status{ + NetMap: nmCopy, + LoggedIn: true, + }) + + eng = lb.sys.Engine.Get() + curT = t + tt.check() + }) + } +} diff --git a/ipn/ipnlocal/state_test.go b/ipn/ipnlocal/state_test.go index 08d95f0d7..6ca96e4bd 100644 --- a/ipn/ipnlocal/state_test.go +++ b/ipn/ipnlocal/state_test.go @@ -1983,6 +1983,10 @@ func (e *mockEngine) Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb func (e *mockEngine) InstallCaptureHook(packet.CaptureCallback) {} func (e *mockEngine) SetPeerByIPPacketFunc(func(netip.Addr) (_ key.NodePublic, ok bool)) {} +func (e *mockEngine) SetPeerForIPFunc(func(netip.Addr) (_ wgengine.PeerForIP, ok bool)) {} +func (e *mockEngine) PeerKeyForIP(netip.Addr) (_ key.NodePublic, _ netip.Prefix, ok bool) { + return key.NodePublic{}, netip.Prefix{}, false +} func (e *mockEngine) SetPeerSessionStateFunc(func(key.NodePublic, wgengine.PeerWireGuardState)) { } func (e *mockEngine) SetNetLogNodeSource(netlog.NodeSource) {} diff --git a/wgengine/userspace.go b/wgengine/userspace.go index b27a60895..aa9ce053c 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -35,7 +35,6 @@ "tailscale.com/net/netmon" "tailscale.com/net/packet" "tailscale.com/net/sockstats" - "tailscale.com/net/tsaddr" "tailscale.com/net/tsdial" "tailscale.com/net/tstun" "tailscale.com/syncs" @@ -124,6 +123,11 @@ type userspaceEngine struct { // the per-packet wgdev callback without locking. peerByIPRoute atomic.Pointer[bart.Table[key.NodePublic]] + // peerForIP, if non-nil, is the callback installed via + // [userspaceEngine.SetPeerForIPFunc]. PeerForIP delegates to it + // for the cold-path control lookups (Ping, TSMP, pendopen, etc). + peerForIP atomic.Pointer[func(netip.Addr) (_ PeerForIP, ok bool)] + lastCfgFull wgcfg.Config lastRouter *router.Config lastDNSConfig dns.ConfigView // or invalid if none @@ -1571,65 +1575,39 @@ func (e *userspaceEngine) ProbeLocks() { e.wgLock.Unlock() } -// PeerForIP returns the Node in the wireguard config -// that's responsible for handling the given IP address. -// -// If none is found in the wireguard config but one is found in -// the netmap, it's described in an error. -// -// peerForIP acquires both e.mu and e.wgLock, but neither at the same -// time. -func (e *userspaceEngine) PeerForIP(ip netip.Addr) (ret PeerForIP, ok bool) { - e.mu.Lock() - nm := e.netMap - e.mu.Unlock() +// SetPeerForIPFunc installs the callback used by [userspaceEngine.PeerForIP]. +// See [Engine.SetPeerForIPFunc]. +func (e *userspaceEngine) SetPeerForIPFunc(fn func(netip.Addr) (PeerForIP, bool)) { + if fn == nil { + e.peerForIP.Store(nil) + return + } + e.peerForIP.Store(&fn) +} - if nm == nil { +// PeerKeyForIP looks up ip in the engine's AllowedIPs table +// ([userspaceEngine.peerByIPRoute]). See [Engine.PeerKeyForIP]. +func (e *userspaceEngine) PeerKeyForIP(ip netip.Addr) (pk key.NodePublic, route netip.Prefix, ok bool) { + if !ip.IsValid() { + return pk, route, false + } + rt := e.peerByIPRoute.Load() + if rt == nil { + return pk, route, false + } + route, pk, ok = rt.LookupPrefixLPM(netip.PrefixFrom(ip, ip.BitLen())) + return pk, route, ok +} + +// PeerForIP returns the node responsible for handling the given IP. +// It delegates to the callback installed via [SetPeerForIPFunc]; engines +// without an installed callback return (zero, false). +func (e *userspaceEngine) PeerForIP(ip netip.Addr) (ret PeerForIP, ok bool) { + if !ip.IsValid() { return ret, false } - - // Check for exact matches before looking for subnet matches. - // TODO(bradfitz): add maps for these. on NetworkMap? - for _, p := range nm.Peers { - for i := range p.Addresses().Len() { - a := p.Addresses().At(i) - if a.Addr() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) { - return PeerForIP{Node: p, Route: a}, true - } - } - } - addrs := nm.GetAddresses() - for i := range addrs.Len() { - if a := addrs.At(i); a.Addr() == ip && a.IsSingleIP() && tsaddr.IsTailscaleIP(ip) { - return PeerForIP{Node: nm.SelfNode, IsSelf: true, Route: a}, true - } - } - - e.wgLock.Lock() - defer e.wgLock.Unlock() - - // TODO(bradfitz): this is O(n peers). Add ART to netaddr? - var best netip.Prefix - var bestKey key.NodePublic - for _, p := range e.lastCfgFull.Peers { - for _, cidr := range p.AllowedIPs { - if !cidr.Contains(ip) { - continue - } - if !best.IsValid() || cidr.Bits() > best.Bits() { - best = cidr - bestKey = p.PublicKey - } - } - } - // And another pass. Probably better than allocating a map per peerForIP - // call. But TODO(bradfitz): add a lookup map to netmap.NetworkMap. - if !bestKey.IsZero() { - for _, p := range nm.Peers { - if p.Key() == bestKey { - return PeerForIP{Node: p, Route: best}, true - } - } + if fn := e.peerForIP.Load(); fn != nil { + return (*fn)(ip) } return ret, false } diff --git a/wgengine/wgengine.go b/wgengine/wgengine.go index 4c02d60b1..8ca37a65b 100644 --- a/wgengine/wgengine.go +++ b/wgengine/wgengine.go @@ -100,9 +100,45 @@ type Engine interface { ResetAndStop() (*Status, error) // PeerForIP returns the node to which the provided IP routes, - // if any. If none is found, (nil, false) is returned. + // if any. If none is found, (zero, false) is returned. + // + // Despite the name, it can return the self node (with + // PeerForIP.IsSelf set). It handles Tailscale IPs, subnet-routed + // IPs, and exit-node global internet IPs, returning whichever + // node would handle that traffic. + // + // This is the cold path used by Ping, TSMP, pendopen diagnostics, + // and debug endpoints. It uses the same underlying data structures + // as the wireguard-go outbound packet path + // ([Engine.SetPeerByIPPacketFunc]), but is slower because it + // returns richer data (a full NodeView, the matched route prefix, + // and the IsSelf flag) requiring extra lookups. + // + // In production, the lookup is implemented by LocalBackend and + // plumbed in via [Engine.SetPeerForIPFunc]; the engine itself holds + // no peer-lookup state on this path. PeerForIP(netip.Addr) (_ PeerForIP, ok bool) + // SetPeerForIPFunc installs a callback used by [Engine.PeerForIP]. + // It parallels [Engine.SetPeerByIPPacketFunc] but serves the + // cold-path control lookups (Ping, TSMP, pendopen diagnostics, + // [tsdial.Dialer.UseNetstackForIP], debug endpoints). + // + // If fn is nil, PeerForIP returns (zero, false) for every IP. + // + // LocalBackend installs a func backed by the live nodeBackend for + // exact-match and self addresses, with [Engine.PeerKeyForIP] + // supplying the subnet-route / exit-node fallback. + SetPeerForIPFunc(fn func(netip.Addr) (_ PeerForIP, ok bool)) + + // PeerKeyForIP returns the peer's NodePublic and the matched prefix + // for the longest-prefix match of ip in the engine's AllowedIPs + // table (the wireguard config most recently installed via + // [Engine.Reconfig]). Exit-node selection is honored: an unselected + // exit node's 0.0.0.0/0 is not matched. It is the same table the + // outbound packet hot path consults via [Engine.SetPeerByIPPacketFunc]. + PeerKeyForIP(netip.Addr) (_ key.NodePublic, _ netip.Prefix, ok bool) + // GetFilter returns the current packet filter, if any. GetFilter() *filter.Filter