diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 88c433757..3068c85b1 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -329,6 +329,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/ipn/ipnstate from tailscale.com/client/local+ tailscale.com/ipn/localapi from tailscale.com/ipn/ipnserver+ tailscale.com/ipn/policy from tailscale.com/feature/portlist + tailscale.com/ipn/routecheck from tailscale.com/feature/routecheck tailscale.com/ipn/store from tailscale.com/cmd/tailscaled+ L tailscale.com/ipn/store/awsstore from tailscale.com/feature/condregister L tailscale.com/ipn/store/kubestore from tailscale.com/feature/condregister @@ -378,7 +379,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/net/stun from tailscale.com/ipn/localapi+ tailscale.com/net/tlsdial from tailscale.com/control/controlclient+ tailscale.com/net/tlsdial/blockblame from tailscale.com/net/tlsdial - tailscale.com/net/traffic from tailscale.com/ipn/ipnlocal + tailscale.com/net/traffic from tailscale.com/ipn/ipnlocal+ tailscale.com/net/tsaddr from tailscale.com/client/web+ tailscale.com/net/tsdial from tailscale.com/cmd/tailscaled+ 💣 tailscale.com/net/tshttpproxy from tailscale.com/feature/useproxy diff --git a/feature/routecheck/ipn.go b/feature/routecheck/ipn.go new file mode 100644 index 000000000..4cc62e9a2 --- /dev/null +++ b/feature/routecheck/ipn.go @@ -0,0 +1,18 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package routecheck + +import ( + "tailscale.com/ipn/ipnext" + "tailscale.com/ipn/routecheck" +) + +// NodeBackender is a shim between [ipnext.Host] and [routecheck.NodeBackender]. 
+type nodeBackender struct{ ipnext.Host }
+
+var _ routecheck.NodeBackender = nodeBackender{}
+
+// NodeBackend adapts the embedded host's NodeBackend result
+// to the [routecheck.NodeBackend] interface.
+func (nb nodeBackender) NodeBackend() routecheck.NodeBackend {
+	return nb.Host.NodeBackend()
+}
diff --git a/feature/routecheck/routecheck.go b/feature/routecheck/routecheck.go
index 055ceb379..fc9bf9bac 100644
--- a/feature/routecheck/routecheck.go
+++ b/feature/routecheck/routecheck.go
@@ -12,6 +12,75 @@
 // establish a WireGuard session.
 package routecheck
 
+import (
+	"fmt"
+
+	"tailscale.com/ipn/ipnext"
+	"tailscale.com/ipn/routecheck"
+	"tailscale.com/types/logger"
+	"tailscale.com/types/netmap"
+)
+
+// featureName is the name of the feature implemented by this package.
+// It is also the [extension] name and the log prefix.
+const featureName = "routecheck"
+
 func init() {
-	// TODO(sfllaw): Initialize the new routecheck package.
+	ipnext.RegisterExtension(featureName, func(logf logger.Logf, b ipnext.SafeBackend) (ipnext.Extension, error) {
+		return &Extension{
+			logf:    logger.WithPrefix(logf, featureName+": "),
+			backend: b,
+		}, nil
+	})
+}
+
+// Extension implements the [ipnext.Extension] interface.
+type Extension struct {
+	// Client is the routecheck client; it is created by Init.
+	Client *routecheck.Client
+
+	// logf and backend are set when the extension is registered;
+	// nb and nm are filled in by Init.
+	logf    logger.Logf
+	backend ipnext.SafeBackend
+	nb      nodeBackender
+	nm      routecheck.NetMapper
+}
+
+var _ ipnext.Extension = new(Extension)
+
+// Name implements the [ipnext.Extension.Name] interface method.
+func (e *Extension) Name() string {
+	return featureName
+}
+
+// Init implements the [ipnext.Extension.Init] interface method.
+func (e *Extension) Init(h ipnext.Host) error {
+	// Wrap the host so the client can ask it for the node backend.
+	e.nb = nodeBackender{h}
+
+	// The backend must also be able to hand out the network map.
+	nm, ok := e.backend.(routecheck.NetMapper)
+	if !ok {
+		return fmt.Errorf("backend %T does not implement routecheck.NetMapper", e.backend)
+	}
+	e.nm = nm
+
+	// The engine from the backend's system is passed to the client as its Pinger.
+	pinger := e.backend.Sys().Engine.Get()
+
+	c, err := routecheck.NewClient(e.logf, e.nb, e.nm, pinger)
+	if err != nil {
+		return err
+	}
+	e.Client = c
+
+	h.Hooks().OnNetMapToggle.Add(e.onNetMapToggle)
+
+	return nil
+}
+
+// Shutdown implements the [ipnext.Extension.Shutdown] interface method.
+func (e *Extension) Shutdown() error {
+	return nil
+}
+
+// onNetMapToggle is registered as an OnNetMapToggle hook by Init.
+// The hook argument is shadowed below: the network map is re-read from e.nm,
+// and the client is only notified when that map is non-nil.
+func (e *Extension) onNetMapToggle(nm *netmap.NetworkMap) {
+	if nm := e.nm.NetMapNoPeers(); nm != nil {
+		e.Client.NetMapAvailable(nm)
+	}
 }
diff --git a/ipn/ipnext/ipnext.go b/ipn/ipnext/ipnext.go
index 5ca50498a..1af259aac 100644
--- a/ipn/ipnext/ipnext.go
+++ b/ipn/ipnext/ipnext.go
@@ -22,6 +22,7 @@
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
 	"tailscale.com/types/mapx"
+	"tailscale.com/types/netmap"
 	"tailscale.com/types/views"
 	"tailscale.com/wgengine/filter"
 )
@@ -375,6 +376,12 @@ type Hooks struct {
 	// is created. It is called with the LocalBackend locked.
 	NewControlClient feature.Hooks[NewControlClientCallback]
 
+	// OnNetMapToggle is called (with LocalBackend.mu held) when the network map
+	// is toggled from nil to non-nil, or non-nil to nil. This usually happens
+	// when the client connects to the control plane and receives the initial MapResponse,
+	// or when the client disconnects and the network map is cleared.
+	OnNetMapToggle feature.Hooks[func(*netmap.NetworkMap)]
+
 	// OnSelfChange is called (with LocalBackend.mu held) when the self node
 	// changes, including changing to nothing (an invalid view).
 	OnSelfChange feature.Hooks[func(tailcfg.NodeView)]
@@ -465,10 +472,16 @@ type FilterHooks struct {
 //
 // It is not a snapshot in time but is locked to a particular node.
 type NodeBackend interface {
+	// Self returns the current node.
+ Self() tailcfg.NodeView + // AppendMatchingPeers appends all peers that match the predicate // to the base slice and returns it. AppendMatchingPeers(base []tailcfg.NodeView, pred func(tailcfg.NodeView) bool) []tailcfg.NodeView + // Peers returns all the current peers. + Peers() []tailcfg.NodeView + // PeerCaps returns the capabilities that src has to this node. PeerCaps(src netip.Addr) tailcfg.PeerCapMap diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 9b9ea58fb..d87e696f2 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -6839,7 +6839,8 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { }() } - oldSelf := b.currentNode().NetMap().SelfNodeOrZero() + oldNetMap := b.currentNode().NetMap() + oldSelf := oldNetMap.SelfNodeOrZero() b.dialer.SetNetMap(nm) if ns, ok := b.sys.Netstack.GetOK(); ok { @@ -6918,6 +6919,12 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { } + if oldNetMap != nm && (oldNetMap == nil || nm == nil) { + for _, f := range b.extHost.Hooks().OnNetMapToggle { + f(nm) + } + } + if !oldSelf.Equal(nm.SelfNodeOrZero()) { for _, f := range b.extHost.Hooks().OnSelfChange { f(nm.SelfNode) diff --git a/ipn/ipnlocal/node_backend.go b/ipn/ipnlocal/node_backend.go index 3c21ff2a8..087d9df3f 100644 --- a/ipn/ipnlocal/node_backend.go +++ b/ipn/ipnlocal/node_backend.go @@ -161,6 +161,7 @@ func (nb *nodeBackend) Context() context.Context { return nb.ctx } +// Self returns the current node. func (nb *nodeBackend) Self() tailcfg.NodeView { nb.mu.Lock() defer nb.mu.Unlock() diff --git a/ipn/routecheck/log.go b/ipn/routecheck/log.go new file mode 100644 index 000000000..0a92cefe9 --- /dev/null +++ b/ipn/routecheck/log.go @@ -0,0 +1,32 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package routecheck + +import ( + "log" + + "tailscale.com/envknob" +) + +// Debugging tweakable. 
+var debugRoutecheck = envknob.RegisterBool("TS_DEBUG_ROUTECHECK")
+
+// logf prints to [Client.Logf], or to log.Printf when Logf is nil.
+// Arguments are handled in the manner of fmt.Printf.
+func (c *Client) logf(format string, a ...any) {
+	if c.Logf != nil {
+		c.Logf(format, a...)
+	} else {
+		log.Printf(format, a...)
+	}
+}
+
+// vlogf is like logf, but only prints when [Client.Verbose] is set
+// or when the TS_DEBUG_ROUTECHECK environment variable enables debug mode.
+// Arguments are handled in the manner of fmt.Printf.
+func (c *Client) vlogf(format string, a ...any) {
+	if c.Verbose || debugRoutecheck() {
+		c.logf(format, a...)
+	}
+}
diff --git a/ipn/routecheck/probe.go b/ipn/routecheck/probe.go
new file mode 100644
index 000000000..1fb577928
--- /dev/null
+++ b/ipn/routecheck/probe.go
@@ -0,0 +1,275 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+package routecheck
+
+import (
+	"cmp"
+	"context"
+	"iter"
+	"net/netip"
+	"slices"
+	"time"
+
+	"golang.org/x/sync/errgroup"
+	"tailscale.com/ipn/ipnstate"
+	"tailscale.com/net/traffic"
+	"tailscale.com/syncs"
+	"tailscale.com/tailcfg"
+	"tailscale.com/util/clientmetric"
+	"tailscale.com/util/mak"
+)
+
+var (
+	metricPing          = clientmetric.NewCounter("routecheck_ping")
+	metricPingError     = clientmetric.NewCounter("routecheck_ping_error")
+	metricPingReachable = clientmetric.NewCounter("routecheck_ping_reachable")
+	metricPingTimeout   = clientmetric.NewCounter("routecheck_ping_timeout")
+	metricProbe         = clientmetric.NewCounter("routecheck_probe")
+)
+
+// DefaultTimeout is the default time allowed for a response before a peer is considered unreachable.
+const DefaultTimeout = 4 * time.Second
+
+// probed is a single probe target: the node's identity, the one address
+// that will be pinged, and the routes recorded for it in the [Report].
+type probed struct {
+	id     tailcfg.NodeID
+	name   string
+	addr   netip.Addr
+	routes []netip.Prefix
+}
+
+// probe sends one Disco ping to every node in nodes and returns a [Report]
+// listing the nodes that answered. If limit is positive, at most limit pings
+// run concurrently. Each ping is given up on after timeout.
+//
+// Failed pings are logged (verbosely) and counted, never returned:
+// the error result is always nil.
+func (c *Client) probe(ctx context.Context, nodes iter.Seq[probed], limit int, timeout time.Duration) (*Report, error) {
+	metricProbe.Add(1)
+
+	g, ctx := errgroup.WithContext(ctx)
+	if limit > 0 {
+		g.SetLimit(limit)
+	}
+
+	var mu syncs.Mutex // guards r.Reachable
+	r := &Report{}
+
+	// TODO(sfllaw): Since the nodes are sorted by priority,
+	// where earlier nodes have high traffic-steering scores,
+	// it should be possible to deprioritize or skip probes
+	// if there are already enough responses for a particular resource.
+	// This optimization has not been implemented yet, so all nodes are probed.
+	for n := range nodes {
+		g.Go(func() error {
+			metricPing.Add(1)
+			// TODO(sfllaw): Why did we choose Disco ping instead of TSMP ping?
+			// After all, a TSMP ping proves that the peer Tailscale node is there
+			// and that both nodes know each other’s WireGuard keys,
+			// while a Disco ping only proves that the peer can be found using DERP.
+			// However, TSMP is wrapped in a long-lived WireGuard connection,
+			// which is too expensive when generating a reachability report.
+			//
+			// Since WireGuard connections are established using a single round-trip,
+			// there is no existing way to confirm that a WireGuard connection
+			// can be established without burdening the peer with lingering state.
+			// WireGuard could be extended with a special `handshake_initiation`
+			// that only verifies that a connection could be established,
+			// requesting this with a sentinel in `handshake_initiation.mac2`.
+			// The peer would send a valid but stateless `handshake_response`,
+			// using a random ephemeral_private key and not record any state.
+			// See https://www.wireguard.com/protocol/ and tailscale/tailscale#19670.
+			pong, err := c.ping(ctx, n.addr, tailcfg.PingDisco, timeout)
+
+			// Returning an error would cancel the errgroup, so every failure
+			// is logged, counted exactly once, and reported as nil.
+			switch {
+			case err == context.DeadlineExceeded:
+				// Ping timed out, so assume that the node is unreachable.
+				c.vlogf("ping %s (%s): timed out", n.addr, n.id)
+				metricPingTimeout.Add(1)
+				return nil
+			case err != nil:
+				// Any other error is not a timeout and must not be counted as one.
+				c.vlogf("ping %s (%s): error: %v", n.addr, n.id, err)
+				metricPingError.Add(1)
+				return nil
+			case pong == nil:
+				c.vlogf("ping %s (%s): error: no response", n.addr, n.id)
+				metricPingError.Add(1)
+				return nil
+			}
+
+			// NOTE(review): a result whose Err field is set is still counted
+			// as reachable here — confirm that this is intended.
+			c.vlogf("ping %s (%s): result: %f ms (err: %v)", n.addr, n.id, pong.LatencySeconds*1000, pong.Err)
+			metricPingReachable.Add(1)
+
+			mu.Lock()
+			defer mu.Unlock()
+			if _, ok := r.Reachable[n.id]; !ok {
+				mak.Set(&r.Reachable, n.id, Node{
+					ID:     n.id,
+					Name:   n.name,
+					Addr:   n.addr,
+					Routes: n.routes,
+				})
+			}
+			return nil
+		})
+	}
+	_ = g.Wait() // Every goroutine above returns nil, so there is no error to report.
+	r.Done = time.Now()
+	return r, nil
+}
+
+// Probe actively probes the sequence of nodes and returns a reachability [Report].
+// If limit is positive, it limits the number of concurrent active probes;
+// a limit of zero will ping every node at once.
+// A peer is considered unreachable if it doesn’t respond within the timeout.
+//
+// This function will probe nodes in order, so better candidates should be
+// sorted earlier in the sequence. Skipping probes once enough reachable peers
+// have been discovered is not implemented yet, so every node is probed.
+//
+// Only one address is probed per node: the first of its addresses whose
+// IP version the current node also has an address for. WireGuard-only nodes
+// are not pinged and are assumed to be reachable.
+//
+// Probe returns a nil Report and a nil error when the current node is invalid
+// or has neither an IPv4 nor an IPv6 address.
+func (c *Client) Probe(ctx context.Context, nodes iter.Seq[tailcfg.NodeView], limit int, timeout time.Duration) (*Report, error) {
+	is4, is6 := supportsIPVersions(c.nb.NodeBackend().Self())
+	if is4 == nil && is6 == nil {
+		return nil, nil
+	}
+	addrFor := addrPicker(is4, is6)
+
+	// Assumed nodes are ones that we assume are reachable,
+	// because we can’t probe nodes that don’t understand Disco pings.
+	var assumed []tailcfg.NodeView
+
+	var dsts iter.Seq[probed] = func(yield func(probed) bool) {
+		for n := range nodes {
+			if n.IsWireGuardOnly() {
+				assumed = append(assumed, n)
+				continue // Probably can’t speak Disco or DERP.
+			}
+
+			// Probe one of the tailnet addresses.
+			addr := addrFor(n)
+			if !addr.IsValid() {
+				continue // No valid addresses.
+			}
+			if !yield(probed{
+				id:     n.ID(),
+				name:   n.Name(),
+				addr:   addr,
+				routes: routes(n),
+			}) {
+				return
+			}
+		}
+	}
+
+	r, err := c.probe(ctx, dsts, limit, timeout)
+	if err != nil {
+		return nil, err
+	}
+
+	// Mix in the assumed nodes.
+	for _, n := range assumed {
+		addr := addrFor(n)
+		if !addr.IsValid() {
+			continue // No valid addresses.
+		}
+		id := n.ID()
+		if _, ok := r.Reachable[id]; !ok {
+			mak.Set(&r.Reachable, id, Node{
+				ID:     id,
+				Name:   n.Name(),
+				Addr:   addr,
+				Routes: routes(n),
+			})
+		}
+	}
+	return r, nil
+}
+
+// ProbeAllHARouters actively probes all High Availability routers in parallel
+// and returns a [Report] that identifies which of these routers are reachable.
+// If limit is positive, it limits the number of concurrent active probes;
+// a limit of zero will ping every candidate at once.
+// A peer is considered unreachable if it doesn’t respond within the timeout.
+func (c *Client) ProbeAllHARouters(ctx context.Context, limit int, timeout time.Duration) (*Report, error) {
+	nm, err := c.waitForNetMap(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// When a prefix is routed by multiple nodes, we probe those nodes.
+	// There is no point in probing a router when it is the only choice.
+	// These nodes are referred to as High Availability (HA) routers.
+	var nodes []tailcfg.NodeView
+	for _, rs := range c.RoutersByPrefix() {
+		if len(rs) <= 1 {
+			continue
+		}
+		nodes = append(nodes, rs...) // Note: this introduces duplicates.
+	}
+
+	// Sort by Node.ID and deduplicate to avoid double-probing.
+	slices.SortFunc(nodes, func(a, b tailcfg.NodeView) int {
+		return cmp.Compare(a.ID(), b.ID())
+	})
+	nodes = slices.CompactFunc(nodes, func(a, b tailcfg.NodeView) bool {
+		return a.ID() == b.ID()
+	})
+
+	// Each node should probe starting with the highest scoring node.
+	// We use rendezvous hashing to break ties in a consistent manner
+	// while still preventing swarming.
+	ss := traffic.ScoresFor(nm.SelfNode.ID(), nodes)
+	ss.SortNodes(nodes)
+
+	return c.Probe(ctx, slices.Values(nodes), limit, timeout)
+}
+
+// ping returns the result of a ping to the peer handling the given IP.
+// It returns the context’s error, such as [context.DeadlineExceeded], if the peer doesn’t respond within the timeout.
+func (c *Client) ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, timeout time.Duration) (*ipnstate.PingResult, error) {
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	// The channel has room for one result and the callback never blocks,
+	// so a result delivered after this function has returned is dropped.
+	ch := make(chan *ipnstate.PingResult, 1)
+	c.pinger.Ping(ip, pingType, 0, func(pr *ipnstate.PingResult) {
+		select {
+		case ch <- pr:
+		default:
+		}
+	})
+	select {
+	case pr := <-ch:
+		return pr, nil
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
+}
+
+// supportsIPVersions reports which IP versions n has addresses for.
+// is4 is non-nil when n has an IPv4 address and is6 is non-nil when n has
+// an IPv6 address. Both are nil when n is invalid or has no addresses.
+func supportsIPVersions(n tailcfg.NodeView) (is4, is6 func(netip.Addr) bool) {
+	if !n.Valid() {
+		return nil, nil
+	}
+	for _, ip := range n.Addresses().All() {
+		addr := ip.Addr()
+		if addr.Is4() {
+			is4 = func(addr netip.Addr) bool { return addr.Is4() }
+		} else if addr.Is6() {
+			is6 = func(addr netip.Addr) bool { return addr.Is6() }
+		}
+		if is4 != nil && is6 != nil {
+			break
+		}
+	}
+	return is4, is6
+}
+
+// addrPicker returns a function that picks the first address of a node
+// accepted by is4 or is6; a nil predicate accepts nothing.
+// The returned function yields the zero [netip.Addr] when no address matches.
+func addrPicker(is4, is6 func(netip.Addr) bool) func(n tailcfg.NodeView) netip.Addr {
+	return func(n tailcfg.NodeView) netip.Addr {
+		var zero netip.Addr
+		for _, ip := range n.Addresses().All() {
+			// Find a compatible IP address.
+			addr := ip.Addr()
+			if is4 != nil && is4(addr) {
+				return addr
+			}
+			if is6 != nil && is6(addr) {
+				return addr
+			}
+		}
+		return zero
+	}
+}
diff --git a/ipn/routecheck/report.go b/ipn/routecheck/report.go
new file mode 100644
index 000000000..f80df9179
--- /dev/null
+++ b/ipn/routecheck/report.go
@@ -0,0 +1,61 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+package routecheck
+
+import (
+	"context"
+	"net/netip"
+	"time"
+
+	"tailscale.com/tailcfg"
+	"tailscale.com/util/clientmetric"
+)
+
+var (
+	metricReport = clientmetric.NewCounter("routecheck_report")
+)
+
+// Report returns a reachability report. For now, every call actively probes
+// all HA routers, at most 5 at a time, using [DefaultTimeout].
+// Returns nil if a report isn’t available, which happens during initialization,
+// and whenever probing returns no report (any probing error is logged).
+func (c *Client) Report() *Report {
+	metricReport.Add(1)
+	nm := c.nm.NetMapNoPeers()
+	if nm == nil {
+		return nil // The report wasn’t available.
+	}
+
+	// TODO(sfllaw): Return the latest snapshot produced by background probing.
+	r, err := c.ProbeAllHARouters(context.TODO(), 5, DefaultTimeout)
+	if err != nil {
+		c.logf("reachability report error: %v", err)
+	}
+	return r
+}
+
+// Report contains the result of a single routecheck.
+type Report struct {
+	// Done is the time when the report was finished.
+	Done time.Time
+
+	// Reachable is the set of nodes that were reachable from the current host
+	// when this report was compiled. Missing nodes may or may not be reachable.
+	Reachable map[tailcfg.NodeID]Node
+}
+
+// Node represents a node in the reachability report.
+type Node struct {
+	// ID is the node’s ID; it is also the node’s key in [Report.Reachable].
+	ID tailcfg.NodeID
+
+	// Name is the FQDN of the node.
+	// It is also the MagicDNS name for the node.
+	// It has a trailing dot.
+	// e.g. "host.tail-scale.ts.net."
+	Name string
+
+	// Addr is the IP address that was probed.
+	Addr netip.Addr
+
+	// Routes are the subnets that the node will route.
+	Routes []netip.Prefix
+}
diff --git a/ipn/routecheck/routecheck.go b/ipn/routecheck/routecheck.go
new file mode 100644
index 000000000..8d9aba990
--- /dev/null
+++ b/ipn/routecheck/routecheck.go
@@ -0,0 +1,139 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Package routecheck performs status checks for routes from the current host.
+package routecheck
+
+import (
+	"context"
+	"errors"
+	"net/netip"
+	"sync"
+
+	"tailscale.com/ipn/ipnstate"
+	"tailscale.com/tailcfg"
+	"tailscale.com/types/logger"
+	"tailscale.com/types/netmap"
+)
+
+// Client generates Reports describing the result of both passive and active
+// reachability probing.
+type Client struct {
+	// Verbose enables verbose logging.
+	Verbose bool
+
+	// Logf optionally specifies where to log to.
+	// If nil, log.Printf is used.
+	Logf logger.Logf
+
+	// These elements are read-only after initialization.
+	nb     NodeBackender
+	nm     NetMapper
+	pinger Pinger
+
+	// netMapAvailable is broadcast by [Client.NetMapAvailable] when a non-nil
+	// network map is reported; waitForNetMap blocks on it until then.
+	netMapAvailable sync.Cond
+}
+
+// NetMapper is the interface that returns the current [netmap.NetworkMap].
+type NetMapper interface {
+	// NetMapNoPeers returns the latest cached network map received from
+	// controlclient WITHOUT a freshly-built Peers slice.
+	//
+	// On a tailnet with frequent peer churn the cached netmap's Peers slice
+	// can be stale relative to the live per-node-backend peers map; non-Peers
+	// fields (SelfNode, DNS, PacketFilter, capabilities, ...) are always
+	// current. Use this for any caller that does not need to iterate Peers,
+	// since it's O(1) regardless of tailnet size.
+	//
+	// Returns nil if no network map has been received yet.
+	NetMapNoPeers() *netmap.NetworkMap
+
+	// NetMapWithPeers returns the latest network map with the Peers slice
+	// populated.
+ // + // Currently this is the same as [LocalBackend.NetMapNoPeers]: the cached + // netmap's Peers slice may be stale relative to the live per-node-backend + // peers map. A follow-up change will switch this method to return a + // freshly-built netmap with up-to-date Peers, at O(N) cost per call. + // Callers that genuinely need the up-to-date peer set should use this + // method (and document why) so the upcoming change reaches them. + // + // Returns nil if no network map has been received yet. + NetMapWithPeers() *netmap.NetworkMap +} + +// NodeBackender is the interface that returns the current [NodeBackend]. +type NodeBackender interface { + NodeBackend() NodeBackend +} + +// NodeBackend is an interface to query the current node and its peers. +// +// It is not a snapshot in time but is locked to a particular node. +type NodeBackend interface { + // Self returns the current node. + Self() tailcfg.NodeView + + // Peers returns all the current peers. + Peers() []tailcfg.NodeView +} + +// Pinger is the interface that wraps the [tailscale.com/ipn/ipnlocal.LocalBackend.Ping] method. +type Pinger interface { + Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb func(*ipnstate.PingResult)) +} + +// NewClient returns a client that probes its peers using this LocalBackend. 
+func NewClient(logf logger.Logf, nb NodeBackender, nm NetMapper, pinger Pinger) (*Client, error) {
+	if nb == nil {
+		return nil, errors.New("NodeBackender must be set")
+	}
+	if nm == nil {
+		return nil, errors.New("NetMapper must be set")
+	}
+	if pinger == nil {
+		return nil, errors.New("Pinger must be set")
+	}
+	c := &Client{
+		Logf:   logf,
+		nb:     nb,
+		nm:     nm,
+		pinger: pinger,
+	}
+	// A sync.Cond is unusable until its Locker is set.
+	c.netMapAvailable.L = new(sync.Mutex)
+	return c, nil
+}
+
+// NetMapAvailable wakes every goroutine that is blocked in waitForNetMap.
+// It does nothing when nm is nil.
+//
+// NOTE(review): Broadcast is called without holding netMapAvailable.L,
+// so a waiter that has just observed a nil network map but has not yet
+// parked in Wait could miss this wake-up — confirm that this is acceptable.
+func (c *Client) NetMapAvailable(nm *netmap.NetworkMap) {
+	if nm == nil {
+		return // client disconnected
+	}
+	c.netMapAvailable.Broadcast()
+}
+
+// waitForNetMap blocks until c.nm reports a non-nil network map and returns it.
+// It returns ctx’s error if ctx is done while no network map is available,
+// including when ctx was already done before the call.
+func (c *Client) waitForNetMap(ctx context.Context) (*netmap.NetworkMap, error) {
+	cond := &c.netMapAvailable
+
+	stopf := context.AfterFunc(ctx, func() {
+		// Lock cond so that this Broadcast cannot slip in between
+		// the ctx.Err check and the Wait below.
+		cond.L.Lock()
+		defer cond.L.Unlock()
+		cond.Broadcast()
+	})
+	defer stopf()
+
+	cond.L.Lock()
+	defer cond.L.Unlock()
+	for {
+		if nm := c.nm.NetMapNoPeers(); nm != nil {
+			return nm, nil
+		}
+		// Check for cancellation before sleeping, not after:
+		// if ctx is already done, the AfterFunc above may have broadcast
+		// before this goroutine took the lock, and Wait would never return.
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+		cond.Wait()
+	}
+}
diff --git a/ipn/routecheck/routecheck_test.go b/ipn/routecheck/routecheck_test.go
new file mode 100644
index 000000000..562991a0f
--- /dev/null
+++ b/ipn/routecheck/routecheck_test.go
@@ -0,0 +1,487 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+package routecheck_test
+
+import (
+	"fmt"
+	"maps"
+	"net/netip"
+	"slices"
+	"testing"
+	"testing/synctest"
+	"time"
+
+	gcmp "github.com/google/go-cmp/cmp"
+	gcmpopts "github.com/google/go-cmp/cmp/cmpopts"
+
+	"tailscale.com/ipn/ipnstate"
+	"tailscale.com/ipn/routecheck"
+	"tailscale.com/net/tsaddr"
+	"tailscale.com/tailcfg"
+	"tailscale.com/types/netmap"
+	"tailscale.com/util/mak"
+	"tailscale.com/util/set"
+)
+
+func TestReport(t *testing.T) {
+	for _, tc := range []struct {
+		name  string
+		init  bool // true before the netmap has been loaded
+		peers []tailcfg.NodeView
+		gone 
[]tailcfg.NodeID // cannot ping these nodes + want []tailcfg.NodeID // Report.Reachable nodes + }{ + { + name: "before-netmap", + init: true, + want: nil, + }, + { + name: "no-peers", + peers: []tailcfg.NodeView{}, + want: []tailcfg.NodeID{}, + }, + { + name: "no-routers", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + }, + want: []tailcfg.NodeID{}, + }, + { + name: "no-choice", + peers: []tailcfg.NodeView{ + makeNode(11, withName("exit11"), withExitRoutes()), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + }, + want: []tailcfg.NodeID{}, + }, + { + name: "all-good", + peers: []tailcfg.NodeView{ + makeNode(11, withName("exit11"), withExitRoutes()), + makeNode(12, withName("exit12"), withExitRoutes()), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + makeNode(22, withName("subnet22"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + }, + want: []tailcfg.NodeID{11, 12, 21, 22}, + }, + { + name: "none-good", + peers: []tailcfg.NodeView{ + makeNode(11, withName("exit11"), withExitRoutes()), + makeNode(12, withName("exit12"), withExitRoutes()), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + makeNode(22, withName("subnet22"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + }, + gone: []tailcfg.NodeID{11, 12, 21, 22}, + want: []tailcfg.NodeID{}, + }, + { + name: "some-good", + peers: []tailcfg.NodeView{ + makeNode(11, withName("exit11"), withExitRoutes()), + makeNode(12, withName("exit12"), withExitRoutes()), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + 
withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + makeNode(22, withName("subnet22"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + }, + gone: []tailcfg.NodeID{11, 22}, + want: []tailcfg.NodeID{12, 21}, + }, + } { + makeDB := func(nodes []tailcfg.NodeView) map[tailcfg.NodeID]routecheck.Node { + if len(nodes) == 0 { + return nil + } + db := make(map[tailcfg.NodeID]routecheck.Node) + for _, n := range tc.peers { + db[n.ID()] = routecheck.Node{ + ID: n.ID(), + Name: n.Name(), + Addr: n.Addresses().At(0).Addr(), + Routes: n.AllowedIPs().AsSlice()[2:], + } + } + return db + } + cmpDiff := func(want, got any) string { + return gcmp.Diff(want, got, + gcmpopts.EquateComparable(netip.Addr{}, netip.Prefix{})) + } + + t.Run(tc.name, func(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + // The backend is initialized without a NetMap. + b := newStubBackend(tailcfg.NodeView{}, nil, withGone(tc.gone...)) + if !tc.init { + self := makeNode(99, withName("self")) + b = newStubBackend(self, tc.peers, withGone(tc.gone...)) + } + c, err := routecheck.NewClient(t.Logf, b, b, b) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + got := c.Report() + now := time.Now() // synctest will freeze time. 
+ + var want *routecheck.Report + peers := makeDB(tc.peers) + if !tc.init { + want = &routecheck.Report{ + Done: now, + } + for _, nid := range tc.want { + mak.Set(&want.Reachable, nid, peers[nid]) + } + } + + if diff := cmpDiff(want, got); diff != "" { + t.Errorf("-want +got:\n%s", diff) + } + }) + }) + } +} + +func TestRoutersByPrefix(t *testing.T) { + type routersByPrefix map[netip.Prefix][]tailcfg.NodeID + simplify := func(rs routecheck.RoutersByPrefix) routersByPrefix { + out := make(routersByPrefix, len(rs)) + for p, ns := range rs { + for _, n := range ns { + out[p] = append(out[p], n.ID()) + } + slices.Sort(out[p]) + } + return out + } + + for _, tc := range []struct { + name string + peers []tailcfg.NodeView + want routersByPrefix + }{ + { + name: "no-peers", + peers: []tailcfg.NodeView{}, + want: routersByPrefix{}, + }, + { + name: "no-routers", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + }, + want: routersByPrefix{}, + }, + { + name: "one-exit-node", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(11, withName("exit11"), withExitRoutes()), + }, + want: routersByPrefix{ + netip.MustParsePrefix("0.0.0.0/0"): {11}, + netip.MustParsePrefix("::/0"): {11}, + }, + }, + { + name: "overlapping-exit-nodes", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(11, withName("exit11"), withExitRoutes()), + makeNode(12, withName("exit12"), withExitRoutes()), + }, + want: routersByPrefix{ + netip.MustParsePrefix("0.0.0.0/0"): {11, 12}, + netip.MustParsePrefix("::/0"): {11, 12}, + }, + }, + { + name: "one-subnet-router", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + }, + want: routersByPrefix{ + netip.MustParsePrefix("192.168.1.0/24"): {21}, + netip.MustParsePrefix("2002:c000:0100::/48"): {21}, + }, + }, + { + name: 
"overlapping-subnet-routers", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + makeNode(22, withName("subnet22"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + }, + want: routersByPrefix{ + netip.MustParsePrefix("192.168.1.0/24"): {21, 22}, + netip.MustParsePrefix("2002:c000:0100::/48"): {21, 22}, + }, + }, + { + name: "disjoint-subnet-routers", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48"))), + makeNode(22, withName("subnet22"), + withRoutes(netip.MustParsePrefix("192.168.2.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0200::/48"))), + }, + want: routersByPrefix{ + netip.MustParsePrefix("192.168.1.0/24"): {21}, + netip.MustParsePrefix("2002:c000:0100::/48"): {21}, + netip.MustParsePrefix("192.168.2.0/24"): {22}, + netip.MustParsePrefix("2002:c000:0200::/48"): {22}, + }, + }, + { + name: "multiple-routes", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48")), + withRoutes(netip.MustParsePrefix("192.168.2.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0200::/48"))), + makeNode(22, withName("subnet22"), + withRoutes(netip.MustParsePrefix("192.168.2.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0200::/48")), + withRoutes(netip.MustParsePrefix("192.168.3.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0300::/48"))), + makeNode(23, withName("subnet23"), + withRoutes(netip.MustParsePrefix("192.168.3.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0300::/48")), + 
withRoutes(netip.MustParsePrefix("192.168.4.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0400::/48"))), + }, + want: routersByPrefix{ + netip.MustParsePrefix("192.168.1.0/24"): {21}, + netip.MustParsePrefix("2002:c000:0100::/48"): {21}, + netip.MustParsePrefix("192.168.2.0/24"): {21, 22}, + netip.MustParsePrefix("2002:c000:0200::/48"): {21, 22}, + netip.MustParsePrefix("192.168.3.0/24"): {22, 23}, + netip.MustParsePrefix("2002:c000:0300::/48"): {22, 23}, + netip.MustParsePrefix("192.168.4.0/24"): {23}, + netip.MustParsePrefix("2002:c000:0400::/48"): {23}, + }, + }, + { + name: "both-exit-nodes-and-routers", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(11, withName("exit11"), withExitRoutes()), + makeNode(12, withName("exit12"), withExitRoutes()), + makeNode(21, withName("subnet21"), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0100::/48")), + withRoutes(netip.MustParsePrefix("192.168.2.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0200::/48"))), + makeNode(22, withName("subnet22"), + withRoutes(netip.MustParsePrefix("192.168.2.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0200::/48")), + withRoutes(netip.MustParsePrefix("192.168.3.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0300::/48"))), + }, + want: routersByPrefix{ + netip.MustParsePrefix("0.0.0.0/0"): {11, 12}, + netip.MustParsePrefix("::/0"): {11, 12}, + netip.MustParsePrefix("192.168.1.0/24"): {21}, + netip.MustParsePrefix("2002:c000:0100::/48"): {21}, + netip.MustParsePrefix("192.168.2.0/24"): {21, 22}, + netip.MustParsePrefix("2002:c000:0200::/48"): {21, 22}, + netip.MustParsePrefix("192.168.3.0/24"): {22}, + netip.MustParsePrefix("2002:c000:0300::/48"): {22}, + }, + }, + { + name: "mixed-nodes", + peers: []tailcfg.NodeView{ + makeNode(1, withName("peer1")), + makeNode(31, withName("router31"), + withExitRoutes(), + withRoutes(netip.MustParsePrefix("192.168.1.0/24")), + 
withRoutes(netip.MustParsePrefix("2002:c000:0100::/48")), + withRoutes(netip.MustParsePrefix("192.168.2.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0200::/48"))), + makeNode(32, withName("router32"), + withExitRoutes(), + withRoutes(netip.MustParsePrefix("192.168.2.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0200::/48")), + withRoutes(netip.MustParsePrefix("192.168.3.0/24")), + withRoutes(netip.MustParsePrefix("2002:c000:0300::/48"))), + }, + want: routersByPrefix{ + netip.MustParsePrefix("0.0.0.0/0"): {31, 32}, + netip.MustParsePrefix("::/0"): {31, 32}, + netip.MustParsePrefix("192.168.1.0/24"): {31}, + netip.MustParsePrefix("2002:c000:0100::/48"): {31}, + netip.MustParsePrefix("192.168.2.0/24"): {31, 32}, + netip.MustParsePrefix("2002:c000:0200::/48"): {31, 32}, + netip.MustParsePrefix("192.168.3.0/24"): {32}, + netip.MustParsePrefix("2002:c000:0300::/48"): {32}, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + self := makeNode(99, withName("self")) + b := newStubBackend(self, tc.peers) + c, err := routecheck.NewClient(t.Logf, b, b, b) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + got := simplify(c.RoutersByPrefix()) + if !maps.EqualFunc(got, tc.want, slices.Equal) { + t.Errorf("got %+v, want %+v", got, tc.want) + } + }) + } + +} + +type nodeOptFunc func(*tailcfg.Node) + +func makeNode(id tailcfg.NodeID, opts ...nodeOptFunc) tailcfg.NodeView { + addresses := []netip.Prefix{ + netip.MustParsePrefix(fmt.Sprintf("192.168.0.%d/32", id)), + netip.MustParsePrefix(fmt.Sprintf("fd7a:115c:a1e0::%d/128", id)), + } + node := &tailcfg.Node{ + ID: id, + StableID: tailcfg.StableNodeID(fmt.Sprintf("stable%d", id)), + Name: fmt.Sprintf("node%d", id), + Online: new(true), + MachineAuthorized: true, + HomeDERP: int(id), + Addresses: addresses, + AllowedIPs: addresses, + } + for _, opt := range opts { + opt(node) + } + return node.View() +} + +func withExitRoutes() nodeOptFunc { + return withRoutes(tsaddr.ExitRoutes()...) 
+} + +func withName(name string) nodeOptFunc { + return func(n *tailcfg.Node) { + n.Name = name + } +} + +func withRoutes(routes ...netip.Prefix) nodeOptFunc { + return func(n *tailcfg.Node) { + n.AllowedIPs = append(n.AllowedIPs, routes...) + } +} + +var _ routecheck.NodeBackender = &stubBackend{} +var _ routecheck.NodeBackend = &stubBackend{} +var _ routecheck.NetMapper = &stubBackend{} +var _ routecheck.Pinger = &stubBackend{} + +type stubBackend struct { + self tailcfg.NodeView + peers []tailcfg.NodeView + gone set.Set[tailcfg.NodeID] +} + +type backendOptFunc func(*stubBackend) + +func newStubBackend(self tailcfg.NodeView, peers []tailcfg.NodeView, opts ...backendOptFunc) *stubBackend { + b := &stubBackend{ + self: self, + peers: slices.Clone(peers), + } + for _, opt := range opts { + opt(b) + } + return b +} + +func (b *stubBackend) NetMapNoPeers() *netmap.NetworkMap { + if !b.self.Valid() { + return nil + } + return &netmap.NetworkMap{ + SelfNode: b.self, + Peers: nil, // No peers. + } +} + +func (b *stubBackend) NetMapWithPeers() *netmap.NetworkMap { + nm := b.NetMapNoPeers() + if nm != nil { + nm.Peers = b.peers + } + return nm +} + +func (nb *stubBackend) NodeBackend() routecheck.NodeBackend { + return nb +} + +func (nb *stubBackend) Self() tailcfg.NodeView { + return nb.self +} + +func (nb *stubBackend) Peers() []tailcfg.NodeView { + return nb.peers +} + +func (b *stubBackend) Ping(ip netip.Addr, pingType tailcfg.PingType, size int, cb func(*ipnstate.PingResult)) { + // Does the IP address match one of the peers’ addresses? 
+ for _, n := range b.peers { + for _, a := range n.Addresses().All() { + if a.Addr() != ip { + continue + } + + if b.gone.Contains(n.ID()) { + continue + } + + go cb(&ipnstate.PingResult{ + IP: ip.String(), + NodeIP: ip.String(), + NodeName: n.Name(), + LatencySeconds: 0.01, + }) + } + } +} + +func withGone(gone ...tailcfg.NodeID) backendOptFunc { + return func(b *stubBackend) { + b.gone = set.SetOf(gone) + } + +} diff --git a/ipn/routecheck/routes.go b/ipn/routecheck/routes.go new file mode 100644 index 000000000..b14d67db8 --- /dev/null +++ b/ipn/routecheck/routes.go @@ -0,0 +1,51 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package routecheck + +import ( + "net/netip" + + "tailscale.com/tailcfg" + "tailscale.com/util/mak" +) + +// RoutersByPrefix represents a map of nodes grouped by the subnet that they route. +type RoutersByPrefix map[netip.Prefix][]tailcfg.NodeView + +// RoutersByPrefix returns a map of nodes grouped by the subnet that they route. +// Nodes that route for /0 prefixes are exit nodes, their subnet is the Internet. +// The result omits any prefix that is one of a node’s local addresses. +// +// Note: Fallback routes are not supported by design. If a subnet prefix +// contained within another more general prefix has no reachable routers, +// traffic is still sent to one of those unreachable routers. +// Routers for the general prefix aren’t candidates. See tailscale/tailscale#18550. +func (c *Client) RoutersByPrefix() RoutersByPrefix { + var routers RoutersByPrefix + for _, n := range c.nb.NodeBackend().Peers() { + for _, pfx := range routes(n) { + mak.Set(&routers, pfx, append(routers[pfx], n)) + } + } + return routers +} + +// Routes returns a slice of subnets that the given node will route. +// If the node is an exit node, the result will contain at least one /0 prefix. +// If the node is a subnet router, the result will contain a smaller prefix. 
+// The result omits any prefix that is one of the node’s local addresses. +func routes(n tailcfg.NodeView) []netip.Prefix { + var routes []netip.Prefix +AllowedIPs: + for _, pfx := range n.AllowedIPs().All() { + // Routers never forward their own local addresses. + for _, addr := range n.Addresses().All() { + if pfx == addr { + continue AllowedIPs + } + } + routes = append(routes, pfx) + } + return routes +}