diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 5d3bbd36e..945b0dbd4 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -1570,6 +1570,11 @@ func (b *LocalBackend) PeerCaps(src netip.Addr) tailcfg.PeerCapMap { return b.currentNode().PeerCaps(src) } +// Peers returns all the current peers in an undefined order. +func (b *LocalBackend) Peers() []tailcfg.NodeView { + return b.currentNode().Peers() +} + func (b *LocalBackend) GetFilterForTest() *filter.Filter { testenv.AssertInTest() nb := b.currentNode() diff --git a/ipn/routecheck/log.go b/ipn/routecheck/log.go new file mode 100644 index 000000000..3fcfa60aa --- /dev/null +++ b/ipn/routecheck/log.go @@ -0,0 +1,22 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package routecheck + +import ( + "log" +) + +func (c *Client) logf(format string, a ...any) { + if c.Logf != nil { + c.Logf(format, a...) + } else { + log.Printf(format, a...) + } +} + +func (c *Client) vlogf(format string, a ...any) { + if c.Verbose || debugRoutecheck() { + c.logf(format, a...) + } +} diff --git a/ipn/routecheck/routecheck.go b/ipn/routecheck/routecheck.go new file mode 100644 index 000000000..bb5808977 --- /dev/null +++ b/ipn/routecheck/routecheck.go @@ -0,0 +1,252 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +// Package routecheck performs status checks for routes from the current host. +package routecheck + +import ( + "cmp" + "context" + "errors" + "iter" + "math/rand/v2" + "net/netip" + "slices" + "time" + + "golang.org/x/sync/errgroup" + "tailscale.com/envknob" + "tailscale.com/ipn" + "tailscale.com/ipn/ipnstate" + "tailscale.com/syncs" + "tailscale.com/tailcfg" + "tailscale.com/types/logger" + "tailscale.com/types/netmap" + "tailscale.com/util/mak" + "tailscale.com/util/set" +) + +// Debugging and experimentation tweakables. +var ( + debugRoutecheck = envknob.RegisterBool("TS_DEBUG_ROUTECHECK") +) + +// Report contains the result of a single routecheck. +type Report struct { + // Now is the time when the report was finished. + Now time.Time + + // Reachable is the set of nodes that were reachable from the current host + // when this report was compiled. Missing nodes may or may not be reachable. + reachable set.Set[tailcfg.NodeID] +} + +// Client generates Reports describing the result of both passive and active +// reachability probing. +type Client struct { + // Verbose enables verbose logging. + Verbose bool + + // Logf optionally specifies where to log to. + // If nil, log.Printf is used. + Logf logger.Logf + + // These elements are read-only after initialization. + b LocalBackend +} + +type LocalBackend interface { + NetMap() *netmap.NetworkMap + Peers() []tailcfg.NodeView + Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, size int) (*ipnstate.PingResult, error) + WatchNotifications(ctx context.Context, mask ipn.NotifyWatchOpt, onWatchAdded func(), fn func(roNotify *ipn.Notify) (keepGoing bool)) +} + +// NewClient returns a client that probes using this [ipnlocal.LocalBackend]. +func NewClient(b LocalBackend) (*Client, error) { + if b == nil { + return nil, errors.New("LocalBackend must be set") + } + return &Client{b: b}, nil +} + +// Probe actively probes the sequence of nodes and returns a reachability [Report]. +// If limit is positive, it limits the number of concurrent active probes; +// a limit of zero will ping every node at once. +// This function tries both the IPv4 and IPv6 addresses +func (c *Client) Probe(ctx context.Context, nodes iter.Seq[tailcfg.NodeView], limit int) (*Report, error) { + var canIPv4, canIPv6 bool + for _, ip := range c.b.NetMap().SelfNode.Addresses().All() { + addr := ip.Addr() + if addr.Is4() { + canIPv4 = true + } else if addr.Is6() { + canIPv6 = true + } + } + + g, ctx := errgroup.WithContext(ctx) + if limit > 0 { + g.SetLimit(limit) + } + + var ( + mu syncs.Mutex + r = &Report{ + reachable: make(set.Set[tailcfg.NodeID]), + } + ) + + for n := range nodes { + // Ping one of the tailnet addresses. + for _, ip := range n.Addresses().All() { + // Skip this probe if there is an IP version mismatch. + addr := ip.Addr() + if addr.Is4() && !canIPv4 { + continue + } + if addr.Is6() && !canIPv6 { + continue + } + + g.Go(func() error { + nid := n.ID() + pong, err := c.b.Ping(ctx, addr, tailcfg.PingTSMP, 0) + if err != nil { + // Returning an error would cancel the errgroup. + c.vlogf("ping %s (%s): error: %v", addr, nid, err) + } else { + c.vlogf("ping %s (%s): result: %f ms (err: %v)", addr, nid, pong.LatencySeconds*1000, pong.Err) + } + + mu.Lock() + defer mu.Unlock() + r.reachable.Add(nid) + return nil + }) + break + } + } + g.Wait() + r.Now = time.Now() + return r, nil +} + +// ProbeAllPeers actively probes all peers in parallel and returns a [Report] +// that identifies which nodes are reachable. If limit is positive, it limits +// the number of concurrent active probes; a limit of zero will ping every +// candidate at once. +func (c *Client) ProbeAllPeers(ctx context.Context, limit int) (*Report, error) { + nm := c.waitForInitialNetMap(ctx) + return c.Probe(ctx, slices.Values(nm.Peers), limit) +} + +// ProbeAllHARouters actively probes all High Availability routers in parallel +// and returns a [Report] that identifies which of these routers are reachable. +// If limit is positive, it limits the number of concurrent active probes; +// a limit of zero will ping every candidate at once. +func (c *Client) ProbeAllHARouters(ctx context.Context, limit int) (*Report, error) { + nm := c.waitForInitialNetMap(ctx) + + // When a prefix is routed by multiple nodes, we probe those nodes. + // There is no point to probing a router when it is the only choice. + // These nodes are referred to a High Availability (HA) routers. + var nodes []tailcfg.NodeView + for _, rs := range c.RoutersByPrefix() { + if len(rs) <= 1 { + continue + } + nodes = append(nodes, rs...) // Note: this introduces duplicates. + } + + // Sort by Node.ID and deduplicate to avoid double-probing. + slices.SortFunc(nodes, func(a, b tailcfg.NodeView) int { + return cmp.Compare(a.ID(), b.ID()) + }) + slices.CompactFunc(nodes, func(a, b tailcfg.NodeView) bool { + return a.ID() == b.ID() + }) + + // To prevent swarming, each node should probe in a different order. + seed := uint64(nm.SelfNode.ID()) + rnd := rand.New(rand.NewPCG(seed, seed)) + rnd.Shuffle(len(nodes), func(i, j int) { + nodes[i], nodes[j] = nodes[j], nodes[i] + }) + + return c.Probe(ctx, slices.Values(nodes), limit) +} + +// WaitForInitialNetMap returns the current [netmap.NetworkMap], if present. +// If the network map is missing because the client just started, +// this function will wait for the control plane to send it before returning. +func (c *Client) waitForInitialNetMap(ctx context.Context) *netmap.NetworkMap { + nm := c.b.NetMap() + if nm != nil { + return nm + } + + // Wait for the initial NetworkMap to arrive: + c.b.WatchNotifications(ctx, ipn.NotifyInitialNetMap, nil, func(n *ipn.Notify) (keepGoing bool) { + nm = n.NetMap + return nm == nil // Keep going until nm contains a network map. + }) + return nm +} + +// Routers returns a sequence of nodes that are routers, which will advertise +// more [tailcfg.Node.AllowedIPs] than the node’s own [tailcfg.Node.Addresses]. +func (c *Client) Routers() iter.Seq[tailcfg.NodeView] { + return func(yield func(tailcfg.NodeView) bool) { + for _, n := range c.b.Peers() { + AllowedIPs: + for _, pfx := range n.AllowedIPs().All() { + // Routers never forward their own local addresses. + for _, addr := range n.Addresses().All() { + if pfx == addr { + continue AllowedIPs + } + } + if !yield(n) { + return + } + } + } + } +} + +// RoutersByPrefix returns a map of nodes that route for a particular subnet. +// Nodes that route for /0 prefixes are exit nodes, their subnet is the Internet. +func (c *Client) RoutersByPrefix() map[netip.Prefix][]tailcfg.NodeView { + var routers map[netip.Prefix][]tailcfg.NodeView + for _, n := range c.b.Peers() { + for _, pfx := range n.AllowedIPs().All() { + mak.Set(&routers, pfx, append(routers[pfx], n)) + } + continue + } + return routers +} + +// Routes returns a slice of subnets that the given node will route. +// If the node is an exit node, the result will contain at least one /0 prefix. +// If the node is a subnet router, the result will contain a smaller prefix. +// The result omits any prefix that is one of the node’s local addresses. +func routes(n tailcfg.NodeView) []netip.Prefix { + var routes []netip.Prefix +AllowedIPs: + for _, pfx := range n.AllowedIPs().All() { + // Routers never forward their own local addresses. + for _, addr := range n.Addresses().All() { + if pfx == addr { + continue AllowedIPs + } + } + routes = append(routes, pfx) + } + return routes +} + +// EarlyExit is used to exit early out of a [ipnext.NodeBackend.AppendMatchingPeers] loop. +// It is a sentinel type used by panic and recover. +type earlyExit struct{}