mirror of
https://github.com/tailscale/tailscale.git
synced 2026-04-03 06:02:30 -04:00
ipn/routecheck: track reachability changes incrementally
The routecheck client will watch the IPN bus for WireGuard engine updates, ipn.NotifyWatchEngineUpdates, which supplies it with status updates on a regular timer and also on immediate changes. The GUI clients already use this mechanism over the Local Client API, so we aren’t introducing any significant overhead. Routecheck will track established WireGuard tunnels using these engine updates. If a tunnel is established and this node is receiving traffic over that link, then we can conclude that the destination peer is not just reachable, but actively in use. Due to Cryptokey Routing, we should prefer active tunnels to avoid breaking existing connections. Updates #17366 Updates tailscale/corp#33033 Signed-off-by: Simon Law <sfllaw@tailscale.com>
This commit is contained in:
119
ipn/routecheck/incremental.go
Normal file
119
ipn/routecheck/incremental.go
Normal file
@@ -0,0 +1,119 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package routecheck
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/netip"
|
||||
"time"
|
||||
|
||||
"tailscale.com/ipn/ipnstate"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/key"
|
||||
"tailscale.com/types/netmap"
|
||||
"tailscale.com/util/set"
|
||||
"tailscale.com/wgengine"
|
||||
)
|
||||
|
||||
// Init loads the initial [netmap.NetworkMap] assuming that a peer is reachable
|
||||
// if it’s connected to the control plane, i.e. [tailcfg.Hostinfo.Online] is set.
|
||||
// That’s not necessarily true, but we must make early routing decisions
|
||||
// before active probing is complete.
|
||||
func (c *Client) init(nm *netmap.NetworkMap) {
|
||||
var r = &Report{
|
||||
reachable: make(set.Set[tailcfg.NodeID]),
|
||||
}
|
||||
|
||||
nids := make(map[key.NodePublic]tailcfg.NodeID)
|
||||
for _, n := range nm.Peers {
|
||||
if !n.Valid() {
|
||||
continue
|
||||
}
|
||||
if len(routes(n)) == 0 {
|
||||
// Connectors, i.e. exit nodes or subnet routers,
|
||||
// are the only nodes that are chosen by reachability.
|
||||
// Peer with no routes don’t need to be checked.
|
||||
continue
|
||||
}
|
||||
if n.Online().Get() {
|
||||
r.reachable.Add(n.ID())
|
||||
nids[n.Key()] = n.ID()
|
||||
}
|
||||
}
|
||||
r.Now = time.Now()
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.report = r
|
||||
c.nids = nids
|
||||
}
|
||||
|
||||
// Watch compares the previous set of traffic flows to the current ones.
|
||||
// If we are receiving data from a peer, then we know that it is reachable.
|
||||
// Otherwise, we will need to actively probe that peer to be sure.
|
||||
func (c *Client) watch(flows map[key.NodePublic]ipnstate.PeerStatusLite) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
// TODO: consult the netmap to remove nodes that are gone and add new nodes.
|
||||
|
||||
prev := c.flows
|
||||
for k, s := range c.flows {
|
||||
if prev[k].RxBytes != s.RxBytes { // wraparound is possible
|
||||
nid := c.nids[k]
|
||||
c.report.reachable.Add(nid)
|
||||
}
|
||||
}
|
||||
c.report.Now = time.Now()
|
||||
c.flows = flows
|
||||
|
||||
// TODO: What do I do with good after this? Is this where we set the tripwire?
|
||||
}
|
||||
|
||||
// Report generates and returns a reachability report by either
|
||||
// passively checking for activity in each node’s [ipnstate.PeerStatusLite] or
|
||||
// by actively probing.
|
||||
func (c *Client) Report(ctx context.Context) (*Report, error) {
|
||||
status := c.b.Status().Peer
|
||||
r := Report{reachable: make(set.Set[tailcfg.NodeID])}
|
||||
for pfx, peers := range c.RoutersByPrefix() {
|
||||
for _, n := range peers {
|
||||
nid := n.ID()
|
||||
if _, ok := r.reachable[nid]; ok {
|
||||
continue // Already probed
|
||||
}
|
||||
|
||||
if st := status[n.Key()]; st != nil {
|
||||
rx, tx := st.RxBytes, st.TxBytes
|
||||
last := st.LastHandshake
|
||||
// Check if the previous status is any good
|
||||
}
|
||||
}
|
||||
}
|
||||
r.Now = time.Now()
|
||||
return &r, nil
|
||||
}
|
||||
|
||||
// GetReport gets a report by probing all .
|
||||
func (c *Client) UpdateReport(ctx context.Context, r *Report, routes []netip.Prefix) (*Report, error) {
|
||||
return &Report{
|
||||
Now: time.Now(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// TODO: The GUIs use something like NotifyWatchEngineUpdates on the ipnbus. We should do something similar, since that will update things every 2 seconds via c.b.pollRequestEngineStatus.
|
||||
// We should also check ipn.NotifyInitialNetMap to just set Online for everything.
|
||||
// StatusCallback
|
||||
func (c *Client) setWgengineStatus(s *wgengine.Status, err error) {
|
||||
if err != nil {
|
||||
c.logf("wgengine status error: %v", err)
|
||||
return
|
||||
}
|
||||
if s == nil {
|
||||
c.logf("[unexpected] non-error wgengine update with status=nil: %v", s)
|
||||
return
|
||||
}
|
||||
p := s.Peers
|
||||
|
||||
}
|
||||
@@ -20,6 +20,7 @@
|
||||
"tailscale.com/ipn/ipnstate"
|
||||
"tailscale.com/syncs"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/key"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/types/netmap"
|
||||
"tailscale.com/util/mak"
|
||||
@@ -52,13 +53,21 @@ type Client struct {
|
||||
Logf logger.Logf
|
||||
|
||||
// These elements are read-only after initialization.
|
||||
b LocalBackend
|
||||
b LocalBackend
|
||||
cancel context.CancelFunc
|
||||
|
||||
// The mutex protects the following elements.
|
||||
mu syncs.Mutex
|
||||
report *Report
|
||||
nids map[key.NodePublic]tailcfg.NodeID
|
||||
flows map[key.NodePublic]ipnstate.PeerStatusLite
|
||||
}
|
||||
|
||||
type LocalBackend interface {
|
||||
NetMap() *netmap.NetworkMap
|
||||
Peers() []tailcfg.NodeView
|
||||
Ping(ctx context.Context, ip netip.Addr, pingType tailcfg.PingType, size int) (*ipnstate.PingResult, error)
|
||||
Status() *ipnstate.Status
|
||||
WatchNotifications(ctx context.Context, mask ipn.NotifyWatchOpt, onWatchAdded func(), fn func(roNotify *ipn.Notify) (keepGoing bool))
|
||||
WhoIs(proto string, ipp netip.AddrPort) (n tailcfg.NodeView, u tailcfg.UserProfile, ok bool)
|
||||
}
|
||||
@@ -282,6 +291,28 @@ func routes(n tailcfg.NodeView) []netip.Prefix {
|
||||
return routes
|
||||
}
|
||||
|
||||
// Start registers the client the [ipnlocal.LocalBackend]’s IPN bus
|
||||
// to bootstrap with the initial network map and to watch for traffic flows.
|
||||
func (c *Client) Start(ctx context.Context) {
|
||||
ctx, c.cancel = context.WithCancel(ctx)
|
||||
opts := ipn.NotifyInitialNetMap | ipn.NotifyWatchEngineUpdates | ipn.NotifyRateLimit
|
||||
c.b.WatchNotifications(ctx, opts, nil, func(n *ipn.Notify) bool {
|
||||
if n.NetMap != nil {
|
||||
c.init(n.NetMap)
|
||||
}
|
||||
if n.Engine != nil {
|
||||
c.watch(n.Engine.LivePeers)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// Close implements the [io.Closer] interface.
|
||||
func (c *Client) Close() error {
|
||||
c.cancel()
|
||||
return nil
|
||||
}
|
||||
|
||||
// earlyExit is used to exit early out of a [ipnext.NodeBackend.AppendMatchingPeers] loop.
|
||||
// It is a sentinel type used by panic and recover.
|
||||
type earlyExit struct{} // empty struct: carries no data, only its type is significant
|
||||
|
||||
Reference in New Issue
Block a user