tstest/natlab/vmtest: add test for direct conn with cached netmap (#19660)

When a peer is not able to connect to control after a restart and is
using a cached netmap, that node should be able to connect to another
peer in its tailnet (given that the home DERP of that peer has not
changed in the meantime).

Add test that starts two peers and connects them to a tailnet with
caching enabled. Then blackhole traffic to control from one peer and
restart it. Verify that the connection between the two ends up direct.

Adds facilities for expecting a certain path type between nodes.

Updates: #19597

Signed-off-by: Claus Lensbøl <claus@tailscale.com>
This commit is contained in:
Claus Lensbøl
2026-05-08 16:57:27 -04:00
committed by GitHub
parent ee2378b141
commit 469d356ed8
4 changed files with 162 additions and 5 deletions

View File

@@ -436,6 +436,14 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr {
return n.vnetNode.LanIP(net)
}
// DropControlTraffic blackholes control traffic for this node only. It walks
// every network the node belongs to and asks that network to drop control
// traffic for the node's LAN address on it.
func (n *Node) DropControlTraffic() {
	for _, nw := range n.nets {
		lanIP := n.LanIP(nw)
		nw.BlackholeControlForAddr(lanIP)
	}
}
// NodeOption types for configuring nodes.
type nodeOptOS OSImage
@@ -1669,3 +1677,68 @@ func findKernelPath(goMod string) (string, error) {
}
return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod)
}
// PingRoute names the kind of connection that carried a Disco ping.
type PingRoute string

// Route kinds reported by classifyPing.
const (
	// PingRouteDirect is reported when the ping result has an endpoint that
	// is not recognized as a private address.
	PingRouteDirect = PingRoute("direct")
	// PingRouteDERP is reported when the ping result has no endpoint.
	PingRouteDERP = PingRoute("derp")
	// PingRouteLocal is reported when the ping result's endpoint is a
	// private address.
	PingRouteLocal = PingRoute("local")
	// PingRouteNil is reported when there is no ping result at all.
	PingRouteNil = PingRoute("nil")
)
// classifyPing maps a ping result onto the PingRoute that carried it. The
// answer is only really meaningful for Disco pings.
//
// A nil result yields PingRouteNil and an empty endpoint yields PingRouteDERP.
// An endpoint that parses as an address:port with a private address yields
// PingRouteLocal; any other endpoint yields PingRouteDirect.
func classifyPing(pr *ipnstate.PingResult) PingRoute {
	switch {
	case pr == nil:
		return PingRouteNil
	case pr.Endpoint == "":
		return PingRouteDERP
	}
	// An unparseable endpoint deliberately falls through to "direct".
	if addrPort, err := netip.ParseAddrPort(pr.Endpoint); err == nil && addrPort.Addr().IsPrivate() {
		return PingRouteLocal
	}
	return PingRouteDirect
}
// PingExpect retries disco pings from one node to another until the observed
// route matches wantRoute or the timeout is reached. It uses Disco pings
// because this is the only ping type that can classify the connection type.
//
// Each attempt is bounded to 3 seconds and attempts are spaced 500ms apart.
// On timeout the returned error reports the last route seen and, if the most
// recent attempt failed outright, the error from that attempt.
func (e *Env) PingExpect(from, to *Node, wantRoute PingRoute, timeout time.Duration) error {
	e.t.Helper()
	ctx, cancel := context.WithTimeout(e.t.Context(), timeout)
	defer cancel()

	toSt, err := to.agent.Status(ctx)
	if err != nil {
		return fmt.Errorf("ping: can't get %s status: %w", to.name, err)
	}
	if len(toSt.Self.TailscaleIPs) == 0 {
		return fmt.Errorf("ping: %s has no Tailscale IPs", to.name)
	}
	targetIP := toSt.Self.TailscaleIPs[0]

	var (
		lastRoute PingRoute
		lastErr   error // failure of the most recent attempt, if it failed
	)
	for ctx.Err() == nil {
		pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second)
		pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{})
		pingCancel()

		switch {
		case err != nil:
			lastErr = err
		case pr == nil:
			// Guard against dereferencing a nil result below.
			lastErr = fmt.Errorf("nil ping result")
		case pr.Err != "":
			lastErr = fmt.Errorf("%s", pr.Err)
		default:
			got := classifyPing(pr)
			e.t.Logf("Saw ping type %q", got)
			if got == wantRoute {
				return nil
			}
			lastRoute = got
			lastErr = nil
		}

		// Pause before retrying, but wake up early if the deadline passes.
		select {
		case <-time.After(500 * time.Millisecond):
		case <-ctx.Done():
		}
	}
	if lastErr != nil {
		return fmt.Errorf("ping route = %q, want %q (after %v); last ping error: %w", lastRoute, wantRoute, timeout, lastErr)
	}
	return fmt.Errorf("ping route = %q, want %q (after %v)", lastRoute, wantRoute, timeout)
}

View File

@@ -919,9 +919,6 @@ func TestCachedNetmapAfterRestart(t *testing.T) {
aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT)
bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT)
aNet.SetPostConnectControlBlackhole(true)
bNet.SetPostConnectControlBlackhole(true)
a := env.AddNode("a", aNet,
vmtest.OS(vmtest.Gokrazy),
tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
@@ -945,8 +942,9 @@ func TestCachedNetmapAfterRestart(t *testing.T) {
connectStep.End(nil)
cutControlStep.Begin()
aNet.PostConnectedToControl()
bNet.PostConnectedToControl()
// Both nodes lose connection to control
a.DropControlTraffic()
b.DropControlTraffic()
env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) {
panic(fmt.Sprintf("got connection from %v", nk))
})
@@ -978,3 +976,56 @@ func TestCachedNetmapAfterRestart(t *testing.T) {
}
pingStep.End(nil)
}
// TestDirectConnectionWithCachedNetmapOnOneNode verifies that a node with
// netmap caching enabled (NodeAttrCacheNetworkMaps) can re-establish a direct
// connection to a peer after it is restarted while it cannot reach the
// control server. Only node a is cut off from control and restarted; node b
// stays online. After the restart a must use only its on-disk cached netmaps
// to re-connect and ping b.
func TestDirectConnectionWithCachedNetmapOnOneNode(t *testing.T) {
	env := vmtest.New(t)

	aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT)
	bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT)

	a := env.AddNode("a", aNet,
		vmtest.OS(vmtest.Gokrazy),
		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})
	b := env.AddNode("b", bNet,
		vmtest.OS(vmtest.Gokrazy),
		tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil})

	cutControlStep := env.AddStep("Cut control server access")
	restartStep := env.AddStep("Restart tailscaled on a")
	tsmpPingStep := env.AddStep("Ping a → b TSMP (cached netmap, no control)")
	discoPingStep := env.AddStep("Ping a → b Disco (want Direct)")

	env.Start()

	cutControlStep.Begin()
	a.DropControlTraffic()
	// From here on, any map request from a means the blackhole is not
	// working; b is still allowed to talk to control.
	env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) {
		if env.ControlServer().Node(nk).Name == a.Name() {
			panic(fmt.Sprintf("got connection from %v", a.Name()))
		}
	})
	cutControlStep.End(nil)

	restartStep.Begin()
	env.RestartTailscaled(a)
	restartStep.End(nil)

	tsmpPingStep.Begin()
	if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil {
		tsmpPingStep.End(err)
		t.Fatal(err)
	}
	tsmpPingStep.End(nil)

	discoPingStep.Begin()
	if err := env.PingExpect(a, b, vmtest.PingRouteDirect, 30*time.Second); err != nil {
		discoPingStep.End(err)
		t.Fatal(err)
	}
	discoPingStep.End(nil)
}

View File

@@ -445,6 +445,12 @@ func (n *Network) PostConnectedToControl() {
n.network.SetControlBlackholed(n.postConnectBlackholeControl)
}
// BlackholeControlForAddr makes the network drop all control traffic for the
// specified addr, starting immediately. It delegates to the underlying
// network's method of the same name.
func (n *Network) BlackholeControlForAddr(addr netip.Addr) {
n.network.BlackholeControlForAddr(addr)
}
// NetworkService is a service that can be added to a network.
type NetworkService string

View File

@@ -606,6 +606,9 @@ type network struct {
// writers is a map of MAC -> networkWriters to write packets to that MAC.
// It contains entries for connected nodes only.
writers syncs.Map[MAC, networkWriter] // MAC -> to networkWriter for that MAC
blackholeMu sync.Mutex
blackholeMap map[netip.Addr]netip.Addr // blackholeMap contains address pairs for dropping traffic (in either direction)
}
// registerWriter registers a client address with a MAC address.
@@ -653,6 +656,19 @@ func (n *network) SetControlBlackholed(v bool) {
n.blackholeControl = v
}
// BlackholeControlForAddr records addr in the blackhole map, paired with the
// fake control address of the matching IP family, so that traffic between
// addr and control is dropped in either direction.
func (n *network) BlackholeControlForAddr(addr netip.Addr) {
	// Pick the control address in the same family as addr.
	controlAddr := fakeControl.v4
	if addr.Is6() {
		controlAddr = fakeControl.v6
	}

	n.blackholeMu.Lock()
	defer n.blackholeMu.Unlock()
	mak.Set(&n.blackholeMap, addr, controlAddr)
}
// nodeNIC represents a single network interface on a node.
// For multi-homed nodes, additional NICs beyond the primary are stored in node.extraNICs.
type nodeNIC struct {
@@ -1621,6 +1637,17 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) {
// Blackhole the packet.
return
}
// Drop traffic to/from address pairs in the blackholeMap.
n.blackholeMu.Lock()
defer n.blackholeMu.Unlock()
if src, ok := n.blackholeMap[flow.dst]; ok && flow.src == src {
return
}
if dst, ok := n.blackholeMap[flow.src]; ok && flow.dst == dst {
return
}
var base *layers.BaseLayer
proto := header.IPv4ProtocolNumber
if v4, ok := packet.Layer(layers.LayerTypeIPv4).(*layers.IPv4); ok {