From 469d356ed899c0a097e6bcb7907fea819dcf3cba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claus=20Lensb=C3=B8l?= Date: Fri, 8 May 2026 16:57:27 -0400 Subject: [PATCH] tstest/natlab/vmtest: add test for direct conn with cached netmap (#19660) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a peer is not able to connect to control after a restart and is using a cached netmap, that node should be able to connect to another peer in its tailnet (given that the home DERP of that peer has not changed in the meantime). Add a test that starts two peers and connects them to a tailnet with caching enabled. Then blackhole traffic to control from one peer and restart it. Verify that the connection between the two ends up direct. Adds facilities for expecting a certain path type between nodes. Updates: #19597 Signed-off-by: Claus Lensbøl --- tstest/natlab/vmtest/vmtest.go | 73 +++++++++++++++++++++++++++++ tstest/natlab/vmtest/vmtest_test.go | 61 ++++++++++++++++++++++-- tstest/natlab/vnet/conf.go | 6 +++ tstest/natlab/vnet/vnet.go | 27 +++++++++++ 4 files changed, 162 insertions(+), 5 deletions(-) diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go index 9b029a119..df3cca81a 100644 --- a/tstest/natlab/vmtest/vmtest.go +++ b/tstest/natlab/vmtest/vmtest.go @@ -436,6 +436,14 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr { return n.vnetNode.LanIP(net) } +// DropControlTraffic sets up a blackhole for control traffic for just this +// node on all the networks belonging to the node. +func (n *Node) DropControlTraffic() { + for _, network := range n.nets { + network.BlackholeControlForAddr(n.LanIP(network)) + } +} + // NodeOption types for configuring nodes. type nodeOptOS OSImage @@ -1669,3 +1677,68 @@ func findKernelPath(goMod string) (string, error) { } return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod) } + +// PingRoute describes what connection type was used to transfer a Disco ping.
+type PingRoute string + +const ( + PingRouteDirect PingRoute = "direct" + PingRouteDERP PingRoute = "derp" + PingRouteLocal PingRoute = "local" + PingRouteNil PingRoute = "nil" +) + +// classifyPing reports the route used by a ping: nil result, DERP (no endpoint), +// local (private endpoint address) or direct. Only meaningful for disco pings. +func classifyPing(pr *ipnstate.PingResult) PingRoute { + if pr == nil { + return PingRouteNil + } + + if pr.Endpoint == "" { + return PingRouteDERP + } + + ap, err := netip.ParseAddrPort(pr.Endpoint) + if err == nil && ap.Addr().IsPrivate() { + return PingRouteLocal + } + return PingRouteDirect +} + +// PingExpect retries disco pings until the result matches wantRoute or the +// timeout is reached; failed pings are retried without being logged. It uses +// disco pings as this is the only ping type that can classify the connection type. +func (e *Env) PingExpect(from, to *Node, wantRoute PingRoute, timeout time.Duration) error { + e.t.Helper() + ctx, cancel := context.WithTimeout(e.t.Context(), timeout) + defer cancel() + var lastRoute PingRoute + toSt, err := to.agent.Status(ctx) + if err != nil { + return fmt.Errorf("ping: can't get %s status: %w", to.name, err) + } + if len(toSt.Self.TailscaleIPs) == 0 { + return fmt.Errorf("ping: %s has no Tailscale IPs", to.name) + } + targetIP := toSt.Self.TailscaleIPs[0] + for ctx.Err() == nil { + pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second) + pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{}) + pingCancel() + if err == nil && pr.Err == "" { + if got := classifyPing(pr); got == wantRoute { + e.t.Logf("Saw ping type %q", got) + return nil + } else { + e.t.Logf("Saw ping type %q", got) + lastRoute = got + } + } + select { + case <-time.After(500 * time.Millisecond): + case <-ctx.Done(): + } + } + return fmt.Errorf("ping route = %q, want %q (after %v)", lastRoute, wantRoute, timeout) +} diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go index cadf570d1..9390c2267 100644 --- 
a/tstest/natlab/vmtest/vmtest_test.go +++ b/tstest/natlab/vmtest/vmtest_test.go @@ -919,9 +919,6 @@ func TestCachedNetmapAfterRestart(t *testing.T) { aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT) bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT) - aNet.SetPostConnectControlBlackhole(true) - bNet.SetPostConnectControlBlackhole(true) - a := env.AddNode("a", aNet, vmtest.OS(vmtest.Gokrazy), tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil}) @@ -945,8 +942,9 @@ func TestCachedNetmapAfterRestart(t *testing.T) { connectStep.End(nil) cutControlStep.Begin() - aNet.PostConnectedToControl() - bNet.PostConnectedToControl() + // Both nodes lose connection to control + a.DropControlTraffic() + b.DropControlTraffic() env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) { panic(fmt.Sprintf("got connection from %v", nk)) }) @@ -978,3 +976,56 @@ func TestCachedNetmapAfterRestart(t *testing.T) { } pingStep.End(nil) } + +// TestDirectConnectionWithCachedNetmapOnOneNode verifies that two nodes with +// netmap caching enabled (NodeAttrCacheNetworkMaps) can re-establish a direct +// WireGuard tunnel after one is restarted while it cannot reach the control +// server. After restart the node must use only its on-disk cached +// netmaps to re-connect and ping the other (still online) node.
+func TestDirectConnectionWithCachedNetmapOnOneNode(t *testing.T) { + env := vmtest.New(t) + + aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT) + bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT) + + a := env.AddNode("a", aNet, + vmtest.OS(vmtest.Gokrazy), + tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil}) + b := env.AddNode("b", bNet, + vmtest.OS(vmtest.Gokrazy), + tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil}) + + cutControlStep := env.AddStep("Cut control server access") + restartStep := env.AddStep("Restart tailscaled on a") + tsmpPingStep := env.AddStep("Ping a → b TSMP (cached netmap, no control)") + discoPingStep := env.AddStep("Ping a → b Disco (want Direct)") + + env.Start() + + cutControlStep.Begin() + a.DropControlTraffic() + env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) { + if env.ControlServer().Node(nk).Name == a.Name() { + panic(fmt.Sprintf("got connection from %v", a.Name())) + } + }) + cutControlStep.End(nil) + + restartStep.Begin() + env.RestartTailscaled(a) + restartStep.End(nil) + + tsmpPingStep.Begin() + if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil { + tsmpPingStep.End(err) + t.Fatal(err) + } + tsmpPingStep.End(nil) + + discoPingStep.Begin() + if err := env.PingExpect(a, b, vmtest.PingRouteDirect, 30*time.Second); err != nil { + discoPingStep.End(err) + t.Fatal(err) + } + discoPingStep.End(nil) +} diff --git a/tstest/natlab/vnet/conf.go b/tstest/natlab/vnet/conf.go index 7cfd0e38c..191de9e18 100644 --- a/tstest/natlab/vnet/conf.go +++ b/tstest/natlab/vnet/conf.go @@ -445,6 +445,12 @@ func (n *Network) PostConnectedToControl() { n.network.SetControlBlackholed(n.postConnectBlackholeControl) } +// BlackholeControlForAddr makes the network drop all control +// traffic for the specified addr starting immediately.
+func (n *Network) BlackholeControlForAddr(addr netip.Addr) { + n.network.BlackholeControlForAddr(addr) +} + // NetworkService is a service that can be added to a network. type NetworkService string diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index c11e32843..06c382aef 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -606,6 +606,9 @@ type network struct { // writers is a map of MAC -> networkWriters to write packets to that MAC. // It contains entries for connected nodes only. writers syncs.Map[MAC, networkWriter] // MAC -> to networkWriter for that MAC + + blackholeMu sync.Mutex // guards blackholeMap + blackholeMap map[netip.Addr]netip.Addr // blackholeMap maps a node address to the peer address it is blackholed from; traffic between the pair is dropped (in either direction) } // registerWriter registers a client address with a MAC address. @@ -653,6 +656,19 @@ func (n *network) SetControlBlackholed(v bool) { n.blackholeControl = v } +// BlackholeControlForAddr adds a blackholeMap entry pairing addr with the fake +// control address of the same IP family, so traffic between them is dropped. +func (n *network) BlackholeControlForAddr(addr netip.Addr) { + n.blackholeMu.Lock() + defer n.blackholeMu.Unlock() + + if addr.Is6() { + mak.Set(&n.blackholeMap, addr, fakeControl.v6) + } else { + mak.Set(&n.blackholeMap, addr, fakeControl.v4) + } +} + // nodeNIC represents a single network interface on a node. // For multi-homed nodes, additional NICs beyond the primary are stored in node.extraNICs. type nodeNIC struct { @@ -1621,6 +1637,17 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) { // Blackhole the packet. return } + + // Drop traffic to/from address pairs in the blackholeMap.
+ n.blackholeMu.Lock() + defer n.blackholeMu.Unlock() + if src, ok := n.blackholeMap[flow.dst]; ok && flow.src == src { + return + } + if dst, ok := n.blackholeMap[flow.src]; ok && flow.dst == dst { + return + } + var base *layers.BaseLayer proto := header.IPv4ProtocolNumber if v4, ok := packet.Layer(layers.LayerTypeIPv4).(*layers.IPv4); ok {