diff --git a/tstest/natlab/vmtest/vmtest.go b/tstest/natlab/vmtest/vmtest.go index 9b029a119..df3cca81a 100644 --- a/tstest/natlab/vmtest/vmtest.go +++ b/tstest/natlab/vmtest/vmtest.go @@ -436,6 +436,14 @@ func (n *Node) LanIP(net *vnet.Network) netip.Addr { return n.vnetNode.LanIP(net) } +// DropControlTraffic sets up a blackhole for control traffic for just this +// node on all the networks belonging to the node. +func (n *Node) DropControlTraffic() { + for _, network := range n.nets { + network.BlackholeControlForAddr(n.LanIP(network)) + } +} + // NodeOption types for configuring nodes. type nodeOptOS OSImage @@ -1669,3 +1677,68 @@ func findKernelPath(goMod string) (string, error) { } return "", fmt.Errorf("gokrazy-kernel not found in %s", goMod) } + +// PingRoute describes what connection type was used to transfer a Disco ping. +type PingRoute string + +const ( + PingRouteDirect PingRoute = "direct" + PingRouteDERP PingRoute = "derp" + PingRouteLocal PingRoute = "local" + PingRouteNil PingRoute = "nil" +) + +// classifyPing finds what kind of route has been used on a ping path. +// It is only really relevant for DiscoPings. +func classifyPing(pr *ipnstate.PingResult) PingRoute { + if pr == nil { + return PingRouteNil + } + + if pr.Endpoint == "" { + return PingRouteDERP + } + + ap, err := netip.ParseAddrPort(pr.Endpoint) + if err == nil && ap.Addr().IsPrivate() { + return PingRouteLocal + } + return PingRouteDirect +} + +// PingExpect retries disco pings until the result matches wantRoute or the +// timeout is reached. It uses disco pings because they are the only ping type +// whose connection type can be classified. 
+func (e *Env) PingExpect(from, to *Node, wantRoute PingRoute, timeout time.Duration) error { + e.t.Helper() + ctx, cancel := context.WithTimeout(e.t.Context(), timeout) + defer cancel() + var lastRoute PingRoute + toSt, err := to.agent.Status(ctx) + if err != nil { + return fmt.Errorf("ping: can't get %s status: %w", to.name, err) + } + if len(toSt.Self.TailscaleIPs) == 0 { + return fmt.Errorf("ping: %s has no Tailscale IPs", to.name) + } + targetIP := toSt.Self.TailscaleIPs[0] + for ctx.Err() == nil { + pingCtx, pingCancel := context.WithTimeout(ctx, 3*time.Second) + pr, err := from.agent.PingWithOpts(pingCtx, targetIP, tailcfg.PingDisco, local.PingOpts{}) + pingCancel() + if err == nil && pr.Err == "" { + if got := classifyPing(pr); got == wantRoute { + e.t.Logf("Saw ping type %q", got) + return nil + } else { + e.t.Logf("Saw ping type %q", got) + lastRoute = got + } + } + select { + case <-time.After(500 * time.Millisecond): + case <-ctx.Done(): + } + } + return fmt.Errorf("ping route = %q, want %q (after %v)", lastRoute, wantRoute, timeout) +} diff --git a/tstest/natlab/vmtest/vmtest_test.go b/tstest/natlab/vmtest/vmtest_test.go index cadf570d1..9390c2267 100644 --- a/tstest/natlab/vmtest/vmtest_test.go +++ b/tstest/natlab/vmtest/vmtest_test.go @@ -919,9 +919,6 @@ func TestCachedNetmapAfterRestart(t *testing.T) { aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT) bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT) - aNet.SetPostConnectControlBlackhole(true) - bNet.SetPostConnectControlBlackhole(true) - a := env.AddNode("a", aNet, vmtest.OS(vmtest.Gokrazy), tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil}) @@ -945,8 +942,9 @@ func TestCachedNetmapAfterRestart(t *testing.T) { connectStep.End(nil) cutControlStep.Begin() - aNet.PostConnectedToControl() - bNet.PostConnectedToControl() + // Both nodes lose connection to control + a.DropControlTraffic() + b.DropControlTraffic() env.ControlServer().SetOnMapRequest(func(nk 
key.NodePublic) { panic(fmt.Sprintf("got connection from %v", nk)) }) @@ -978,3 +976,56 @@ func TestCachedNetmapAfterRestart(t *testing.T) { } pingStep.End(nil) } + +// TestDirectConnectionWithCachedNetmapOnOneNode verifies that two nodes with netmap +// caching enabled (NodeAttrCacheNetworkMaps) can re-establish a direct +// WireGuard tunnel after one is restarted while the control server is +// unreachable from it. After restart the node must use only its on-disk cached +// netmaps to re-connect and ping the other (still online) node. +func TestDirectConnectionWithCachedNetmapOnOneNode(t *testing.T) { + env := vmtest.New(t) + + aNet := env.AddNetwork("1.0.0.1", "192.168.1.1/24", vnet.EasyNAT) + bNet := env.AddNetwork("2.0.0.1", "192.168.2.1/24", vnet.EasyNAT) + + a := env.AddNode("a", aNet, + vmtest.OS(vmtest.Gokrazy), + tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil}) + b := env.AddNode("b", bNet, + vmtest.OS(vmtest.Gokrazy), + tailcfg.NodeCapMap{tailcfg.NodeAttrCacheNetworkMaps: nil}) + + cutControlStep := env.AddStep("Cut control server access") + restartStep := env.AddStep("Restart tailscaled on a") + tsmpPingStep := env.AddStep("Ping a → b TSMP (cached netmap, no control)") + DiscoPingStep := env.AddStep("Ping a → b Disco (want Direct)") + + env.Start() + + cutControlStep.Begin() + a.DropControlTraffic() + env.ControlServer().SetOnMapRequest(func(nk key.NodePublic) { + if env.ControlServer().Node(nk).Name == a.Name() { + panic(fmt.Sprintf("got connection from %v", a.Name())) + } + }) + cutControlStep.End(nil) + + restartStep.Begin() + env.RestartTailscaled(a) + restartStep.End(nil) + + tsmpPingStep.Begin() + if err := env.Ping(a, b, tailcfg.PingTSMP, 30*time.Second); err != nil { + tsmpPingStep.End(err) + t.Fatal(err) + } + tsmpPingStep.End(nil) + + DiscoPingStep.Begin() + if err := env.PingExpect(a, b, vmtest.PingRouteDirect, 30*time.Second); err != nil { + DiscoPingStep.End(err) + t.Fatal(err) + } + DiscoPingStep.End(nil) +} diff --git 
a/tstest/natlab/vnet/conf.go b/tstest/natlab/vnet/conf.go index 7cfd0e38c..191de9e18 100644 --- a/tstest/natlab/vnet/conf.go +++ b/tstest/natlab/vnet/conf.go @@ -445,6 +445,12 @@ func (n *Network) PostConnectedToControl() { n.network.SetControlBlackholed(n.postConnectBlackholeControl) } +// BlackholeControlForAddr makes the network drop all control +// traffic to or from the specified addr, starting immediately. +func (n *Network) BlackholeControlForAddr(addr netip.Addr) { + n.network.BlackholeControlForAddr(addr) +} + // NetworkService is a service that can be added to a network. type NetworkService string diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go index c11e32843..06c382aef 100644 --- a/tstest/natlab/vnet/vnet.go +++ b/tstest/natlab/vnet/vnet.go @@ -606,6 +606,9 @@ type network struct { // writers is a map of MAC -> networkWriters to write packets to that MAC. // It contains entries for connected nodes only. writers syncs.Map[MAC, networkWriter] // MAC -> to networkWriter for that MAC + + blackholeMu sync.Mutex + blackholeMap map[netip.Addr]netip.Addr // blackholeMap contains address pairs for dropping traffic (in either direction) } // registerWriter registers a client address with a MAC address. @@ -653,6 +656,19 @@ func (n *network) SetControlBlackholed(v bool) { n.blackholeControl = v } +// BlackholeControlForAddr records a blackholeMap entry pairing addr with the +// control address of the same IP family, so traffic between them is dropped. +func (n *network) BlackholeControlForAddr(addr netip.Addr) { + n.blackholeMu.Lock() + defer n.blackholeMu.Unlock() + + if addr.Is6() { + mak.Set(&n.blackholeMap, addr, fakeControl.v6) + } else { + mak.Set(&n.blackholeMap, addr, fakeControl.v4) + } +} + // nodeNIC represents a single network interface on a node. // For multi-homed nodes, additional NICs beyond the primary are stored in node.extraNICs. 
type nodeNIC struct { @@ -1621,6 +1637,17 @@ func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) { // Blackhole the packet. return } + + // Drop traffic to/from address pairs in the blackholeMap. + n.blackholeMu.Lock() + defer n.blackholeMu.Unlock() + if src, ok := n.blackholeMap[flow.dst]; ok && flow.src == src { + return + } + if dst, ok := n.blackholeMap[flow.src]; ok && flow.dst == dst { + return + } + var base *layers.BaseLayer proto := header.IPv4ProtocolNumber if v4, ok := packet.Layer(layers.LayerTypeIPv4).(*layers.IPv4); ok {