From 364b952d62a3f293384833d35e5f5da83475634c Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Tue, 26 May 2026 23:09:52 +0000 Subject: [PATCH] cmd/containerboot: track peers from IPN bus updates, stop using netmap.NetworkMap Some tests in another repo were broken by tailscale/tailscale#19607. This fixes them, by finishing off the rest of the migration away from netmap.NetworkMap on the IPN bus in containerboot. Containerboot used to rebuild a full NetworkMap-shaped view while reacting to IPN bus notifications. Now it instead has its own netmapState type (immutable) of exactly what it needs to track, and sends those immutable values around, making cheap edits of new immutable values when an IPN bus edit arrives. This should make cmd/containerboot scale to much larger tailnets now too. Fixes #19852 Fixes tailscale/corp#42347 Updates #12542 Change-Id: I88adaf061f85f677f954a764935e6654329d75a6 Signed-off-by: Brad Fitzpatrick --- cmd/containerboot/egressservices.go | 45 +++-- cmd/containerboot/main.go | 226 +++++++++++++++++++------ cmd/containerboot/main_test.go | 143 +++++++--------- cmd/containerboot/serve.go | 8 - control/controlclient/map_test.go | 50 ++++++ flake.nix | 2 +- flakehashes.json | 4 +- go.mod | 1 + go.sum | 2 + ipn/backend.go | 29 +++- ipn/ipnlocal/local.go | 36 +++- ipn/ipnlocal/state_test.go | 43 +++++ ipn/ipnstate/ipnstate.go | 3 + shell.nix | 2 +- tstest/integration/integration_test.go | 90 ++++++++++ 15 files changed, 508 insertions(+), 176 deletions(-) diff --git a/cmd/containerboot/egressservices.go b/cmd/containerboot/egressservices.go index abde12523..7ae7110ab 100644 --- a/cmd/containerboot/egressservices.go +++ b/cmd/containerboot/egressservices.go @@ -27,7 +27,7 @@ "tailscale.com/kube/egressservices" "tailscale.com/kube/kubeclient" "tailscale.com/kube/kubetypes" - "tailscale.com/types/netmap" + "tailscale.com/types/views" "tailscale.com/util/httpm" "tailscale.com/util/linuxfw" "tailscale.com/util/mak" @@ -55,7 +55,7 @@ type 
egressProxy struct { tsClient *local.Client // never nil - netmapChan chan *netmap.NetworkMap // chan to receive netmap updates on + netmapChan chan netmapState // chan to receive netmap state updates on podIPv4 string // never empty string, currently only IPv4 is supported @@ -87,7 +87,7 @@ type httpClient interface { // - the mounted egress config has changed // - the proxy's tailnet IP addresses have changed // - tailnet IPs have changed for any backend targets specified by tailnet FQDN -func (ep *egressProxy) run(ctx context.Context, nm *netmap.NetworkMap, opts egressProxyRunOpts) error { +func (ep *egressProxy) run(ctx context.Context, nm netmapState, opts egressProxyRunOpts) error { ep.configure(opts) var tickChan <-chan time.Time var eventChan <-chan fsnotify.Event @@ -136,7 +136,7 @@ type egressProxyRunOpts struct { kc kubeclient.Client tsClient *local.Client stateSecret string - netmapChan chan *netmap.NetworkMap + netmapChan chan netmapState podIPv4 string tailnetAddrs []netip.Prefix } @@ -165,7 +165,7 @@ func (ep *egressProxy) configure(opts egressProxyRunOpts) { // any firewall rules need to be updated. Currently using status in state Secret as a reference for what is the current // firewall configuration is good enough because - the status is keyed by the Pod IP - we crash the Pod on errors such // as failed firewall update -func (ep *egressProxy) sync(ctx context.Context, nm *netmap.NetworkMap) error { +func (ep *egressProxy) sync(ctx context.Context, nm netmapState) error { cfgs, err := ep.getConfigs() if err != nil { return fmt.Errorf("error retrieving egress service configs: %w", err) @@ -186,16 +186,15 @@ func (ep *egressProxy) sync(ctx context.Context, nm *netmap.NetworkMap) error { return nil } -// addrsHaveChanged returns true if the provided netmap update contains tailnet address change for this proxy node. -// Netmap must not be nil. 
-func (ep *egressProxy) addrsHaveChanged(nm *netmap.NetworkMap) bool { - return !reflect.DeepEqual(ep.tailnetAddrs, nm.SelfNode.Addresses()) +// addrsHaveChanged returns true if the provided netmap state contains tailnet address change for this proxy node. +func (ep *egressProxy) addrsHaveChanged(nm netmapState) bool { + return !views.SliceEqual(views.SliceOf(ep.tailnetAddrs), nm.self.Addresses()) } // syncEgressConfigs adds and deletes firewall rules to match the desired // configuration. It uses the provided status to determine what is currently // applied and updates the status after a successful sync. -func (ep *egressProxy) syncEgressConfigs(cfgs egressservices.Configs, status *egressservices.Status, nm *netmap.NetworkMap) (*egressservices.Status, error) { +func (ep *egressProxy) syncEgressConfigs(cfgs egressservices.Configs, status *egressservices.Status, nm netmapState) (*egressservices.Status, error) { if !(wantsServicesConfigured(cfgs) || hasServicesConfigured(status)) { return nil, nil } @@ -234,7 +233,7 @@ func (ep *egressProxy) syncEgressConfigs(cfgs egressservices.Configs, status *eg // family. for _, t := range tailnetTargetIPs { var local netip.Addr - for _, pfx := range nm.SelfNode.Addresses().All() { + for _, pfx := range nm.self.Addresses().All() { if !pfx.IsSingleIP() { continue } @@ -424,7 +423,7 @@ func (ep *egressProxy) getStatus(ctx context.Context) (*egressservices.Status, e // setStatus writes egress proxy's currently configured firewall to the state // Secret and updates proxy's tailnet addresses. -func (ep *egressProxy) setStatus(ctx context.Context, status *egressservices.Status, nm *netmap.NetworkMap) error { +func (ep *egressProxy) setStatus(ctx context.Context, status *egressservices.Status, nm netmapState) error { // Pod IP is used to determine if a stored status applies to THIS proxy Pod. 
if status == nil { status = &egressservices.Status{} @@ -447,7 +446,7 @@ func (ep *egressProxy) setStatus(ctx context.Context, status *egressservices.Sta if err := ep.kc.JSONPatchResource(ctx, ep.stateSecret, kubeclient.TypeSecrets, []kubeclient.JSONPatch{patch}); err != nil { return fmt.Errorf("error patching state Secret: %w", err) } - ep.tailnetAddrs = nm.SelfNode.Addresses().AsSlice() + ep.tailnetAddrs = nm.self.Addresses().AsSlice() return nil } @@ -457,7 +456,7 @@ func (ep *egressProxy) setStatus(ctx context.Context, status *egressservices.Sta // FQDN, resolve the FQDN and return the resolved IPs. It checks if the // netfilter runner supports IPv6 NAT and skips any IPv6 addresses if it // doesn't. -func (ep *egressProxy) tailnetTargetIPsForSvc(svc egressservices.Config, nm *netmap.NetworkMap) (addrs []netip.Addr, err error) { +func (ep *egressProxy) tailnetTargetIPsForSvc(svc egressservices.Config, nm netmapState) (addrs []netip.Addr, err error) { if svc.TailnetTarget.IP != "" { addr, err := netip.ParseAddr(svc.TailnetTarget.IP) if err != nil { @@ -473,8 +472,8 @@ func (ep *egressProxy) tailnetTargetIPsForSvc(svc egressservices.Config, nm *net if svc.TailnetTarget.FQDN == "" { return nil, errors.New("unexpected egress service config- neither tailnet target IP nor FQDN is set") } - if nm == nil { - log.Printf("netmap is not available, unable to determine backend addresses for %s", svc.TailnetTarget.FQDN) + if !nm.self.Valid() { + log.Printf("netmap state is not available, unable to determine backend addresses for %s", svc.TailnetTarget.FQDN) return addrs, nil } egressAddrs, err := resolveTailnetFQDN(nm, svc.TailnetTarget.FQDN) @@ -501,26 +500,26 @@ func (ep *egressProxy) tailnetTargetIPsForSvc(svc egressservices.Config, nm *net return addrs, nil } -// shouldResync parses netmap update and returns true if the update contains +// shouldResync parses netmap state update and returns true if the update contains // changes for which the egress proxy's firewall 
should be reconfigured. -func (ep *egressProxy) shouldResync(nm *netmap.NetworkMap) bool { - if nm == nil { +func (ep *egressProxy) shouldResync(nm netmapState) bool { + if !nm.self.Valid() { return false } // If proxy's tailnet addresses have changed, resync. - if !reflect.DeepEqual(nm.SelfNode.Addresses().AsSlice(), ep.tailnetAddrs) { + if !views.SliceEqual(nm.self.Addresses(), views.SliceOf(ep.tailnetAddrs)) { log.Printf("node addresses have changed, trigger egress config resync") - ep.tailnetAddrs = nm.SelfNode.Addresses().AsSlice() + ep.tailnetAddrs = nm.self.Addresses().AsSlice() return true } // If the IPs for any of the egress services configured via FQDN have // changed, resync. for fqdn, ips := range ep.targetFQDNs { - for _, nn := range nm.Peers { + for nn := range nm.peers() { if equalFQDNs(nn.Name(), fqdn) { - if !reflect.DeepEqual(ips, nn.Addresses().AsSlice()) { + if !views.SliceEqual(views.SliceOf(ips), nn.Addresses()) { log.Printf("backend addresses for egress target %q have changed old IPs %v, new IPs %v trigger egress config resync", nn.Name(), ips, nn.Addresses().AsSlice()) return true } diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 705b73bc6..fcfa756b3 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -120,6 +120,7 @@ "errors" "fmt" "io/fs" + "iter" "log" "math" "net" @@ -136,11 +137,12 @@ "syscall" "time" + "github.com/benbjohnson/immutable" "golang.org/x/sys/unix" - "tailscale.com/client/local" "tailscale.com/health" "tailscale.com/ipn" + "tailscale.com/ipn/ipnstate" kubeutils "tailscale.com/k8s-operator" "tailscale.com/kube/authkey" healthz "tailscale.com/kube/health" @@ -151,7 +153,7 @@ "tailscale.com/net/tsaddr" "tailscale.com/tailcfg" "tailscale.com/types/logger" - "tailscale.com/types/netmap" + "tailscale.com/types/views" "tailscale.com/util/deephash" "tailscale.com/util/dnsname" "tailscale.com/util/linuxfw" @@ -168,6 +170,137 @@ func getAutoAdvertiseBool() bool { return 
defaultBool("TS_EXPERIMENTAL_SERVICE_AUTO_ADVERTISEMENT", true) } +const containerbootWatchMask = ipn.NotifyInitialStatus | + ipn.NotifyPeerChanges | + ipn.NotifyNoNetMap + +func notifyState(n ipn.Notify) (_ ipn.State, ok bool) { + if n.State != nil { + return *n.State, true + } + if n.InitialStatus != nil && n.InitialStatus.BackendState != "" { + if state, ok := ipn.StateFromString(n.InitialStatus.BackendState); ok { + return state, true + } + } + return ipn.NoState, false +} + +var netmapStatePeerIDHasher = immutable.NewHasher(tailcfg.NodeID(0)) + +type netmapState struct { + self tailcfg.NodeView + peersByID *immutable.Map[tailcfg.NodeID, tailcfg.NodeView] + peersByName *immutable.Map[string, tailcfg.NodeView] // keyed by tailcfg.Node.Name when NodeID is unavailable + certDomains views.Slice[string] + dnsExtraRecords views.Slice[tailcfg.DNSRecord] +} + +func (s netmapState) updateFromNotify(n ipn.Notify) netmapState { + if n.InitialStatus != nil { + s = s.updateFromStatus(n.InitialStatus) + } + if n.SelfChange != nil { + s.self = n.SelfChange.View() + } + for _, p := range n.PeersChanged { + s = s.upsertPeer(p.View()) + } + for _, id := range n.PeersRemoved { + if s.peersByID != nil { + s.peersByID = s.peersByID.Delete(id) + } + } + return s +} + +func (s netmapState) updateFromStatus(st *ipnstate.Status) netmapState { + s.certDomains = views.SliceOf(st.CertDomains) + s.dnsExtraRecords = views.SliceOf(st.ExtraRecords) + if st.Self != nil { + s.self = nodeFromPeerStatus(st.Self).View() + } + if len(st.Peer) != 0 { + s.peersByID = nil + s.peersByName = nil + for _, ps := range st.Peer { + s = s.upsertPeer(nodeFromPeerStatus(ps).View()) + } + } + return s +} + +func (s netmapState) upsertPeer(n tailcfg.NodeView) netmapState { + if !n.Valid() { + return s + } + if s.peersByID == nil { + s.peersByID = immutable.NewMap[tailcfg.NodeID, tailcfg.NodeView](netmapStatePeerIDHasher) + } + if s.peersByName == nil { + s.peersByName = immutable.NewMap[string, 
tailcfg.NodeView](nil) + } + if n.ID() != 0 { + s.peersByID = s.peersByID.Set(n.ID(), n) + if name := n.Name(); name != "" { + s.peersByName = s.peersByName.Delete(name) + } + return s + } + if n.Name() != "" { + s.peersByName = s.peersByName.Set(n.Name(), n) + } + return s +} + +func nodeFromPeerStatus(ps *ipnstate.PeerStatus) *tailcfg.Node { + if ps == nil { + return nil + } + n := &tailcfg.Node{ + ID: ps.NodeID, + StableID: ps.ID, + Name: ps.DNSName, + Key: ps.PublicKey, + } + for _, ip := range ps.TailscaleIPs { + n.Addresses = append(n.Addresses, netip.PrefixFrom(ip, ip.BitLen())) + } + if ps.AllowedIPs != nil { + n.AllowedIPs = ps.AllowedIPs.AsSlice() + } + return n +} + +func (s netmapState) peers() iter.Seq[tailcfg.NodeView] { + return func(yield func(tailcfg.NodeView) bool) { + if s.peersByID != nil { + it := s.peersByID.Iterator() + for { + _, p, ok := it.Next() + if !ok { + break + } + if !yield(p) { + return + } + } + } + if s.peersByName != nil { + it := s.peersByName.Iterator() + for { + _, p, ok := it.Next() + if !ok { + break + } + if !yield(p) { + return + } + } + } + } +} + func main() { if err := run(); err != nil && !errors.Is(err, context.Canceled) { log.Fatal(err) @@ -308,7 +441,7 @@ func run() error { } } - w, err := client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialPrefs|ipn.NotifyInitialState|ipn.NotifyInitialHealthState|ipn.NotifyRateLimit) + w, err := client.WatchIPNBus(bootCtx, containerbootWatchMask|ipn.NotifyInitialPrefs|ipn.NotifyInitialHealthState) if err != nil { return fmt.Errorf("failed to watch tailscaled for updates: %w", err) } @@ -348,7 +481,7 @@ func run() error { if err := tailscaleUp(bootCtx, cfg); err != nil { return fmt.Errorf("failed to auth tailscale: %w", err) } - w, err = client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState|ipn.NotifyRateLimit) + w, err = client.WatchIPNBus(bootCtx, containerbootWatchMask) if err != nil { return fmt.Errorf("rewatching tailscaled for updates 
after auth: %w", err) } @@ -368,8 +501,8 @@ func run() error { return fmt.Errorf("failed to read from tailscaled: %w", err) } - if n.State != nil { - switch *n.State { + if state, ok := notifyState(n); ok { + switch state { case ipn.NeedsLogin: if isOneStepConfig(cfg) { // This could happen if this is the first time tailscaled was run for this @@ -405,7 +538,7 @@ func run() error { // deadline to continue monitoring for changes. break authLoop default: - log.Printf("tailscaled in state %q, waiting", *n.State) + log.Printf("tailscaled in state %q, waiting", state) } } @@ -460,7 +593,7 @@ func run() error { } } - w, err = client.WatchIPNBus(ctx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState|ipn.NotifyRateLimit) + w, err = client.WatchIPNBus(ctx, containerbootWatchMask) if err != nil { return fmt.Errorf("rewatching tailscaled for updates after auth: %w", err) } @@ -539,7 +672,7 @@ func run() error { failedResolveAttempts++ } - var egressSvcsNotify chan *netmap.NetworkMap + var egressSvcsNotify chan netmapState notifyChan := make(chan ipn.Notify) errChan := make(chan error) go func() { @@ -553,12 +686,7 @@ func run() error { } } }() - // Peer set changes (Add/Remove) no longer ride on the IPN bus; poll - // periodically so egress FQDN resolution and peer-aware work picks - // them up. SelfChange covers prompt self changes. 
- const peerPollInterval = 15 * time.Second - peerPoll := time.NewTicker(peerPollInterval) - defer peerPoll.Stop() + var nmState netmapState var wg sync.WaitGroup runLoop: @@ -576,19 +704,17 @@ func run() error { return fmt.Errorf("failed to read from tailscaled: %w", err) case err := <-cfgWatchErrChan: return fmt.Errorf("failed to watch tailscaled config: %w", err) - case <-peerPoll.C: - processNetmap = true case n := <-notifyChan: - // TODO: (ChaosInTheCRD) Add node removed check when supported by ipn - if n.State != nil && *n.State != ipn.Running { + nmState = nmState.updateFromNotify(n) + if state, ok := notifyState(n); ok && state != ipn.Running { // Something's gone wrong and we've left the authenticated state. // Our container image never recovered gracefully from this, and the // control flow required to make it work now is hard. So, just crash // the container and rely on the container runtime to restart us, // whereupon we'll go through initial auth again. - return fmt.Errorf("tailscaled left running state (now in state %q), exiting", *n.State) + return fmt.Errorf("tailscaled left running state (now in state %q), exiting", state) } - if n.SelfChange != nil { + if n.InitialStatus != nil || n.SelfChange != nil || len(n.PeersChanged) != 0 || len(n.PeersRemoved) != 0 || len(n.PeerChangedPatch) != 0 { processNetmap = true } case <-tc: @@ -618,13 +744,12 @@ func run() error { if !processNetmap { continue } - nm, err := fetchNetMap(ctx, client) - if err != nil { - log.Printf("error fetching netmap: %v", err) + self := nmState.self + if !self.Valid() { continue } - if nm != nil { - addrs = nm.SelfNode.Addresses().AsSlice() + { + addrs = self.Addresses().AsSlice() newCurrentIPs := deephash.Hash(&addrs) ipsHaveChanged := newCurrentIPs != currentIPs @@ -636,14 +761,14 @@ func run() error { // Kubernetes Secret to clean up tailnet nodes // for proxies whose route setup continuously // fails. 
- deviceID := nm.SelfNode.StableID() + deviceID := self.StableID() if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceID, &deviceID) { - if err := kc.storeDeviceID(ctx, nm.SelfNode.StableID()); err != nil { + if err := kc.storeDeviceID(ctx, deviceID); err != nil { return fmt.Errorf("storing device ID in Kubernetes Secret: %w", err) } } if cfg.TailnetTargetFQDN != "" { - egressAddrs, err := resolveTailnetFQDN(nm, cfg.TailnetTargetFQDN) + egressAddrs, err := resolveTailnetFQDN(nmState, cfg.TailnetTargetFQDN) if err != nil { log.Print(err.Error()) break @@ -699,7 +824,10 @@ func run() error { backendAddrs = newBackendAddrs } if cfg.ServeConfigPath != "" { - cd := certDomainFromNetmap(nm) + var cd string + if nmState.certDomains.Len() != 0 { + cd = nmState.certDomains.At(0) + } if cd == "" { cd = kubetypes.ValueNoHTTPS } @@ -742,9 +870,9 @@ func run() error { // set up ensures that the operator does not // advertize endpoints of broken proxies. // TODO (irbekrm): instead of using the IP and FQDN, have some other mechanism for the proxy signal that it is 'Ready'. - deviceEndpoints := []any{nm.SelfNode.Name(), nm.SelfNode.Addresses()} + deviceEndpoints := []any{self.Name(), self.Addresses()} if hasKubeStateStore(cfg) && deephash.Update(&currentDeviceEndpoints, &deviceEndpoints) { - if err := kc.storeDeviceEndpoints(ctx, nm.SelfNode.Name(), nm.SelfNode.Addresses().AsSlice()); err != nil { + if err := kc.storeDeviceEndpoints(ctx, self.Name(), addrs); err != nil { return fmt.Errorf("storing device IPs and FQDN in Kubernetes Secret: %w", err) } } @@ -773,7 +901,7 @@ func run() error { } if egressSvcsNotify != nil { - egressSvcsNotify <- nm + egressSvcsNotify <- nmState } } if !startupTasksDone { @@ -795,7 +923,7 @@ func run() error { // will crash this node. 
if cfg.EgressProxiesCfgPath != "" { log.Printf("configuring egress proxy using configuration file at %s", cfg.EgressProxiesCfgPath) - egressSvcsNotify = make(chan *netmap.NetworkMap) + egressSvcsNotify = make(chan netmapState) opts := egressProxyRunOpts{ cfgPath: cfg.EgressProxiesCfgPath, nfr: nfr, @@ -807,7 +935,7 @@ func run() error { tailnetAddrs: addrs, } go func() { - if err := ep.run(ctx, nm, opts); err != nil { + if err := ep.run(ctx, nmState, opts); err != nil { egressSvcsErrorChan <- err } }() @@ -987,30 +1115,26 @@ func runHTTPServer(mux *http.ServeMux, addr string) (close func() error) { } } -// fetchNetMap fetches the current netmap from tailscaled via the -// "current-netmap" localapi debug action. The debug action's payload -// shape is intentionally not part of any stable API; containerboot -// reads its own internal-package types out of it. New external consumers -// should not rely on this — see [local.Client.Status] and friends. -func fetchNetMap(ctx context.Context, lc *local.Client) (*netmap.NetworkMap, error) { - return local.GetDebugResultJSON[*netmap.NetworkMap](ctx, lc, "current-netmap") -} - // resolveTailnetFQDN resolves a tailnet FQDN to a list of IP prefixes, which // can be either a peer device, a Tailscale Service, or a 4via6 synthesized // DNS name (e.g. "10-1-0-5-via-7.tailnet.ts.net"). -func resolveTailnetFQDN(nm *netmap.NetworkMap, fqdn string) ([]netip.Prefix, error) { +func resolveTailnetFQDN(nm netmapState, fqdn string) ([]netip.Prefix, error) { dnsFQDN, err := dnsname.ToFQDN(fqdn) if err != nil { return nil, fmt.Errorf("error parsing %q as FQDN: %w", fqdn, err) } // Check all peer devices first. - for _, p := range nm.Peers { + var ret []netip.Prefix + for p := range nm.peers() { if strings.EqualFold(p.Name(), dnsFQDN.WithTrailingDot()) { - return p.Addresses().AsSlice(), nil + ret = p.Addresses().AsSlice() + break } } + if ret != nil { + return ret, nil + } // If not found yet, check for a matching Tailscale Service. 
if svcIPs := serviceIPsFromNetMap(nm, dnsFQDN); len(svcIPs) != 0 { @@ -1020,7 +1144,7 @@ func resolveTailnetFQDN(nm *netmap.NetworkMap, fqdn string) ([]netip.Prefix, err // If not found yet, check for a matching 4via6 DNS name. if addr, ok := resolveViaDomain(dnsFQDN); ok { prefix := netip.PrefixFrom(addr, addr.BitLen()) - for _, nn := range nm.Peers { + for nn := range nm.peers() { for _, allowedIP := range nn.AllowedIPs().All() { if allowedIP.Contains(addr) { return []netip.Prefix{prefix}, nil @@ -1036,9 +1160,9 @@ func resolveTailnetFQDN(nm *netmap.NetworkMap, fqdn string) ([]netip.Prefix, err // serviceIPsFromNetMap returns all IPs of a Tailscale Service if its FQDN is // found in the netmap. Note that Tailscale Services are not a first-class // object in the netmap, so we guess based on DNS ExtraRecords and AllowedIPs. -func serviceIPsFromNetMap(nm *netmap.NetworkMap, fqdn dnsname.FQDN) []netip.Prefix { +func serviceIPsFromNetMap(nm netmapState, fqdn dnsname.FQDN) []netip.Prefix { var extraRecords []tailcfg.DNSRecord - for _, rec := range nm.DNS.ExtraRecords { + for _, rec := range nm.dnsExtraRecords.All() { recFQDN, err := dnsname.ToFQDN(rec.Name) if err != nil { continue @@ -1060,7 +1184,7 @@ func serviceIPsFromNetMap(nm *netmap.NetworkMap, fqdn dnsname.FQDN) []netip.Pref continue } ipPrefix := netip.PrefixFrom(ip, ip.BitLen()) - for _, ps := range nm.Peers { + for ps := range nm.peers() { for _, allowedIP := range ps.AllowedIPs().All() { if allowedIP == ipPrefix { prefixes = append(prefixes, ipPrefix) diff --git a/cmd/containerboot/main_test.go b/cmd/containerboot/main_test.go index 40f575250..9905123bd 100644 --- a/cmd/containerboot/main_test.go +++ b/cmd/containerboot/main_test.go @@ -35,12 +35,13 @@ "tailscale.com/cmd/testwrapper/flakytest" "tailscale.com/health" "tailscale.com/ipn" + "tailscale.com/ipn/ipnstate" "tailscale.com/kube/egressservices" "tailscale.com/kube/kubeclient" "tailscale.com/kube/kubetypes" "tailscale.com/tailcfg" 
"tailscale.com/tstest" - "tailscale.com/types/netmap" + "tailscale.com/types/key" ) const configFileAuthKey = "some-auth-key" @@ -52,6 +53,7 @@ func TestContainerBoot(t *testing.T) { t.Fatalf("Building containerboot: %v", err) } egressStatus := egressSvcStatus("foo", "foo.tailnetxyz.ts.net", "100.64.0.2") + egressStatusUpdated := egressSvcStatus("foo", "foo.tailnetxyz.ts.net", "100.64.0.3") metricsURL := func(port int) string { return fmt.Sprintf("http://127.0.0.1:%d/metrics", port) @@ -71,12 +73,6 @@ type phase struct { // Waits below to be true before proceeding to the next phase. Notify *ipn.Notify - // If non-nil, install this NetMap on the fake LocalAPI before - // sending Notify. This is the replacement for the old - // Notify.NetMap field; reactive consumers fetch the current - // netmap via /localapi/v0/netmap on their own. - NetMap *netmap.NetworkMap - // WantCmds is the commands that containerboot should run in this phase. WantCmds []string @@ -392,19 +388,12 @@ type testCase struct { Name: "test-node.test.ts.net.", Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, }, - }, - NetMap: &netmap.NetworkMap{ - SelfNode: (&tailcfg.Node{ - StableID: tailcfg.StableNodeID("myID"), - Name: "test-node.test.ts.net.", - Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, - }).View(), - Peers: []tailcfg.NodeView{ - (&tailcfg.Node{ + PeersChanged: []*tailcfg.Node{ + { StableID: tailcfg.StableNodeID("ipv6ID"), Name: "ipv6-node.test.ts.net.", Addresses: []netip.Prefix{netip.MustParsePrefix("::1/128")}, - }).View(), + }, }, }, WantLog: "no forwarding rules for egress addresses [::1/128], host supports IPv6: false", @@ -646,13 +635,6 @@ type testCase struct { Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, }, }, - NetMap: &netmap.NetworkMap{ - SelfNode: (&tailcfg.Node{ - StableID: tailcfg.StableNodeID("newID"), - Name: "new-name.test.ts.net.", - Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, - }).View(), 
- }, WantKubeSecret: map[string]string{ "authkey": "tskey-key", "device_fqdn": "new-name.test.ts.net.", @@ -1114,19 +1096,12 @@ type testCase struct { Name: "test-node.test.ts.net.", Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, }, - }, - NetMap: &netmap.NetworkMap{ - SelfNode: (&tailcfg.Node{ - StableID: tailcfg.StableNodeID("myID"), - Name: "test-node.test.ts.net.", - Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, - }).View(), - Peers: []tailcfg.NodeView{ - (&tailcfg.Node{ + PeersChanged: []*tailcfg.Node{ + { StableID: tailcfg.StableNodeID("fooID"), Name: "foo.tailnetxyz.ts.net.", Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.2/32")}, - }).View(), + }, }, }, WantKubeSecret: map[string]string{ @@ -1141,6 +1116,23 @@ type testCase struct { egressSvcTerminateURL(env.localAddrPort): 200, }, }, + { + Notify: &ipn.Notify{ + PeersChanged: []*tailcfg.Node{{ + StableID: tailcfg.StableNodeID("fooID"), + Name: "foo.tailnetxyz.ts.net.", + Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.3/32")}, + }}, + }, + WantKubeSecret: map[string]string{ + "egress-services": string(mustJSON(t, egressStatusUpdated)), + "authkey": "tskey-key", + "device_fqdn": "test-node.test.ts.net.", + "device_id": "myID", + "device_ips": `["100.64.0.1"]`, + kubetypes.KeyCapVer: capver, + }, + }, }, } }, @@ -1295,17 +1287,8 @@ type testCase struct { t.Fatalf("phase %d: updating mtime for %q: %v", i, path, err) } } - nmForFake := p.NetMap - if nmForFake == nil && p.Notify != nil && p.Notify.SelfChange != nil { - // Synthesize a minimal netmap from SelfChange so - // containerboot's NetMap() fetch returns - // something usable when the test only set Notify. 
- nmForFake = &netmap.NetworkMap{ - SelfNode: p.Notify.SelfChange.View(), - } - } - if nmForFake != nil { - env.lapi.SetNetMap(nmForFake) + if p.Notify != nil && p.Notify.InitialStatus == nil { + p.Notify.InitialStatus = statusFromNotify(p.Notify) } env.lapi.Notify(p.Notify) if p.Signal != nil { @@ -1499,7 +1482,6 @@ type localAPI struct { sync.Mutex cond *sync.Cond notify *ipn.Notify - netmap *netmap.NetworkMap // served by /localapi/v0/netmap } func (lc *localAPI) Start() error { @@ -1536,44 +1518,45 @@ func (lc *localAPI) Notify(n *ipn.Notify) { lc.cond.Broadcast() } -// SetNetMap installs the netmap that the fake /localapi/v0/netmap endpoint -// will return. -func (lc *localAPI) SetNetMap(nm *netmap.NetworkMap) { - lc.Lock() - defer lc.Unlock() - lc.netmap = nm +func statusFromNotify(n *ipn.Notify) *ipnstate.Status { + st := new(ipnstate.Status) + if n.State != nil { + st.BackendState = n.State.String() + } + if n.SelfChange != nil { + st.Self = peerStatusFromNode(n.SelfChange.View()) + } + if len(n.PeersChanged) != 0 { + st.Peer = map[key.NodePublic]*ipnstate.PeerStatus{} + for _, p := range n.PeersChanged { + pv := p.View() + st.Peer[pv.Key()] = peerStatusFromNode(pv) + } + } + return st +} + +func peerStatusFromNode(n tailcfg.NodeView) *ipnstate.PeerStatus { + ps := &ipnstate.PeerStatus{ + ID: n.StableID(), + NodeID: n.ID(), + PublicKey: n.Key(), + DNSName: n.Name(), + } + for _, p := range n.Addresses().All() { + if p.IsSingleIP() { + ps.TailscaleIPs = append(ps.TailscaleIPs, p.Addr()) + } + } + if n.AllowedIPs().Len() != 0 { + v := n.AllowedIPs() + ps.AllowedIPs = &v + } + return ps } func (lc *localAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) { switch r.URL.Path { - case "/localapi/v0/netmap": - w.Header().Set("Content-Type", "application/json") - lc.Lock() - nm := lc.netmap - lc.Unlock() - if nm == nil { - http.Error(w, "no netmap", http.StatusServiceUnavailable) - return - } - json.NewEncoder(w).Encode(nm) - return - case "/localapi/v0/debug": 
- // containerboot fetches the netmap via the "current-netmap" - // debug action; serve it like /localapi/v0/netmap above. - if r.URL.Query().Get("action") != "current-netmap" { - http.Error(w, "unsupported debug action", http.StatusNotFound) - return - } - w.Header().Set("Content-Type", "application/json") - lc.Lock() - nm := lc.netmap - lc.Unlock() - if nm == nil { - http.Error(w, "no netmap", http.StatusServiceUnavailable) - return - } - json.NewEncoder(w).Encode(nm) - return case "/localapi/v0/serve-config": switch r.Method { case "GET": diff --git a/cmd/containerboot/serve.go b/cmd/containerboot/serve.go index f64d2d24f..f5423630f 100644 --- a/cmd/containerboot/serve.go +++ b/cmd/containerboot/serve.go @@ -24,7 +24,6 @@ "tailscale.com/kube/kubetypes" klc "tailscale.com/kube/localclient" "tailscale.com/kube/services" - "tailscale.com/types/netmap" ) // watchServeConfigChanges watches path for changes, and when it sees one, reads @@ -142,13 +141,6 @@ func refreshAdvertiseServices(ctx context.Context, sc *ipn.ServeConfig, lc klc.L return nil } -func certDomainFromNetmap(nm *netmap.NetworkMap) string { - if len(nm.DNS.CertDomains) == 0 { - return "" - } - return nm.DNS.CertDomains[0] -} - func updateServeConfig(ctx context.Context, sc *ipn.ServeConfig, certDomain string, lc klc.LocalClient) error { if !isValidHTTPSConfig(certDomain, sc) { return nil diff --git a/control/controlclient/map_test.go b/control/controlclient/map_test.go index 4e6acb5c7..057e0b4c8 100644 --- a/control/controlclient/map_test.go +++ b/control/controlclient/map_test.go @@ -1317,6 +1317,56 @@ func (nu *countingNetmapUpdater) UpdateFullNetmap(nm *netmap.NetworkMap) { nu.full.Add(1) } +type countingDeltaNetmapUpdater struct { + countingNetmapUpdater + delta atomic.Int64 +} + +func (nu *countingDeltaNetmapUpdater) UpdateNetmapDelta([]netmap.NodeMutation) bool { + nu.delta.Add(1) + return true +} + +func TestExistingPeerReplacementHandledIncrementally(t *testing.T) { + nu := 
&countingDeltaNetmapUpdater{} + ms := newTestMapSession(t, nu) + ctx := t.Context() + + peer := &tailcfg.Node{ + ID: 1, + StableID: "peer", + Name: "peer.example.ts.net.", + Key: key.NewNode().Public(), + DiscoKey: key.NewDisco().Public(), + Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, + AllowedIPs: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, + Hostinfo: (&tailcfg.Hostinfo{}).View(), + } + if err := ms.handleNonKeepAliveMapResponse(ctx, &tailcfg.MapResponse{ + Node: &tailcfg.Node{Name: "self.example.ts.net."}, + Peers: []*tailcfg.Node{peer}, + }, false); err != nil { + t.Fatal(err) + } + if got := nu.full.Load(); got != 1 { + t.Fatalf("full updates after initial response = %d; want 1", got) + } + + replacement := peer.Clone() + replacement.AllowedIPs = append(replacement.AllowedIPs, netip.MustParsePrefix("100.64.0.2/32")) + if err := ms.handleNonKeepAliveMapResponse(ctx, &tailcfg.MapResponse{ + PeersChanged: []*tailcfg.Node{replacement}, + }, false); err != nil { + t.Fatal(err) + } + if got := nu.full.Load(); got != 1 { + t.Errorf("full updates after route-changing peer replacement = %d; want 1", got) + } + if got := nu.delta.Load(); got != 1 { + t.Errorf("delta updates after route-changing peer replacement = %d; want 1", got) + } +} + // tests (*mapSession).patchifyPeersChanged; smaller tests are in TestPeerChangeDiff func TestPatchifyPeersChanged(t *testing.T) { hi := (&tailcfg.Hostinfo{}).View() diff --git a/flake.nix b/flake.nix index a48ba78fe..4ea629f46 100644 --- a/flake.nix +++ b/flake.nix @@ -164,4 +164,4 @@ }); }; } -# nix-direnv cache busting line: sha256-rueM6HBKbu8WBw4atwubJD3c0O95zAkUVeOkARDiX18= +# nix-direnv cache busting line: sha256-pXdOQRF8EpZ+fFYhsEgAT04k3h88gdOZ42e0AMeV4Wc= diff --git a/flakehashes.json b/flakehashes.json index f3552d3fd..5fb1f097f 100644 --- a/flakehashes.json +++ b/flakehashes.json @@ -4,7 +4,7 @@ "sri": "sha256-uskKJHUzzIQ74VuzoQKrlz+6tCY/YKnv+BjJduPwt6Q=" }, "vendor": { - "goModSum": 
"sha256-MqRZjijiKUKNgFmZmw6bMOfrU6HLh3iJYe+7lCKb0Uw=", - "sri": "sha256-rueM6HBKbu8WBw4atwubJD3c0O95zAkUVeOkARDiX18=" + "goModSum": "sha256-nI9Dzn2PeLKj9mnskoHhwZNix+6H7pA4nlB75sDAn1U=", + "sri": "sha256-pXdOQRF8EpZ+fFYhsEgAT04k3h88gdOZ42e0AMeV4Wc=" } } diff --git a/go.mod b/go.mod index 22995ed1f..8a8d146e7 100644 --- a/go.mod +++ b/go.mod @@ -162,6 +162,7 @@ require ( github.com/armon/go-metrics v0.4.1 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beevik/ntp v0.3.0 // indirect + github.com/benbjohnson/immutable v0.4.3 github.com/blang/semver/v4 v4.0.0 // indirect github.com/boltdb/bolt v1.3.1 // indirect github.com/bombsimon/wsl/v4 v4.2.1 // indirect diff --git a/go.sum b/go.sum index 023be5cc2..a944e111d 100644 --- a/go.sum +++ b/go.sum @@ -187,6 +187,8 @@ github.com/axiomhq/hyperloglog v0.0.0-20240319100328-84253e514e02/go.mod h1:k08r github.com/beevik/ntp v0.2.0/go.mod h1:hIHWr+l3+/clUnF44zdK+CWW7fO8dR5cIylAQ76NRpg= github.com/beevik/ntp v0.3.0 h1:xzVrPrE4ziasFXgBVBZJDP0Wg/KpMwk2KHJ4Ba8GrDw= github.com/beevik/ntp v0.3.0/go.mod h1:hIHWr+l3+/clUnF44zdK+CWW7fO8dR5cIylAQ76NRpg= +github.com/benbjohnson/immutable v0.4.3 h1:GYHcksoJ9K6HyAUpGxwZURrbTkXA0Dh4otXGqbhdrjA= +github.com/benbjohnson/immutable v0.4.3/go.mod h1:qJIKKSmdqz1tVzNtst1DZzvaqOU1onk1rc03IeM3Owk= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/ipn/backend.go b/ipn/backend.go index bee4f1295..bc8320f89 100644 --- a/ipn/backend.go +++ b/ipn/backend.go @@ -5,6 +5,7 @@ import ( "fmt" + "slices" "strings" "time" @@ -35,15 +36,27 @@ // ID tokens used by the Android client. 
const GoogleIDTokenType = "ts_android_google_login" +var stateStrings = [...]string{ + "NoState", + "InUseOtherUser", + "NeedsLogin", + "NeedsMachineAuth", + "Stopped", + "Starting", + "Running", +} + func (s State) String() string { - return [...]string{ - "NoState", - "InUseOtherUser", - "NeedsLogin", - "NeedsMachineAuth", - "Stopped", - "Starting", - "Running"}[s] + return stateStrings[s] +} + +// StateFromString parses s as a State string value. +func StateFromString(s string) (_ State, ok bool) { + i := slices.Index(stateStrings[:], s) + if i == -1 { + return NoState, false + } + return State(i), true } // EngineStatus contains WireGuard engine stats. diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index a8625f6a9..ff49c4b06 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -1417,6 +1417,7 @@ func (b *LocalBackend) updateStatusLocked(sb *ipnstate.StatusBuilder) { } if nm != nil { s.CertDomains = append([]string(nil), nm.DNS.CertDomains...) + s.ExtraRecords = append([]tailcfg.DNSRecord(nil), nm.DNS.ExtraRecords...) s.MagicDNSSuffix = nm.MagicDNSSuffix() if s.CurrentTailnet == nil { s.CurrentTailnet = &ipnstate.TailnetStatus{} @@ -2325,6 +2326,7 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo defer b.mu.Unlock() cn := b.currentNode() + needsAuthReconfig := netmapDeltaNeedsAuthReconfig(cn, muts) cn.UpdateNetmapDelta(muts) // Dispatch Upsert/Remove per-peer to magicsock, and any per-field @@ -2348,6 +2350,9 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo } } ms.UpdateNetmapDelta(muts) + if needsAuthReconfig { + b.authReconfigLocked() + } // If auto exit nodes are enabled and our exit node went offline, // we need to schedule picking a new one. 
@@ -2411,6 +2416,33 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo return true } +func netmapDeltaNeedsAuthReconfig(cn *nodeBackend, muts []netmap.NodeMutation) bool { + for _, m := range muts { + switch m := m.(type) { + case netmap.NodeMutationUpsert: + old, ok := cn.NodeByID(m.Node.ID()) + if !ok { + continue + } + if peerRouteConfigChanged(old, m.Node) { + return true + } + } + } + return false +} + +func peerRouteConfigChanged(old, new tailcfg.NodeView) bool { + return old.Key() != new.Key() || + old.DiscoKey() != new.DiscoKey() || + !views.SliceEqual(old.AllowedIPs(), new.AllowedIPs()) || + old.Expired() != new.Expired() || + old.IsJailed() != new.IsJailed() || + old.IsWireGuardOnly() != new.IsWireGuardOnly() || + old.SelfNodeV4MasqAddrForThisPeer() != new.SelfNodeV4MasqAddrForThisPeer() || + old.SelfNodeV6MasqAddrForThisPeer() != new.SelfNodeV6MasqAddrForThisPeer() +} + // UpdatePacketFilter implements [controlclient.PacketFilterUpdater]. // // It is called by the controlclient when a MapResponse carries a new packet @@ -2506,7 +2538,7 @@ func mutationsAreWorthyOfRecalculatingSuggestedExitNode(muts []netmap.NodeMutati // [tailcfg.PeerChange] for use in [ipn.Notify.PeerChangedPatch]. Multiple // mutations against the same node are merged into a single PeerChange. // -// Add/Remove mutations are skipped (they ride +// Upsert/Remove mutations are skipped (they ride // [ipn.Notify.PeersChanged]/[ipn.Notify.PeersRemoved]). Any other mutation // type that doesn't fit a [tailcfg.PeerChange] causes ok=false; the caller // should fall back to a full netmap rebuild. @@ -5662,7 +5694,7 @@ func (b *LocalBackend) authReconfigLocked() { cn := b.currentNode() - nm := cn.NetMap() + nm := cn.netMapWithPeers() if nm == nil { b.logf("[v1] authReconfig: netmap not yet valid. 
Skipping.") return diff --git a/ipn/ipnlocal/state_test.go b/ipn/ipnlocal/state_test.go index 104c29a3f..4eb8ac136 100644 --- a/ipn/ipnlocal/state_test.go +++ b/ipn/ipnlocal/state_test.go @@ -9,6 +9,7 @@ "fmt" "math/rand/v2" "net/netip" + "slices" "strings" "sync" "sync/atomic" @@ -1573,6 +1574,48 @@ func TestEngineReconfigOnStateChange(t *testing.T) { } } +func TestEngineReconfigOnPeerRouteDelta(t *testing.T) { + connect := &ipn.MaskedPrefs{Prefs: ipn.Prefs{WantRunning: true}, WantRunningSet: true} + peerAddr := netip.MustParsePrefix("100.64.1.1/32") + vipAddr := netip.MustParsePrefix("100.99.99.99/32") + + peer := makePeer(1, withName("node-1"), withAddresses(peerAddr)) + peerStruct := peer.AsStruct() + peerStruct.AllowedIPs = []netip.Prefix{peerAddr} + peer = peerStruct.View() + + nm := buildNetmapWithPeers( + makePeer(2, withName("node-2"), withAddresses(netip.MustParsePrefix("100.64.1.2/32"))), + peer, + ) + + lb, engine, cc := newLocalBackendWithMockEngineAndControl(t, false) + mustDo(t)(lb.Start(ipn.Options{})) + mustDo2(t)(lb.EditPrefs(connect)) + cc().authenticated(nm) + + replacement := nm.Peers[0].AsStruct() + replacement.AllowedIPs = append(replacement.AllowedIPs, vipAddr) + if !lb.UpdateNetmapDelta([]netmap.NodeMutation{netmap.NodeMutationUpsert{Node: replacement.View()}}) { + t.Fatal("UpdateNetmapDelta = false, want true") + } + + cfg := engine.Config() + if cfg == nil { + t.Fatal("engine config is nil") + } + for _, peer := range cfg.Peers { + if peer.PublicKey != replacement.Key { + continue + } + if !slices.Contains(peer.AllowedIPs, vipAddr) { + t.Fatalf("peer AllowedIPs = %v; want %v", peer.AllowedIPs, vipAddr) + } + return + } + t.Fatalf("engine config missing peer %v", replacement.Key.ShortString()) +} + // TestSendPreservesAuthURL tests that wgengine updates arriving in the middle of // processing an auth URL doesn't result in the auth URL being cleared. 
func TestSendPreservesAuthURL(t *testing.T) { diff --git a/ipn/ipnstate/ipnstate.go b/ipn/ipnstate/ipnstate.go index f0b9dcc82..19192dfa6 100644 --- a/ipn/ipnstate/ipnstate.go +++ b/ipn/ipnstate/ipnstate.go @@ -73,6 +73,9 @@ type Status struct { // trailing periods, and without any "_acme-challenge." prefix. CertDomains []string + // ExtraRecords contains extra DNS records to add to the DNS resolver. + ExtraRecords []tailcfg.DNSRecord + // Peer is the state of each peer, keyed by each peer's current public key. Peer map[key.NodePublic]*PeerStatus diff --git a/shell.nix b/shell.nix index efbecac42..1a1207cb3 100644 --- a/shell.nix +++ b/shell.nix @@ -16,4 +16,4 @@ ) { src = ./.; }).shellNix -# nix-direnv cache busting line: sha256-rueM6HBKbu8WBw4atwubJD3c0O95zAkUVeOkARDiX18= +# nix-direnv cache busting line: sha256-pXdOQRF8EpZ+fFYhsEgAT04k3h88gdOZ42e0AMeV4Wc= diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 48f57e121..6c1dfcecb 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -914,6 +914,96 @@ func TestIncrementalMapUpdatePeersRemoved(t *testing.T) { d2.MustCleanShutdown(t) } +// TestIncrementalMapUpdatePeerAllowedIPsReachability verifies that an incremental +// peer upsert changing a peer's AllowedIPs reprograms the local WireGuard config. +// This covers VIP additions at runtime, where the VIP route is not reachable +// before the map mutation but is reachable over TSMP afterward. 
+func TestIncrementalMapUpdatePeerAllowedIPsReachability(t *testing.T) { + tstest.Shard(t) + tstest.Parallel(t) + env := NewTestEnv(t) + + n1 := NewTestNode(t, env) + d1 := n1.StartDaemon() + defer d1.MustCleanShutdown(t) + n1.AwaitListening() + n1.MustUp() + n1.AwaitRunning() + + n2 := NewTestNode(t, env) + d2 := n2.StartDaemon() + defer d2.MustCleanShutdown(t) + n2.AwaitListening() + n2.MustUp() + n2.AwaitRunning() + + n1Status := n1.MustStatus() + n2Status := n2.MustStatus() + tnode1 := env.Control.Node(n1Status.Self.PublicKey) + if tnode1 == nil { + t.Fatalf("control has no node for %v", n1Status.Self.PublicKey) + } + tnode2 := env.Control.Node(n2Status.Self.PublicKey) + if tnode2 == nil { + t.Fatalf("control has no node for %v", n2Status.Self.PublicKey) + } + + vip := netip.MustParseAddr("100.99.99.99") + vipPrefix := netip.PrefixFrom(vip, vip.BitLen()) + + if err := n1.Tailscale("ping", "--tsmp", "--c=1", "--timeout=5s", n2.AwaitIP4().String()).Run(); err != nil { + t.Fatalf("initial ping n1 -> n2: %v", err) + } + if err := n1.Tailscale("ping", "--tsmp", "--c=1", "--timeout=1s", vip.String()).Run(); err == nil { + t.Fatalf("ping n1 -> n2 VIP %v before AllowedIPs delta succeeded unexpectedly", vip) + } + + mr, err := env.Control.MapResponse(&tailcfg.MapRequest{NodeKey: tnode1.Key}) + if err != nil { + t.Fatalf("MapResponse: %v", err) + } + var replacement *tailcfg.Node + for _, p := range mr.Peers { + if p.ID == tnode2.ID { + replacement = p.Clone() + break + } + } + if replacement == nil { + t.Fatalf("MapResponse for n1 has no peer n2") + } + + replacement.AllowedIPs = append(replacement.AllowedIPs, vipPrefix) + if !env.Control.AddRawMapResponse(tnode1.Key, &tailcfg.MapResponse{ + PeersChanged: []*tailcfg.Node{replacement}, + }) { + t.Fatalf("failed to add map response") + } + + if err := tstest.WaitFor(5*time.Second, func() error { + st := n1.MustStatus() + p, ok := st.Peer[tnode2.Key] + if !ok { + return fmt.Errorf("node 1 doesn't see node 2 as a peer") + } 
+ if p.AllowedIPs == nil { + return fmt.Errorf("node 1 sees node 2 with no AllowedIPs") + } + for _, allowedIP := range p.AllowedIPs.All() { + if allowedIP == vipPrefix { + return nil + } + } + return fmt.Errorf("node 1 sees node 2 AllowedIPs %v; want %v", p.AllowedIPs, vipPrefix) + }); err != nil { + t.Fatal(err) + } + + if err := n1.Tailscale("ping", "--tsmp", "--c=1", "--timeout=5s", vip.String()).Run(); err != nil { + t.Fatalf("ping n1 -> n2 VIP %v after AllowedIPs delta: %v", vip, err) + } +} + func TestNodeAddressIPFields(t *testing.T) { tstest.Shard(t) flakytest.Mark(t, "https://github.com/tailscale/tailscale/issues/7008")