From 9cb071666c3e89cdebbb5dfc4f793067b3cb69d7 Mon Sep 17 00:00:00 2001 From: "M. J. Fromberger" Date: Fri, 12 Jun 2026 09:41:00 -0700 Subject: [PATCH] ipn/ipnlocal: update netmap cache after peer deltas are applied (#20111) Add an UpdatePeers method to the cache. This allows us to support netmap peer deltas, by allowing just the peers to be updated in an existing cache. As a safety check, reject an update if there was no base netmap data to apply a change to. Then, when processing peer mutations in the backend, capture any changes that should be applied to the cache and update it, if one is enabled. Updates #12542 Change-Id: I2f8790a8fdc5e85fce6700ba4821a8cb10dddffa Signed-off-by: M. J. Fromberger --- ipn/ipnlocal/diskcache.go | 15 ++++++ ipn/ipnlocal/local.go | 33 +++++++++++++ ipn/ipnlocal/netmapcache/netmapcache.go | 38 +++++++++++++++ ipn/ipnlocal/netmapcache/netmapcache_test.go | 49 ++++++++++++++++++++ 4 files changed, 135 insertions(+) diff --git a/ipn/ipnlocal/diskcache.go b/ipn/ipnlocal/diskcache.go index 3f5bfc3cc..1ceb5a6ad 100644 --- a/ipn/ipnlocal/diskcache.go +++ b/ipn/ipnlocal/diskcache.go @@ -10,6 +10,7 @@ "tailscale.com/feature/buildfeatures" "tailscale.com/ipn/ipnlocal/netmapcache" + "tailscale.com/tailcfg" "tailscale.com/types/netmap" ) @@ -21,6 +22,20 @@ type diskCache struct { cache *netmapcache.Cache } +// writePeerDeltaToDiskLocked applies the specified peer delta to the cache, +// leaving other existing cached fields (self, profiles, other peers not +// mentioned in the arguments) unchanged. It does nothing (without error) if +// both slices are empty. +func (b *LocalBackend) writePeerDeltaToDiskLocked(update []tailcfg.NodeView, remove []tailcfg.StableNodeID) error { + if !buildfeatures.HasCacheNetMap || (len(update) == 0 && len(remove) == 0) { + return nil + } else if err := b.ensureDiskCacheLocked(); err != nil { + return err + } + b.logf("updating netmap peers in disk cache (%d to update, %d to remove)", len(update), len(remove)) + return b.diskCache.cache.UpdatePeers(b.currentNode().Context(), update, remove) +} + // writeNetmapToDiskLockedWithoutPeers updates nm in the cache, excluding peers and profiles. func (b *LocalBackend) writeNetmapToDiskLockedWithoutPeers(nm *netmap.NetworkMap) error { if !buildfeatures.HasCacheNetMap || nm == nil || nm.Cached { diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index fdacd2bea..7bd444d2b 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -2422,6 +2422,12 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo needsAuthReconfig := netmapDeltaNeedsAuthReconfig(cn, muts) cn.UpdateNetmapDelta(muts) + // In order that we can update the cache, keep track of which nodes are + // updated and removed. The nodeBackend has already applied any deltas, so + // we just need to know which nodes need updating. + updateIDs := set.Of[tailcfg.NodeID]() + removeIDs := set.Of[tailcfg.NodeID]() + // Dispatch Upsert/Remove per-peer to magicsock, and any per-field // patches via the existing UpdateNetmapDelta path. The per-peer // methods take c.mu themselves, so we can't call them from inside @@ -2434,12 +2440,15 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo ms.UpsertPeer(m.Node) peersUpsertedOrRemoved = true metricNetmapDeltaPeerUpserted.Add(1) + updateIDs.Add(m.Node.ID()) case netmap.NodeMutationRemove: ms.RemovePeer(m.NodeIDBeingMutated()) peersUpsertedOrRemoved = true metricNetmapDeltaPeerRemoved.Add(1) + removeIDs.Add(m.NodeIDBeingMutated()) default: metricNetmapDeltaPeerPatched.Add(1) + updateIDs.Add(m.NodeIDBeingMutated()) } } ms.UpdateNetmapDelta(muts) @@ -2476,6 +2485,30 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo return true } + // Reaching here, apply any peer changes to the netmap cache (if relevant). + // Note we do this AFTER the updates are applied in the nodeBackend, so that + // we can get its updated views to put back into the cache. + if buildfeatures.HasCacheNetMap && + cn.SelfHasCap(tailcfg.NodeAttrCacheNetworkMaps) && + envknob.BoolDefaultTrue("TS_USE_CACHED_NETMAP") { + + var peersToUpdate []tailcfg.NodeView + for id := range updateIDs { + if n, ok := cn.NodeByID(id); ok { + peersToUpdate = append(peersToUpdate, n) + } + } + var peersToRemove []tailcfg.StableNodeID + for id := range removeIDs { + if n, ok := cn.NodeByID(id); ok { + peersToRemove = append(peersToRemove, n.StableID()) + } + } + if err := b.writePeerDeltaToDiskLocked(peersToUpdate, peersToRemove); err != nil { + b.logf("update netmap cache for peer deltas: %v", err) + } + } + // A single MapResponse can carry upserts/removes (full Nodes) AND // per-field patches in the same delta. Build one Notify that // reflects all of them; per-session stripping in [sendToLocked] diff --git a/ipn/ipnlocal/netmapcache/netmapcache.go b/ipn/ipnlocal/netmapcache/netmapcache.go index 33f0ed32e..3c4b01cf3 100644 --- a/ipn/ipnlocal/netmapcache/netmapcache.go +++ b/ipn/ipnlocal/netmapcache/netmapcache.go @@ -242,6 +242,44 @@ func (c *Cache) Store(ctx context.Context, nm *netmap.NetworkMap) error { return c.removeUnwantedKeys(ctx) } +// UpdatePeers updates the cache to add or replace ("upsert") any peers +// specified in update, and discard any peers specified in remove. This does +// not affect any previous cached values for the self node or user profiles. +// +// It is expected that update and remove will be disjoint, but in the event +// they are not, remove takes precedence. If the cache does not already contain +// at least a self node, UpdatePeers reports an error without changing anything. +func (c *Cache) UpdatePeers(ctx context.Context, update []tailcfg.NodeView, remove []tailcfg.StableNodeID) error { + if !buildfeatures.HasCacheNetMap { + return nil + } + if !c.wantKeys.Contains(selfKey) { + return errors.New("no netmap is cached to apply an update") + } + + // Attempt all updates and removals before reporting any errors. + var errs []error + for _, p := range update { + // We may already have an entry for this peer, or it may be new, but + // rather than do a lookup we can just replace the existing entry if it + // exists. + key := peerKeyPrefix + cacheKey(p.StableID()) + if err := c.writeJSON(ctx, key, netmapNode{Node: &p}); err != nil { + errs = append(errs, err) + continue + } + c.wantKeys.Add(key) + } + for _, q := range remove { + key := peerKeyPrefix + cacheKey(q) + if err := c.store.Remove(ctx, string(key)); err != nil { + errs = append(errs, err) + } + c.wantKeys.Delete(key) + } + return errors.Join(errs...) +} + // updateSelfOnly updates the "static" parts of the netmap in the cache. It is // shared between [Cache.UpdateSelfOnly] and [Cache.Store]. func (c *Cache) updateSelfOnly(ctx context.Context, nm *netmap.NetworkMap) error { diff --git a/ipn/ipnlocal/netmapcache/netmapcache_test.go b/ipn/ipnlocal/netmapcache/netmapcache_test.go index efbe0fa91..884e450e4 100644 --- a/ipn/ipnlocal/netmapcache/netmapcache_test.go +++ b/ipn/ipnlocal/netmapcache/netmapcache_test.go @@ -307,6 +307,55 @@ func TestUpdateSelfOnly(t *testing.T) { } } +func TestUpdatePeers(t *testing.T) { + modNode1 := (&tailcfg.Node{ + ID: 99001, + StableID: "n99001FAKE", + Name: "test1altered.example.com.", + }).View() // a modified version of testNode1 + newNode3 := (&tailcfg.Node{ + ID: 99003, + StableID: "n99003FAKE", + Name: "test3.example.com.", + }).View() // a new node hitherto unseen + + update := []tailcfg.NodeView{newNode3, modNode1} + remove := []tailcfg.StableNodeID{testNode2.StableID()} + + // Modify a shallow copy of the map so we can perform an update and verify + // that it round-trips through a Load after calling UpdatePeers. + updated := *testMap + updated.Peers = []tailcfg.NodeView{modNode1, newNode3} // N.B. sorted + + s := make(testStore) + c := netmapcache.NewCache(s) + + // Prior to storing anything, the cache should reject an update because it + // has no base netmap to apply changes to. + if err := c.UpdatePeers(t.Context(), update, remove); err == nil { + t.Error("UpdatePeers on empty cache unexpectedly succeeded") + } else { + t.Logf("UpdatePeers on empty cache: %v (OK)", err) + } + + // Initialize the cache with the test map so we get a baseline. + if err := c.Store(t.Context(), testMap); err != nil { + t.Fatalf("Store initial netmap: %v", err) + } + + if err := c.UpdatePeers(t.Context(), update, remove); err != nil { + t.Fatalf("UpdatePeers failed; %v", err) + } + + got, err := c.Load(t.Context()) + if err != nil { + t.Fatalf("Load netmap failed: %v", err) + } + if diff := diffNetMaps(got, &updated); diff != "" { + t.Fatalf("Updated map differs (-got, +want):\n%s", diff) + } +} + // skippedMapFields are the names of fields that should not be considered by // network map caching, and thus skipped when comparing test results. var skippedMapFields = []string{