diff --git a/drive/driveimpl/drive_test.go b/drive/driveimpl/drive_test.go index 185ae2a9c..81788f8d2 100644 --- a/drive/driveimpl/drive_test.go +++ b/drive/driveimpl/drive_test.go @@ -7,6 +7,7 @@ "fmt" "io" "io/fs" + "iter" "log" "net" "net/http" @@ -456,10 +457,48 @@ func (r *remote) ServeHTTP(w http.ResponseWriter, req *http.Request) { } type system struct { - t *testing.T - local *local - client *gowebdav.Client + t *testing.T + local *local + client *gowebdav.Client + transport http.RoundTripper + + mu sync.Mutex remotes map[string]*remote + gen uint64 +} + +// Domain implements [drive.RemoteSource]. +func (s *system) Domain() string { return domain } + +// Transport implements [drive.RemoteSource]. +func (s *system) Transport() http.RoundTripper { return s.transport } + +// Remotes implements [drive.RemoteSource]. +func (s *system) Remotes() iter.Seq[*drive.Remote] { + s.mu.Lock() + rs := make([]*drive.Remote, 0, len(s.remotes)) + for name, r := range s.remotes { + url := fmt.Sprintf("http://%s", r.ln.Addr()) + rs = append(rs, &drive.Remote{ + Name: name, + URL: func() string { return url }, + }) + } + s.mu.Unlock() + return func(yield func(*drive.Remote) bool) { + for _, r := range rs { + if !yield(r) { + return + } + } + } +} + +// Generation implements [drive.RemoteSource]. +func (s *system) Generation() uint64 { + s.mu.Lock() + defer s.mu.Unlock() + return s.gen } func newSystem(t *testing.T) *system { @@ -486,11 +525,16 @@ func newSystem(t *testing.T) *system { client := gowebdav.NewAuthClient(fmt.Sprintf("http://%s", ln.Addr()), &noopAuthorizer{}) client.SetTransport(&http.Transport{DisableKeepAlives: true}) s := &system{ - t: t, - local: &local{ln: ln, fs: fs}, + t: t, + local: &local{ln: ln, fs: fs}, + transport: &http.Transport{ + DisableKeepAlives: true, + ResponseHeaderTimeout: 5 * time.Second, + }, client: client, remotes: make(map[string]*remote), } + fs.SetRemoteSource(s) t.Cleanup(s.stop) return s } @@ -518,22 +562,11 @@ func (s *system) addRemote(name string) string { } r.fs.SetFileServerAddr(fileServer.Addr()) go http.Serve(ln, r) - s.remotes[name] = r - remotes := make([]*drive.Remote, 0, len(s.remotes)) - for name, r := range s.remotes { - remotes = append(remotes, &drive.Remote{ - Name: name, - URL: func() string { return fmt.Sprintf("http://%s", r.ln.Addr()) }, - }) - } - s.local.fs.SetRemotes( - domain, - remotes, - &http.Transport{ - DisableKeepAlives: true, - ResponseHeaderTimeout: 5 * time.Second, - }) + s.mu.Lock() + s.remotes[name] = r + s.gen++ + s.mu.Unlock() return fileServer.Addr() } diff --git a/drive/driveimpl/local_impl.go b/drive/driveimpl/local_impl.go index ab908c0d3..1ac6b1395 100644 --- a/drive/driveimpl/local_impl.go +++ b/drive/driveimpl/local_impl.go @@ -8,6 +8,7 @@ "log" "net" "net/http" + "sync" "time" "tailscale.com/drive" @@ -52,10 +53,18 @@ type FileSystemForLocal struct { logf logger.Logf h *compositedav.Handler listener *connListener + + // sourceMu guards source and cachedGen. It also serializes the + // rebuild path so concurrent requests don't race to replace + // children with stale data. + sourceMu sync.Mutex + source drive.RemoteSource + cachedGen uint64 + haveGen bool // true once cachedGen reflects an actual source.Generation call } func (s *FileSystemForLocal) startServing() { - hs := &http.Server{Handler: s.h} + hs := &http.Server{Handler: http.HandlerFunc(s.serveHTTP)} go func() { err := hs.Serve(s.listener) if err != nil { @@ -65,17 +74,35 @@ func (s *FileSystemForLocal) startServing() { }() } -// HandleConn handles connections from local WebDAV clients -func (s *FileSystemForLocal) HandleConn(conn net.Conn, remoteAddr net.Addr) error { - return s.listener.HandleConn(conn, remoteAddr) +// serveHTTP refreshes the underlying compositedav children from the +// remote source if its generation has changed, then delegates to the +// composite handler. The refresh path is skipped entirely when the +// generation is unchanged, which is the common case. +func (s *FileSystemForLocal) serveHTTP(w http.ResponseWriter, r *http.Request) { + s.refresh() + s.h.ServeHTTP(w, r) } -// SetRemotes sets the complete set of remotes on the given tailnet domain -// using a map of name -> url. If transport is specified, that transport -// will be used to connect to these remotes. -func (s *FileSystemForLocal) SetRemotes(domain string, remotes []*drive.Remote, transport http.RoundTripper) { - children := make([]*compositedav.Child, 0, len(remotes)) - for _, remote := range remotes { +// refresh rebuilds the compositedav children from the current source +// if its generation has changed since the last refresh. It is a no-op +// when no source is set or when the generation matches the cached +// value. +func (s *FileSystemForLocal) refresh() { + s.sourceMu.Lock() + defer s.sourceMu.Unlock() + + source := s.source + if source == nil { + return + } + gen := source.Generation() + if s.haveGen && gen == s.cachedGen { + return + } + + transport := source.Transport() + var children []*compositedav.Child + for remote := range source.Remotes() { children = append(children, &compositedav.Child{ Child: &dirfs.Child{ Name: remote.Name, @@ -85,8 +112,25 @@ func (s *FileSystemForLocal) SetRemotes(domain string, remotes []*drive.Remote, Transport: transport, }) } + s.h.SetChildren(source.Domain(), children...) + s.cachedGen = gen + s.haveGen = true +} - s.h.SetChildren(domain, children...) +// HandleConn handles connections from local WebDAV clients +func (s *FileSystemForLocal) HandleConn(conn net.Conn, remoteAddr net.Addr) error { + return s.listener.HandleConn(conn, remoteAddr) +} + +// SetRemoteSource sets the source from which the filesystem reads the +// current set of remotes. It replaces any previously set source and +// forces a rebuild on the next incoming request. +func (s *FileSystemForLocal) SetRemoteSource(source drive.RemoteSource) { + s.sourceMu.Lock() + s.source = source + s.cachedGen = 0 + s.haveGen = false + s.sourceMu.Unlock() } // Close() stops serving the WebDAV content diff --git a/drive/local.go b/drive/local.go index 300d142d4..8e200c3e2 100644 --- a/drive/local.go +++ b/drive/local.go @@ -10,6 +10,7 @@ package drive import ( + "iter" "net" "net/http" ) @@ -21,16 +22,46 @@ type Remote struct { Available func() bool } +// RemoteSource provides the current set of remote Taildrive nodes +// on demand. The drive filesystem consults Generation on each request +// and only rebuilds its internal child list when the value differs +// from the previously cached one. This lets callers avoid the +// per-netmap O(n) rebuild that an eager SetRemotes call would +// require. +// +// All methods may be called concurrently. +type RemoteSource interface { + // Domain returns the current tailnet domain under which remotes + // appear as sub-folders. + Domain() string + + // Transport returns the http.RoundTripper used to reach remotes. + Transport() http.RoundTripper + + // Remotes yields the current set of remote nodes. + // It is called by the drive filesystem only when Generation has + // changed since the last call. + Remotes() iter.Seq[*Remote] + + // Generation returns a monotonically-increasing counter that + // changes whenever the values returned by Domain, Transport, or + // Remotes might have changed. The drive filesystem reads it on + // every request and skips the rebuild path entirely when it + // matches the previously-cached value. + Generation() uint64 +} + // FileSystemForLocal is the Taildrive filesystem exposed to local clients. It // provides a unified WebDAV interface to remote Taildrive shares on other nodes. type FileSystemForLocal interface { // HandleConn handles connections from local WebDAV clients HandleConn(conn net.Conn, remoteAddr net.Addr) error - // SetRemotes sets the complete set of remotes on the given tailnet domain - // using a map of name -> url. If transport is specified, that transport - // will be used to connect to these remotes. - SetRemotes(domain string, remotes []*Remote, transport http.RoundTripper) + // SetRemoteSource sets the source from which the filesystem + // reads the current set of remotes. The source is consulted + // lazily on incoming WebDAV requests, so a stale cap or empty + // tailnet costs nothing per netmap update. + SetRemoteSource(source RemoteSource) // Close() stops serving the WebDAV content Close() error diff --git a/ipn/ipnlocal/drive.go b/ipn/ipnlocal/drive.go index 110ffff2a..70a52cb1d 100644 --- a/ipn/ipnlocal/drive.go +++ b/ipn/ipnlocal/drive.go @@ -9,6 +9,7 @@ "errors" "fmt" "io" + "iter" "net/http" "net/netip" "os" @@ -18,20 +19,41 @@ "tailscale.com/ipn" "tailscale.com/tailcfg" "tailscale.com/types/logger" - "tailscale.com/types/netmap" "tailscale.com/types/views" "tailscale.com/util/httpm" ) func init() { hookSetNetMapLockedDrive.Set(setNetMapLockedDrive) + hookInstallDriveRemoteSource.Set(installDriveRemoteSource) } -func setNetMapLockedDrive(b *LocalBackend, nm *netmap.NetworkMap) { - b.updateDrivePeersLocked(nm) +// setNetMapLockedDrive runs on every full netmap install (the only path that +// can flip self caps or change the tailnet domain) to re-notify IPN bus +// listeners of the current local shares. +// +// It deliberately does NOT touch the remotes list passed to the local drive +// filesystem: that flows through [driveRemoteSource], which the filesystem +// pulls from lazily and only when something might have changed. See +// [LocalBackend.driveGen]. +func setNetMapLockedDrive(b *LocalBackend) { b.driveNotifyCurrentSharesLocked() } +// installDriveRemoteSource registers a [drive.RemoteSource] on the local +// Taildrive filesystem so it can pull the current set of remotes on demand +// instead of being pushed a fresh list on every netmap update. +// +// It is wired from [NewLocalBackend] via [hookInstallDriveRemoteSource] so +// non-drive builds don't reference the drive package at all. +func installDriveRemoteSource(b *LocalBackend) { + fs, ok := b.sys.DriveForLocal.GetOK() + if !ok { + return + } + fs.SetRemoteSource(driveRemoteSource{b}) +} + // DriveSetServerAddr tells Taildrive to use the given address for connecting // to the drive.FileServer that's exposing local files as an unprivileged // user. @@ -285,80 +307,93 @@ func (b *LocalBackend) DriveGetShares() views.SliceView[*drive.Share, drive.Shar return b.pm.prefs.DriveShares() } -// updateDrivePeersLocked sets all applicable peers from the netmap as Taildrive -// remotes. -func (b *LocalBackend) updateDrivePeersLocked(nm *netmap.NetworkMap) { - fs, ok := b.sys.DriveForLocal.GetOK() - if !ok { - return - } +// driveRemoteSource implements [drive.RemoteSource] by reading from a +// [LocalBackend]. It is installed once on the local Taildrive filesystem +// at [NewLocalBackend] time and consulted lazily on incoming WebDAV +// requests. +// +// The filesystem only rebuilds its internal child list when [Generation] +// changes, so peer churn that doesn't affect the drive-capable peer set +// (e.g. endpoint or online-status mutations on non-drive peers) costs +// nothing more than a single atomic load. +type driveRemoteSource struct{ b *LocalBackend } - var driveRemotes []*drive.Remote - if b.DriveAccessEnabled() { - // Only populate peers if access is enabled, otherwise leave blank. - driveRemotes = b.driveRemotesFromPeers(nm) - } - - fs.SetRemotes(nm.Domain, driveRemotes, b.newDriveTransport()) +// Generation implements [drive.RemoteSource]. +func (s driveRemoteSource) Generation() uint64 { + return s.b.driveGen.Load() } -func (b *LocalBackend) driveRemotesFromPeers(nm *netmap.NetworkMap) []*drive.Remote { - b.logf("[v1] taildrive: setting up drive remotes from %d peers", len(nm.Peers)) - driveRemotes := make([]*drive.Remote, 0, len(nm.Peers)) - for _, p := range nm.Peers { - peer := p - peerID := peer.ID() - peerKey := peer.Key().ShortString() - peerName := peer.DisplayName(false) - - driveRemotes = append(driveRemotes, &drive.Remote{ - Name: peerName, - URL: func() string { - url := fmt.Sprintf("%s/%s", b.currentNode().PeerAPIBase(peer), taildrivePrefix[1:]) - b.logf("[v2] taildrive: url for peer %s (%s): %s", peerKey, peerName, url) - return url - }, - Available: func() bool { - // Peers are available to Taildrive if: - // - They are online - // - Their PeerAPI is reachable - // - They are allowed to share at least one folder with us - cn := b.currentNode() - peer, ok := cn.NodeByID(peerID) - if !ok { - b.logf("[v2] taildrive: peer %s (%s, id=%v) not found", peerKey, peerName, peerID) - return false - } - - // Exclude offline peers. - // TODO(oxtoacart): for some reason, this correctly - // catches when a node goes from offline to online, - // but not the other way around... - // TODO(oxtoacart,nickkhyl): the reason was probably - // that we were using netmap.Peers instead of b.peers. - // The netmap.Peers slice is not updated in all cases. - // It should be fixed now that we use PeerByIDOk. - if !peer.Online().Get() { - b.logf("[v2] taildrive: peer %s (%s, id=%v) offline", peerKey, peerName, peerID) - return false - } - if cn.PeerAPIBase(peer) == "" { - b.logf("[v2] taildrive: peer %s (%s, id=%v) PeerAPI unreachable", peerKey, peerName, peerID) - return false - } - // Check that the peer is allowed to share with us. - if cn.PeerHasCap(peer, tailcfg.PeerCapabilityTaildriveSharer) { - b.logf("[v2] taildrive: peer %s (%s, id=%v) available", peerKey, peerName, peerID) - return true - } - - b.logf("[v2] taildrive: peer %s (%s, id=%v) not allowed to share", peerKey, peerName, peerID) - return false - }, - }) +// Domain implements [drive.RemoteSource]. +func (s driveRemoteSource) Domain() string { + nm := s.b.currentNode().NetMap() + if nm == nil { + return "" + } + return nm.Domain +} + +// Transport implements [drive.RemoteSource]. +func (s driveRemoteSource) Transport() http.RoundTripper { + return s.b.newDriveTransport() +} + +// Remotes implements [drive.RemoteSource]. It yields one [*drive.Remote] +// per peer; the per-Remote Available closure filters out peers that +// aren't online, don't expose PeerAPI, or haven't granted us the +// Taildrive sharer cap. If drive access is disabled on this node, no +// remotes are yielded at all, so the filesystem ends up with an empty +// child list. +func (s driveRemoteSource) Remotes() iter.Seq[*drive.Remote] { + return func(yield func(*drive.Remote) bool) { + b := s.b + if !b.DriveAccessEnabled() { + return + } + cn := b.currentNode() + peers := cn.Peers() + b.logf("[v1] taildrive: building drive remotes from %d peers", len(peers)) + for _, peer := range peers { + peerID := peer.ID() + peerKey := peer.Key().ShortString() + peerName := peer.DisplayName(false) + r := &drive.Remote{ + Name: peerName, + URL: func() string { + url := fmt.Sprintf("%s/%s", cn.PeerAPIBase(peer), taildrivePrefix[1:]) + b.logf("[v2] taildrive: url for peer %s (%s): %s", peerKey, peerName, url) + return url + }, + Available: func() bool { + // Peers are available to Taildrive if: + // - They are online + // - Their PeerAPI is reachable + // - They are allowed to share at least one folder with us + peer, ok := cn.NodeByID(peerID) + if !ok { + b.logf("[v2] taildrive: peer %s (%s, id=%v) not found", peerKey, peerName, peerID) + return false + } + if !peer.Online().Get() { + b.logf("[v2] taildrive: peer %s (%s, id=%v) offline", peerKey, peerName, peerID) + return false + } + if cn.PeerAPIBase(peer) == "" { + b.logf("[v2] taildrive: peer %s (%s, id=%v) PeerAPI unreachable", peerKey, peerName, peerID) + return false + } + if cn.PeerHasCap(peer, tailcfg.PeerCapabilityTaildriveSharer) { + b.logf("[v2] taildrive: peer %s (%s, id=%v) available", peerKey, peerName, peerID) + return true + } + b.logf("[v2] taildrive: peer %s (%s, id=%v) not allowed to share", peerKey, peerName, peerID) + return false + }, + } + if !yield(r) { + return + } + } } - b.logf("[v1] taildrive: built %d candidate remotes", len(driveRemotes)) - return driveRemotes } // responseBodyWrapper wraps an io.ReadCloser and stores diff --git a/ipn/ipnlocal/drive_test.go b/ipn/ipnlocal/drive_test.go index aca05432b..faa32f3d1 100644 --- a/ipn/ipnlocal/drive_test.go +++ b/ipn/ipnlocal/drive_test.go @@ -7,9 +7,33 @@ import ( "errors" + "fmt" + "net" "net/http" "net/http/httptest" + "net/netip" + "slices" + "sync" "testing" + "time" + + "github.com/studio-b12/gowebdav" + "tailscale.com/control/controlclient" + "tailscale.com/drive" + "tailscale.com/drive/driveimpl" + "tailscale.com/ipn" + "tailscale.com/ipn/store/mem" + "tailscale.com/tailcfg" + "tailscale.com/tsd" + "tailscale.com/tstest" + "tailscale.com/types/ipproto" + "tailscale.com/types/logger" + "tailscale.com/types/netmap" + "tailscale.com/types/views" + "tailscale.com/util/eventbus/eventbustest" + "tailscale.com/util/set" + "tailscale.com/wgengine" + "tailscale.com/wgengine/filter/filtertype" ) // TestDriveTransportRoundTrip_NetworkError tests that driveTransport.RoundTrip @@ -48,3 +72,473 @@ type mockRoundTripper struct { func (m *mockRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { return nil, m.err } + +// TestDriveGenBumps verifies that driveGen increments at each of the three +// call sites the [driveRemoteSource] cache invalidation depends on: +// full netmap installs, netmap deltas, and packet-filter updates. If any of +// these stops bumping, WebDAV clients would see a stale remote list until +// some other event happened to bump the counter, so the test asserts each +// site independently. +func TestDriveGenBumps(t *testing.T) { + b := newTestLocalBackend(t) + + assertBumped := func(name string, fn func()) { + t.Helper() + before := b.driveGen.Load() + fn() + after := b.driveGen.Load() + if after <= before { + t.Errorf("%s: driveGen = %d after, %d before; want strictly greater", name, after, before) + } + } + + selfNode := (&tailcfg.Node{ + ID: 1, + Key: makeNodeKeyFromID(1), + Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, + }).View() + peer2 := (&tailcfg.Node{ + ID: 2, + Key: makeNodeKeyFromID(2), + Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.2/32")}, + }).View() + + assertBumped("setNetMapLocked", func() { + b.mu.Lock() + defer b.mu.Unlock() + b.setNetMapLocked(&netmap.NetworkMap{ + SelfNode: selfNode, + Peers: []tailcfg.NodeView{peer2}, + }) + }) + + assertBumped("UpdateNetmapDelta", func() { + muts, ok := netmap.MutationsFromMapResponse(&tailcfg.MapResponse{ + OnlineChange: map[tailcfg.NodeID]bool{peer2.ID(): true}, + }, time.Time{}) + if !ok { + t.Fatal("MutationsFromMapResponse failed") + } + if !b.UpdateNetmapDelta(muts) { + t.Fatal("UpdateNetmapDelta returned false") + } + }) + + assertBumped("UpdatePacketFilter", func() { + if !b.UpdatePacketFilter(views.Slice[tailcfg.FilterRule]{}, nil) { + t.Fatal("UpdatePacketFilter returned false") + } + }) +} + +// TestDriveRemoteSourceAccessGate verifies that [driveRemoteSource.Remotes] +// yields zero entries when the self node lacks NodeAttrsTaildriveAccess, and +// the full peer set when it has it. This is the only path that decides +// whether the local Taildrive root shows any folders at all, so a regression +// here would silently break access for every user. +func TestDriveRemoteSourceAccessGate(t *testing.T) { + b := newTestLocalBackend(t) + + selfNode := (&tailcfg.Node{ + ID: 1, + Key: makeNodeKeyFromID(1), + Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.1/32")}, + }).View() + peers := []tailcfg.NodeView{ + (&tailcfg.Node{ + ID: 2, + Key: makeNodeKeyFromID(2), + Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.2/32")}, + }).View(), + (&tailcfg.Node{ + ID: 3, + Key: makeNodeKeyFromID(3), + Addresses: []netip.Prefix{netip.MustParsePrefix("100.64.0.3/32")}, + }).View(), + } + + install := func(allCaps set.Set[tailcfg.NodeCapability]) { + b.mu.Lock() + defer b.mu.Unlock() + b.setNetMapLocked(&netmap.NetworkMap{ + SelfNode: selfNode, + Peers: peers, + AllCaps: allCaps, + }) + } + + src := driveRemoteSource{b: b} + collect := func() []*drive.Remote { + var out []*drive.Remote + for r := range src.Remotes() { + out = append(out, r) + } + return out + } + + install(nil) + if got := collect(); len(got) != 0 { + t.Errorf("Remotes without DriveAccess cap: got %d entries, want 0", len(got)) + } + + install(set.Of(tailcfg.NodeAttrsTaildriveAccess)) + if got := collect(); len(got) != len(peers) { + t.Errorf("Remotes with DriveAccess cap: got %d entries, want %d", len(got), len(peers)) + } +} + +// captureFS wraps a real [drive.FileSystemForLocal] but records the most +// recent [drive.RemoteSource] installed via SetRemoteSource. It lets the +// test below observe what NewLocalBackend wires up without poking at +// LocalBackend internals. +type captureFS struct { + drive.FileSystemForLocal + + mu sync.Mutex + source drive.RemoteSource +} + +func (c *captureFS) SetRemoteSource(source drive.RemoteSource) { + c.mu.Lock() + c.source = source + c.mu.Unlock() + c.FileSystemForLocal.SetRemoteSource(source) +} + +func (c *captureFS) lastSource() drive.RemoteSource { + c.mu.Lock() + defer c.mu.Unlock() + return c.source +} + +// TestDriveRemoteSourceInstalled verifies that NewLocalBackend wires a +// [driveRemoteSource] into sys.DriveForLocal via the +// hookInstallDriveRemoteSource init hook. Without this wiring, every +// remote-set update would silently no-op because the filesystem would +// have no source to pull from. +func TestDriveRemoteSourceInstalled(t *testing.T) { + bus := eventbustest.NewBus(t) + sys := tsd.NewSystemWithBus(bus) + cf := &captureFS{FileSystemForLocal: driveimpl.NewFileSystemForLocal(logger.Discard)} + sys.Set(drive.FileSystemForLocal(cf)) + t.Cleanup(func() { cf.FileSystemForLocal.Close() }) + + b := newTestLocalBackendWithSys(t, sys) + + src := cf.lastSource() + if src == nil { + t.Fatal("SetRemoteSource was never called on FileSystemForLocal") + } + drs, ok := src.(driveRemoteSource) + if !ok { + t.Fatalf("installed source is %T, want driveRemoteSource", src) + } + if drs.b != b { + t.Errorf("driveRemoteSource.b = %p, want LocalBackend %p", drs.b, b) + } +} + +// driveEndToEndHarness wires up: +// +// - a real [LocalBackend] backed by a mock controlclient, so a full netmap +// can be delivered through the same code path the real controlclient +// uses (SetControlClientStatus → setNetMapLocked → updateFilterLocked); +// +// - a real [driveimpl.FileSystemForLocal] injected into [tsd.System] +// before NewLocalBackend, so installDriveRemoteSource fires; and +// +// - a TCP listener that hands accepted connections to fs.HandleConn, +// plus a gowebdav client pointed at it, so WebDAV PROPFINDs traverse +// the same code path a real Mac/Windows client would. +type driveEndToEndHarness struct { + t *testing.T + b *LocalBackend + cc *mockControl + fs drive.FileSystemForLocal + client *gowebdav.Client + domain string + + selfAddr netip.Addr // self IPv4 single-IP address + + // wg tracks the listener's accept loop and every per-connection + // fs.HandleConn goroutine, so t.Cleanup can Wait for them after + // closing the listener and prevent goroutine leaks across tests. + wg sync.WaitGroup +} + +func newDriveEndToEndHarness(t *testing.T) *driveEndToEndHarness { + bus := eventbustest.NewBus(t) + sys := tsd.NewSystemWithBus(bus) + + logf := logger.Discard + sys.Set(new(mem.Store)) + eng, err := wgengine.NewFakeUserspaceEngine(logf, sys.Set, sys.HealthTracker.Get(), sys.UserMetricsRegistry(), sys.Bus.Get()) + if err != nil { + t.Fatalf("NewFakeUserspaceEngine: %v", err) + } + t.Cleanup(eng.Close) + sys.Set(eng) + + fs := driveimpl.NewFileSystemForLocal(logf) + sys.Set(drive.FileSystemForLocal(fs)) + t.Cleanup(func() { fs.Close() }) + + b := newLocalBackendWithSysAndTestControl(t, false, sys, func(tb testing.TB, opts controlclient.Options) controlclient.Client { + return newClient(tb, opts) + }) + if err := b.Start(ipn.Options{}); err != nil { + t.Fatalf("(*LocalBackend).Start: %v", err) + } + + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("net.Listen: %v", err) + } + + client := gowebdav.NewClient(fmt.Sprintf("http://%s", ln.Addr()), "", "") + client.SetTransport(&http.Transport{DisableKeepAlives: true}) + + h := &driveEndToEndHarness{ + t: t, + b: b, + cc: b.cc.(*mockControl), + fs: fs, + client: client, + domain: "example.com", + selfAddr: netip.MustParseAddr("100.64.0.1"), + } + + t.Cleanup(h.wg.Wait) // runs last, after ln.Close etc + t.Cleanup(func() { ln.Close() }) + + h.wg.Go(func() { + for { + conn, err := ln.Accept() + if err != nil { + return + } + h.wg.Go(func() { fs.HandleConn(conn, conn.RemoteAddr()) }) + } + }) + + return h +} + +// peerSpec describes a peer to be installed in a test netmap. +// driveCap controls whether the packet filter is built with a rule that +// grants the peer the [tailcfg.PeerCapabilityTaildriveSharer] cap to self. +type peerSpec struct { + id tailcfg.NodeID + name string + addr netip.Addr + online bool + driveCap bool + peerAPI bool +} + +func (s peerSpec) node() *tailcfg.Node { + n := &tailcfg.Node{ + ID: s.id, + Name: s.name + ".example.com.", + Key: makeNodeKeyFromID(s.id), + Addresses: []netip.Prefix{netip.PrefixFrom(s.addr, s.addr.BitLen())}, + Online: new(s.online), + } + if s.peerAPI { + hi := &tailcfg.Hostinfo{ + Services: []tailcfg.Service{{ + Proto: tailcfg.PeerAPI4, + Port: 12345, + }}, + } + n.Hostinfo = hi.View() + } + // The controlclient calls InitDisplayNames before delivering peers to + // LocalBackend. The mockControl path used here skips that step, so we + // do it explicitly to get DisplayName(false) → ComputedName ("alpha" + // etc.) rather than the node-key fallback. + n.InitDisplayNames("example.com") + return n +} + +// filterMatchesFor builds the parsed packet-filter matches that grant +// PeerCapabilityTaildriveSharer from each driveCap-enabled peer's address +// to the self address. PeerHasCap reads its result, so this is what flips a +// peer in or out of the drive-capable set in the e2e test. +func (h *driveEndToEndHarness) filterMatchesFor(specs []peerSpec) []filtertype.Match { + var matches []filtertype.Match + for _, s := range specs { + if !s.driveCap { + continue + } + matches = append(matches, filtertype.Match{ + IPProto: views.SliceOf([]ipproto.Proto{ipproto.TCP}), + Srcs: []netip.Prefix{netip.PrefixFrom(s.addr, s.addr.BitLen())}, + Caps: []filtertype.CapMatch{{ + Dst: netip.PrefixFrom(h.selfAddr, h.selfAddr.BitLen()), + Cap: tailcfg.PeerCapabilityTaildriveSharer, + }}, + }) + } + return matches +} + +// installNetMap pushes a netmap built from specs through the mock control +// client. The self node always has Addresses and NodeAttrsTaildriveAccess +// in AllCaps. The packet filter is generated to grant +// PeerCapabilityTaildriveSharer from each driveCap-enabled peer's IP to +// the self IP. +func (h *driveEndToEndHarness) installNetMap(specs []peerSpec) { + h.t.Helper() + peers := make([]tailcfg.NodeView, 0, len(specs)) + for _, s := range specs { + peers = append(peers, s.node().View()) + } + nm := &netmap.NetworkMap{ + SelfNode: (&tailcfg.Node{ + ID: 1, + Name: "self.example.com.", + Key: makeNodeKeyFromID(1), + Addresses: []netip.Prefix{netip.PrefixFrom(h.selfAddr, h.selfAddr.BitLen())}, + }).View(), + Domain: h.domain, + Peers: peers, + AllCaps: set.Of(tailcfg.NodeAttrsTaildriveAccess), + PacketFilter: h.filterMatchesFor(specs), + } + h.cc.send(sendOpt{loginFinished: true, nm: nm}) +} + +// sendMapResponse turns a [tailcfg.MapResponse] into a slice of +// [netmap.NodeMutation] via netmap.MutationsFromMapResponse — the same path +// the real controlclient uses on the incremental-fast-path — and dispatches +// it through [LocalBackend.UpdateNetmapDelta]. Going through MapResponse +// rather than constructing NodeMutation values directly keeps the test +// honest about wire-shaped inputs: every mutation kind exercised here is +// reachable by the control server with no test-only constructs. +func (h *driveEndToEndHarness) sendMapResponse(mr *tailcfg.MapResponse) { + h.t.Helper() + muts, ok := netmap.MutationsFromMapResponse(mr, time.Time{}) + if !ok { + h.t.Fatalf("MutationsFromMapResponse(%+v) returned !ok", mr) + } + if !h.b.UpdateNetmapDelta(muts) { + h.t.Fatalf("UpdateNetmapDelta(%+v) returned false", muts) + } +} + +// readDirNames issues a real WebDAV PROPFIND against the local Taildrive +// root and returns the names of the directory entries the filesystem +// reports as available, sorted for stable comparison. +func (h *driveEndToEndHarness) readDirNames() []string { + h.t.Helper() + infos, err := h.client.ReadDir("/" + h.domain) + if err != nil { + h.t.Fatalf("ReadDir: %v", err) + } + var names []string + for _, fi := range infos { + names = append(names, fi.Name()) + } + slices.Sort(names) + return names +} + +// waitForDir polls readDirNames until it matches want or the timeout +// elapses. We poll because the gen-bump → next-WebDAV-request rebuild has +// no happens-before synchronization with the caller of UpdateNetmapDelta; +// the rebuild happens lazily on the next inbound request. +func (h *driveEndToEndHarness) waitForDir(want []string) { + h.t.Helper() + slices.Sort(want) + err := tstest.WaitFor(2*time.Second, func() error { + got := h.readDirNames() + if !slices.Equal(got, want) { + return fmt.Errorf("readDirNames = %v, want %v", got, want) + } + return nil + }) + if err != nil { + h.t.Fatal(err) + } +} + +// TestDriveRemotesEndToEnd exercises the full pipeline from netmap mutation +// through driveGen invalidation, [driveRemoteSource.Remotes] re-evaluation, +// and compositedav child rebuild, validated by a real WebDAV PROPFIND each +// time. +// +// We cover the four ways a peer can enter or leave the drive-capable set: +// +// 1. Peer upsert (PeersChanged → NodeMutationUpsert): adds a brand-new +// sharer peer. +// 2. Peer removal (PeersRemoved → NodeMutationRemove): removes an +// existing sharer. +// 3. Packet-filter change (UpdatePacketFilter): flips an existing peer +// in or out of the drive-capable set without any per-peer mutation. +// 4. Per-peer online toggle (OnlineChange → NodeMutationOnline): doesn't +// change drive-capable membership but does change Available(), so the +// PROPFIND listing must reflect it. +// +// Note: deltas are delivered via [LocalBackend.UpdateNetmapDelta] / +// [LocalBackend.UpdatePacketFilter] directly rather than through a real +// testcontrol stream. Those are the same entry points the controlclient +// calls into after parsing a MapResponse, so the LocalBackend-side +// semantics under test are identical; the wire-level netmap streaming +// itself is covered by [TestNetmapDeltaFastPath] in tstest/largetailnet. +func TestDriveRemotesEndToEnd(t *testing.T) { + h := newDriveEndToEndHarness(t) + + peer2 := peerSpec{id: 2, name: "alpha", addr: netip.MustParseAddr("100.64.0.2"), online: true, driveCap: true, peerAPI: true} + peer3 := peerSpec{id: 3, name: "bravo", addr: netip.MustParseAddr("100.64.0.3"), online: true, driveCap: false, peerAPI: true} + + // Initial state: peer2 has the sharer cap, peer3 does not. Only + // peer2 should appear in the WebDAV listing. + h.installNetMap([]peerSpec{peer2, peer3}) + h.waitForDir([]string{"alpha"}) + + // 1) Upsert a new sharer peer (peer4) via PeersChanged, then refresh + // the packet filter so PeerHasCap returns true for it. PeerHasCap + // is driven by the filter, so adding the peer alone is not enough. + peer4 := peerSpec{id: 4, name: "charlie", addr: netip.MustParseAddr("100.64.0.4"), online: true, driveCap: true, peerAPI: true} + h.sendMapResponse(&tailcfg.MapResponse{ + PeersChanged: []*tailcfg.Node{peer4.node()}, + }) + if !h.b.UpdatePacketFilter(views.Slice[tailcfg.FilterRule]{}, h.filterMatchesFor([]peerSpec{peer2, peer3, peer4})) { + t.Fatal("UpdatePacketFilter returned false") + } + h.waitForDir([]string{"alpha", "charlie"}) + + // 2) Remove peer2 via PeersRemoved. + h.sendMapResponse(&tailcfg.MapResponse{ + PeersRemoved: []tailcfg.NodeID{peer2.id}, + }) + if !h.b.UpdatePacketFilter(views.Slice[tailcfg.FilterRule]{}, h.filterMatchesFor([]peerSpec{peer3, peer4})) { + t.Fatal("UpdatePacketFilter returned false") + } + h.waitForDir([]string{"charlie"}) + + // 3) Flip peer3 into the drive-capable set by changing only the + // packet filter — no per-peer mutation. This exercises the + // UpdatePacketFilter bump path specifically. + peer3.driveCap = true + if !h.b.UpdatePacketFilter(views.Slice[tailcfg.FilterRule]{}, h.filterMatchesFor([]peerSpec{peer3, peer4})) { + t.Fatal("UpdatePacketFilter returned false") + } + h.waitForDir([]string{"bravo", "charlie"}) + + // 4) Toggle peer3 offline via OnlineChange. The peer is still in the + // drive-capable set so its name remains in the cached Remotes + // slice, but Available() returns false so dirfs filters it out of + // PROPFIND listings. + h.sendMapResponse(&tailcfg.MapResponse{ + OnlineChange: map[tailcfg.NodeID]bool{peer3.id: false}, + }) + h.waitForDir([]string{"charlie"}) + + // And back online. + h.sendMapResponse(&tailcfg.MapResponse{ + OnlineChange: map[tailcfg.NodeID]bool{peer3.id: true}, + }) + h.waitForDir([]string{"bravo", "charlie"}) +} diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index eba0f661f..2598eea38 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -416,6 +416,16 @@ type LocalBackend struct { // notified about. lastNotifiedDriveShares *views.SliceView[*drive.Share, drive.ShareView] + // driveGen is the generation counter consulted by the [drive.RemoteSource] + // installed on [sys.DriveForLocal]. It is bumped whenever the inputs that + // could change the set of drive-capable remotes might have changed: full + // netmap installs (domain + self caps), netmap delta updates (peer set or + // per-peer addresses), and packet-filter updates (which is where + // [PeerCapability] values actually live). The bump is a single atomic.Add, + // so we do it unconditionally rather than try to detect "did this + // mutation actually matter". + driveGen atomic.Uint64 + // lastSuggestedExitNode stores the last suggested exit node suggestion to // avoid unnecessary churn between multiple equally-good options. lastSuggestedExitNode tailcfg.StableNodeID @@ -738,6 +748,12 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo eventbus.SubscribeFunc(ec, b.onHomeDERPUpdate) mConn.SetNetInfoCallback(b.setNetInfo) // TODO(tailscale/tailscale#17887): move to eventbus + if buildfeatures.HasDrive { + if f, ok := hookInstallDriveRemoteSource.GetOk(); ok { + f(b) + } + } + return b, nil } @@ -2455,6 +2471,15 @@ func (b *LocalBackend) UpdateNetmapDelta(muts []netmap.NodeMutation) (handled bo muts = b.tkaFilterDeltaMutsLocked(muts) needsAuthReconfig := netmapDeltaNeedsAuthReconfig(cn, muts) cn.UpdateNetmapDelta(muts) + if buildfeatures.HasDrive { + // Drive's lazy remotes-source caches its rebuild keyed by this + // generation, so any delta — peer add/remove, address change, + // or otherwise — needs to invalidate that cache. We bump + // unconditionally rather than type-switch over [muts]; the + // atomic add is cheaper than the switch and immune to future + // mutation kinds that might affect drive-cap evaluation. + b.driveGen.Add(1) + } // In order that we can update the cache, keep track of which nodes are // updated and removed. The nodeBackend has already applied any deltas, so @@ -2639,6 +2664,13 @@ func (b *LocalBackend) UpdatePacketFilter(rules views.Slice[tailcfg.FilterRule], metricUpdatePacketFilter.Add(1) cn.setPacketFilter(rules, parsed) b.updateFilterLocked(b.pm.CurrentPrefs()) + if buildfeatures.HasDrive { + // The drive sharer cap (and any other PeerCapability) is + // derived from the packet filter, so an updated rule set can + // flip a peer in or out of the drive-capable set without any + // per-peer mutation. Bump so the next WebDAV request rebuilds. + b.driveGen.Add(1) + } return true } @@ -7396,9 +7428,17 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { } } - if buildfeatures.HasDrive && nm != nil { - if f, ok := hookSetNetMapLockedDrive.GetOk(); ok { - f(b, nm) + if buildfeatures.HasDrive { + // Bump the drive remotes-source generation on every full netmap + // install. The hook is invoked separately below for share-list + // notifications; the bump itself is unconditional because the + // drive feature might not be wired in this build but the atomic + // add is cheap. + b.driveGen.Add(1) + if nm != nil { + if f, ok := hookSetNetMapLockedDrive.GetOk(); ok { + f(b) + } } } @@ -7429,7 +7469,21 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { // HookSetRuntimeMetricsEnabled is an optional hook for the "runtimemetrics" feature. var HookSetRuntimeMetricsEnabled feature.Hook[func(enabled bool)] -var hookSetNetMapLockedDrive feature.Hook[func(*LocalBackend, *netmap.NetworkMap)] +// hookSetNetMapLockedDrive is invoked on every full netmap install when the +// drive feature is linked in. It does NOT fire on netmap delta updates (peer +// add/remove etc.); those are picked up by the lazy [drive.RemoteSource] +// installed via [hookInstallDriveRemoteSource] plus the per-event bumps to +// [LocalBackend.driveGen]. Its only remaining job is to re-notify IPN bus +// listeners of the current local share list, whose visibility depends on +// self caps that only change on full installs. +var hookSetNetMapLockedDrive feature.Hook[func(*LocalBackend)] + +// hookInstallDriveRemoteSource is invoked from [NewLocalBackend] once the +// LocalBackend is far enough along to be used as a source. It registers a +// [drive.RemoteSource] on [sys.DriveForLocal] so the filesystem can pull +// remotes lazily instead of being pushed a fresh list on every netmap +// update. +var hookInstallDriveRemoteSource feature.Hook[func(*LocalBackend)] // roundTraffic rounds bytes. This is used to preserve user privacy within logs. func roundTraffic(bytes int64) float64 {