diff --git a/control/controlknobs/controlknobs.go b/control/controlknobs/controlknobs.go index 36e3b6d40..d41b6703c 100644 --- a/control/controlknobs/controlknobs.go +++ b/control/controlknobs/controlknobs.go @@ -114,6 +114,22 @@ type Knobs struct { // EmitRuntimeMetrics is whether the node should poll and emit [runtime/metrics] // as [tailscale.com/util/clientmetric]'s. EmitRuntimeMetrics atomic.Bool + + // DisableUDPGRO disables UDP GRO on the magicsock UDP socket. See + // [tailcfg.NodeAttrDisableUDPGRO]. + DisableUDPGRO atomic.Bool + + // DisableUDPGSO disables UDP GSO on the magicsock UDP socket. See + // [tailcfg.NodeAttrDisableUDPGSO]. + DisableUDPGSO atomic.Bool + + // DisableTUNUDPGRO disables UDP GRO on the Tailscale TUN device. See + // [tailcfg.NodeAttrDisableTUNUDPGRO]. + DisableTUNUDPGRO atomic.Bool + + // DisableTUNTCPGRO disables TCP GRO on the Tailscale TUN device. See + // [tailcfg.NodeAttrDisableTUNTCPGRO]. + DisableTUNTCPGRO atomic.Bool } // UpdateFromNodeAttributes updates k (if non-nil) based on the provided self @@ -144,6 +160,10 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { disableHostsFileUpdates = has(tailcfg.NodeAttrDisableHostsFileUpdates) forceRegisterMagicDNSIPv4Only = has(tailcfg.NodeAttrForceRegisterMagicDNSIPv4Only) emitRuntimeMetrics = has(tailcfg.NodeAttrEmitRuntimeMetrics) + disableUDPGRO = has(tailcfg.NodeAttrDisableUDPGRO) + disableUDPGSO = has(tailcfg.NodeAttrDisableUDPGSO) + disableTUNUDPGRO = has(tailcfg.NodeAttrDisableTUNUDPGRO) + disableTUNTCPGRO = has(tailcfg.NodeAttrDisableTUNTCPGRO) ) if has(tailcfg.NodeAttrOneCGNATEnable) { @@ -172,6 +192,10 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) { k.DisableHostsFileUpdates.Store(disableHostsFileUpdates) k.ForceRegisterMagicDNSIPv4Only.Store(forceRegisterMagicDNSIPv4Only) k.EmitRuntimeMetrics.Store(emitRuntimeMetrics) + k.DisableUDPGRO.Store(disableUDPGRO) + k.DisableUDPGSO.Store(disableUDPGSO) + 
k.DisableTUNUDPGRO.Store(disableTUNUDPGRO) + k.DisableTUNTCPGRO.Store(disableTUNTCPGRO) } // AsDebugJSON returns k as something that can be marshalled with json.Marshal diff --git a/feature/relayserver/relayserver.go b/feature/relayserver/relayserver.go index 4f52a7ca7..2de5654c7 100644 --- a/feature/relayserver/relayserver.go +++ b/feature/relayserver/relayserver.go @@ -69,7 +69,7 @@ func servePeerRelayDebugSessions(h *localapi.Handler, w http.ResponseWriter, r * func newExtension(logf logger.Logf, sb ipnext.SafeBackend) (ipnext.Extension, error) { e := &extension{ newServerFn: func(logf logger.Logf, port uint16, onlyStaticAddrPorts bool) (relayServer, error) { - return udprelay.NewServer(logf, port, onlyStaticAddrPorts, sb.Sys().UserMetricsRegistry()) + return udprelay.NewServer(logf, port, onlyStaticAddrPorts, sb.Sys().UserMetricsRegistry(), sb.Sys().ControlKnobs()) }, logf: logger.WithPrefix(logf, featureName+": "), } diff --git a/net/batching/conn_default.go b/net/batching/conn_default.go index 77c4c8b6a..922a6a358 100644 --- a/net/batching/conn_default.go +++ b/net/batching/conn_default.go @@ -6,11 +6,12 @@ package batching import ( + "tailscale.com/control/controlknobs" "tailscale.com/types/nettype" ) // TryUpgradeToConn is no-op on all platforms except linux. 
-func TryUpgradeToConn(pconn nettype.PacketConn, _ string, _ int, _ string) nettype.PacketConn { +func TryUpgradeToConn(pconn nettype.PacketConn, _ string, _ int, _ string, _ *controlknobs.Knobs) nettype.PacketConn { return pconn } diff --git a/net/batching/conn_linux.go b/net/batching/conn_linux.go index e4f437ac8..1ddb08b0b 100644 --- a/net/batching/conn_linux.go +++ b/net/batching/conn_linux.go @@ -20,6 +20,7 @@ "golang.org/x/net/ipv4" "golang.org/x/net/ipv6" "golang.org/x/sys/unix" + "tailscale.com/control/controlknobs" "tailscale.com/envknob" "tailscale.com/hostinfo" "tailscale.com/net/neterror" @@ -426,8 +427,14 @@ func tryEnableRXQOverflowsCounter(pconn nettype.PacketConn) (enabled bool) { } // tryEnableUDPOffload attempts to enable the UDP_GRO socket option on pconn, -// and returns two booleans indicating TX and RX UDP offload support. -func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) { +// and returns two booleans indicating TX and RX UDP offload support. If knobs +// is non-nil, UDP GSO and/or UDP GRO may be disabled via control-plane node +// attributes. 
+func tryEnableUDPOffload(pconn nettype.PacketConn, knobs *controlknobs.Knobs) (hasTX bool, hasRX bool) { + disableGSO := envknob.Bool("TS_DEBUG_DISABLE_UDP_GSO") || + (knobs != nil && knobs.DisableUDPGSO.Load()) + disableGRO := envknob.Bool("TS_DEBUG_DISABLE_UDP_GRO") || + (knobs != nil && knobs.DisableUDPGRO.Load()) if c, ok := pconn.(*net.UDPConn); ok { rc, err := c.SyscallConn() if err != nil { @@ -435,11 +442,11 @@ func tryEnableUDPOffload(pconn nettype.PacketConn) (hasTX bool, hasRX bool) { } err = rc.Control(func(fd uintptr) { var errSyscall error - if !envknob.Bool("TS_DEBUG_DISABLE_UDP_GSO") { + if !disableGSO { _, errSyscall = syscall.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT) hasTX = errSyscall == nil } - if !envknob.Bool("TS_DEBUG_DISABLE_UDP_GRO") { + if !disableGRO { errSyscall = syscall.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1) hasRX = errSyscall == nil } @@ -518,8 +525,9 @@ func getRXQOverflowsMetric(name string) *clientmetric.Metric { // pconn to a [Conn] if appropriate. A batch size of [IdealBatchSize] is // suggested for the best performance. If len(rxqOverflowsMetricName) is // nonzero, then read ops will propagate the SO_RXQ_OVFL control message counter -// to a clientmetric with the supplied name. -func TryUpgradeToConn(pconn nettype.PacketConn, network string, batchSize int, rxqOverflowsMetricName string) nettype.PacketConn { +// to a clientmetric with the supplied name. If knobs is non-nil, UDP GSO +// and/or UDP GRO may be disabled via control-plane node attributes. +func TryUpgradeToConn(pconn nettype.PacketConn, network string, batchSize int, rxqOverflowsMetricName string, knobs *controlknobs.Knobs) nettype.PacketConn { if runtime.GOOS != "linux" { // Exclude Android. 
return pconn @@ -569,7 +577,7 @@ func TryUpgradeToConn(pconn nettype.PacketConn, network string, batchSize int, r panic("bogus network") } var txOffload bool - txOffload, b.rxOffload = tryEnableUDPOffload(uc) + txOffload, b.rxOffload = tryEnableUDPOffload(uc, knobs) b.txOffload.Store(txOffload) if len(rxqOverflowsMetricName) > 0 && tryEnableRXQOverflowsCounter(uc) { // Don't register the metric unless the socket option has been diff --git a/net/tstun/wrap_linux.go b/net/tstun/wrap_linux.go index a4e76de5a..bb193266f 100644 --- a/net/tstun/wrap_linux.go +++ b/net/tstun/wrap_linux.go @@ -15,22 +15,26 @@ "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/checksum" "gvisor.dev/gvisor/pkg/tcpip/header" + "tailscale.com/control/controlknobs" "tailscale.com/envknob" "tailscale.com/net/tsaddr" ) // SetLinkFeaturesPostUp configures link features on t based on select TS_TUN_ -// environment variables and OS feature tests. Callers should ensure t is -// up prior to calling, otherwise OS feature tests may be inconclusive. -func (t *Wrapper) SetLinkFeaturesPostUp() { +// environment variables, control-plane node attributes (via knobs, which may be +// nil), and OS feature tests. Callers should ensure t is up prior to calling, +// otherwise OS feature tests may be inconclusive. 
+func (t *Wrapper) SetLinkFeaturesPostUp(knobs *controlknobs.Knobs) { if t.isTAP || runtime.GOOS == "android" { return } if groDev, ok := t.tdev.(tun.GRODevice); ok { - if envknob.Bool("TS_TUN_DISABLE_UDP_GRO") { + if envknob.Bool("TS_TUN_DISABLE_UDP_GRO") || + (knobs != nil && knobs.DisableTUNUDPGRO.Load()) { groDev.DisableUDPGRO() } - if envknob.Bool("TS_TUN_DISABLE_TCP_GRO") { + if envknob.Bool("TS_TUN_DISABLE_TCP_GRO") || + (knobs != nil && knobs.DisableTUNTCPGRO.Load()) { groDev.DisableTCPGRO() } err := probeTCPGRO(groDev) @@ -42,6 +46,31 @@ func (t *Wrapper) SetLinkFeaturesPostUp() { } } +// ApplyGROKnobs applies the [tailcfg.NodeAttrDisableTUNUDPGRO] and +// [tailcfg.NodeAttrDisableTUNTCPGRO] knob values (via knobs; if knobs is nil +// it does nothing) to t's underlying device. It is intended to be called +// when a control-plane node attribute change is detected after +// [SetLinkFeaturesPostUp] has already run. +// +// Note: wireguard-go's GRO disablement is one-way (sticky); ApplyGROKnobs can +// move TUN UDP/TCP GRO from enabled to disabled, but the reverse requires a +// client restart. 
+func (t *Wrapper) ApplyGROKnobs(knobs *controlknobs.Knobs) { + if t.isTAP || runtime.GOOS == "android" || knobs == nil { + return + } + groDev, ok := t.tdev.(tun.GRODevice) + if !ok { + return + } + if knobs.DisableTUNUDPGRO.Load() { + groDev.DisableUDPGRO() + } + if knobs.DisableTUNTCPGRO.Load() { + groDev.DisableTCPGRO() + } +} + func probeTCPGRO(dev tun.GRODevice) error { ipPort := netip.MustParseAddrPort(tsaddr.TailscaleServiceIPString + ":0") fingerprint := []byte("tailscale-probe-tun-gro") diff --git a/net/tstun/wrap_noop.go b/net/tstun/wrap_noop.go index 8f5b62d0c..319649c55 100644 --- a/net/tstun/wrap_noop.go +++ b/net/tstun/wrap_noop.go @@ -5,4 +5,8 @@ package tstun -func (t *Wrapper) SetLinkFeaturesPostUp() {} +import "tailscale.com/control/controlknobs" + +func (t *Wrapper) SetLinkFeaturesPostUp(_ *controlknobs.Knobs) {} + +func (t *Wrapper) ApplyGROKnobs(_ *controlknobs.Knobs) {} diff --git a/net/udprelay/server.go b/net/udprelay/server.go index 3b0f72989..9052873b0 100644 --- a/net/udprelay/server.go +++ b/net/udprelay/server.go @@ -25,6 +25,7 @@ "go4.org/mem" "golang.org/x/crypto/blake2s" "golang.org/x/net/ipv6" + "tailscale.com/control/controlknobs" "tailscale.com/disco" "tailscale.com/net/batching" "tailscale.com/net/netaddr" @@ -83,6 +84,7 @@ type Server struct { metrics *metrics netMon *netmon.Monitor cloudInfo *cloudinfo.CloudInfo // used to query cloud metadata services + controlKnobs *controlknobs.Knobs // or nil mu sync.Mutex // guards the following fields macSecrets views.Slice[[blake2s.Size]byte] // [0] is most recent, max 2 elements @@ -376,8 +378,8 @@ func (e *serverEndpoint) stateLocked() endpointState { // port selection is left up to the host networking stack. If // onlyStaticAddrPorts is true, then dynamic addr:port discovery will be // disabled, and only addr:port's set via [Server.SetStaticAddrPorts] will be -// used. Metrics must be non-nil. 
-func NewServer(logf logger.Logf, port uint16, onlyStaticAddrPorts bool, metrics *usermetric.Registry) (s *Server, err error) { +// used. Metrics must be non-nil. knobs may be nil. +func NewServer(logf logger.Logf, port uint16, onlyStaticAddrPorts bool, metrics *usermetric.Registry, knobs *controlknobs.Knobs) (s *Server, err error) { s = &Server{ logf: logf, disco: key.NewDisco(), @@ -388,6 +390,7 @@ func NewServer(logf logger.Logf, port uint16, onlyStaticAddrPorts bool, metrics serverEndpointByDisco: make(map[key.SortedPairOfDiscoPublic]*serverEndpoint), nextVNI: minVNI, cloudInfo: cloudinfo.New(logf), + controlKnobs: knobs, } s.discoPublic = s.disco.Public() s.metrics = registerMetrics(metrics) @@ -689,7 +692,7 @@ func (s *Server) bindSockets(desiredPort uint16) error { break SocketsLoop } } - pc := batching.TryUpgradeToConn(uc, network, batching.IdealBatchSize, "udprelay_rxq_overflows") + pc := batching.TryUpgradeToConn(uc, network, batching.IdealBatchSize, "udprelay_rxq_overflows", s.controlKnobs) bc, ok := pc.(batching.Conn) if !ok { bc = &singlePacketConn{uc} diff --git a/net/udprelay/server_test.go b/net/udprelay/server_test.go index 00b9c2423..6ba52db7b 100644 --- a/net/udprelay/server_test.go +++ b/net/udprelay/server_test.go @@ -214,7 +214,7 @@ func TestServer(t *testing.T) { t.Run(tt.name, func(t *testing.T) { reg := new(usermetric.Registry) deregisterMetrics() - server, err := NewServer(t.Logf, 0, true, reg) + server, err := NewServer(t.Logf, 0, true, reg, nil) if err != nil { t.Fatal(err) } diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 0f3be5a01..65d4fcdd5 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -186,7 +186,8 @@ // - 137: 2026-04-15: Client handles 429 responses to /machine/register. // - 138: 2026-03-31: can handle C2N /debug/tka. 
// - 139: 2026-05-22: Client understands [NodeAttrEmitRuntimeMetrics] -const CurrentCapabilityVersion CapabilityVersion = 139 +// - 140: 2026-05-27: Client understands [NodeAttrDisableUDPGRO], [NodeAttrDisableUDPGSO], [NodeAttrDisableTUNUDPGRO], [NodeAttrDisableTUNTCPGRO] +const CurrentCapabilityVersion CapabilityVersion = 140 // ID is an integer ID for a user, node, or login allocated by the // control plane. @@ -2793,6 +2794,42 @@ func (p NodeCapabilityPrefix) ToAttribute(value string) NodeCapability { // NodeAttrEmitRuntimeMetrics enables emission of [runtime/metrics] as // [tailscale.com/util/clientmetric]'s. NodeAttrEmitRuntimeMetrics NodeCapability = "emit-runtime-metrics" + + // NodeAttrDisableUDPGRO disables UDP GRO (UDP_GRO socket option on Linux) + // on the magicsock UDP socket. It exists so control can mitigate kernel + // regressions that cause throughput or correctness issues with UDP GRO on + // specific OS/kernel versions, without requiring a client release. See + // https://github.com/tailscale/tailscale/issues/19777 for example. + // Currently only consulted on Linux; may apply to other platforms as they + // gain UDP GRO support. + NodeAttrDisableUDPGRO NodeCapability = "disable-udp-gro" + + // NodeAttrDisableUDPGSO disables UDP GSO (UDP_SEGMENT socket option on + // Linux) on the magicsock UDP socket. It exists so control can mitigate + // kernel regressions that cause throughput or correctness issues with UDP + // GSO on specific OS/kernel versions, without requiring a client release. + // See https://github.com/tailscale/tailscale/issues/19777 for example. + // Currently only consulted on Linux; may apply to other platforms as they + // gain UDP GSO support. + NodeAttrDisableUDPGSO NodeCapability = "disable-udp-gso" + + // NodeAttrDisableTUNUDPGRO disables UDP GRO on the Tailscale TUN device. 
+ // It exists so control can mitigate kernel regressions that cause + // throughput or correctness issues with TUN UDP GRO on specific OS/kernel + // versions, without requiring a client release. See + // https://github.com/tailscale/tailscale/issues/13041 for example. + // Currently only consulted on Linux; may apply to other platforms as they + // gain TUN UDP GRO support. + NodeAttrDisableTUNUDPGRO NodeCapability = "disable-tun-udp-gro" + + // NodeAttrDisableTUNTCPGRO disables TCP GRO on the Tailscale TUN device. + // It exists so control can mitigate kernel regressions that cause + // throughput or correctness issues with TUN TCP GRO on specific OS/kernel + // versions, without requiring a client release. See + // https://github.com/tailscale/tailscale/issues/13041 for example. + // Currently only consulted on Linux; may apply to other platforms as they + // gain TUN TCP GRO support. + NodeAttrDisableTUNTCPGRO NodeCapability = "disable-tun-tcp-gro" ) const ( diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 8de8e85f6..1b70af728 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -397,6 +397,16 @@ type Conn struct { // experiencing a write error, and is used to throttle the rate of rebinds. lastErrRebind syncs.AtomicValue[time.Time] + // appliedDisableUDPGRO and appliedDisableUDPGSO cache the last UDP offload + // controlknobs values we reacted to. They are compared against the live + // knob values during netmap updates so we can detect a control-plane + // transition and trigger a [Conn.Rebind] to re-evaluate the + // UDP_GRO/UDP_SEGMENT socket options. Guarded by c.mu. Only consulted on + // Linux; on other platforms tryEnableUDPOffload is a no-op so any + // transition is meaningless and no rebind is fired. + appliedDisableUDPGRO bool + appliedDisableUDPGSO bool + // staticEndpoints are user set endpoints that this node should // advertise amongst its wireguard endpoints. 
It is user's // responsibility to ensure that traffic from these endpoints is routed @@ -634,6 +644,15 @@ func NewConn(opts Options) (*Conn, error) { c.eventBus = opts.EventBus c.port.Store(uint32(opts.Port)) c.controlKnobs = opts.ControlKnobs + if runtime.GOOS == "linux" && c.controlKnobs != nil { + // Seed the cached "last applied" UDP offload knob values so the first + // netmap update doesn't spuriously trigger a rebind: bindSocket (called + // shortly after NewConn) will read these same knob values when + // configuring UDP_GRO/UDP_SEGMENT, so they're already in sync. We only + // do this on Linux because tryEnableUDPOffload is a no-op elsewhere. + c.appliedDisableUDPGRO = c.controlKnobs.DisableUDPGRO.Load() + c.appliedDisableUDPGSO = c.controlKnobs.DisableUDPGSO.Load() + } c.epFunc = opts.endpointsFunc() c.derpActiveFunc = opts.derpActiveFunc() c.idleFunc = opts.IdleFunc @@ -2996,6 +3015,8 @@ func (c *Conn) setNetworkMapInternal(self tailcfg.NodeView, peers []tailcfg.Node !self.HasCap(tailcfg.NodeAttrDisableRelayClient) && !self.HasCap(tailcfg.NodeAttrOnlyTCP443) + udpOffloadKnobsChanged := false + var curGRO, curGSO bool c.mu.Lock() relayClientChanged := c.relayClientEnabled != relayClientEnabled c.relayClientEnabled = relayClientEnabled @@ -3004,12 +3025,31 @@ func (c *Conn) setNetworkMapInternal(self tailcfg.NodeView, peers []tailcfg.Node peersSnap := c.peerSnapshotLocked() isClosed := c.closed c.usingCachedNetmap.Store(isCached) + if runtime.GOOS == "linux" && c.controlKnobs != nil { + curGRO = c.controlKnobs.DisableUDPGRO.Load() + curGSO = c.controlKnobs.DisableUDPGSO.Load() + if curGRO != c.appliedDisableUDPGRO || curGSO != c.appliedDisableUDPGSO { + c.appliedDisableUDPGRO = curGRO + c.appliedDisableUDPGSO = curGSO + udpOffloadKnobsChanged = true + } + } c.mu.Unlock() // release c.mu before potentially calling c.updateRelayServersSet which is O(m * n) if isClosed { return // nothing to do here, the conn is closed and the update is no longer relevant } + 
if udpOffloadKnobsChanged { + // A control-plane node attribute toggled UDP GRO or UDP GSO. Rebind + // the UDP sockets so tryEnableUDPOffload re-runs and applies the new + // values, then ReSTUN to refresh endpoints. + c.logf("magicsock: UDP offload knobs changed (DisableUDPGRO=%v DisableUDPGSO=%v); rebinding", + curGRO, curGSO) + c.Rebind() + go c.ReSTUN("udp-offload-knobs-changed") + } + if peersChanged || relayClientChanged { if !relayClientEnabled { // [relayManager]'s run loop updates [relayManager.hasPeerRelayServers]. @@ -3631,13 +3671,13 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur defer ruc.mu.Unlock() if runtime.GOOS == "js" { - ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize()) + ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize(), c.controlKnobs) return nil } if debugAlwaysDERP() { c.logf("disabled %v per TS_DEBUG_ALWAYS_USE_DERP", network) - ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize()) + ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize(), c.controlKnobs) return nil } @@ -3696,7 +3736,7 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur if debugBindSocket() { c.logf("magicsock: bindSocket: successfully listened %v port %d", network, port) } - ruc.setConnLocked(pconn, network, c.bind.BatchSize()) + ruc.setConnLocked(pconn, network, c.bind.BatchSize(), c.controlKnobs) if network == "udp4" { c.health.SetUDP4Unbound(false) } @@ -3707,7 +3747,7 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur // Set pconn to a dummy conn whose reads block until closed. // This keeps the receive funcs alive for a future in which // we get a link change and we can try binding again. 
- ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize()) + ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize(), c.controlKnobs) if network == "udp4" { c.health.SetUDP4Unbound(true) } diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index b3c21cb24..322252f2b 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -2242,8 +2242,8 @@ func TestRebindingUDPConn(t *testing.T) { t.Fatal(err) } defer realConn.Close() - c.setConnLocked(realConn.(nettype.PacketConn), "udp4", 1) - c.setConnLocked(newBlockForeverConn(), "", 1) + c.setConnLocked(realConn.(nettype.PacketConn), "udp4", 1, nil) + c.setConnLocked(newBlockForeverConn(), "", 1, nil) } // https://github.com/tailscale/tailscale/issues/6680: don't ignore diff --git a/wgengine/magicsock/rebinding_conn.go b/wgengine/magicsock/rebinding_conn.go index 11398c592..c361a6b09 100644 --- a/wgengine/magicsock/rebinding_conn.go +++ b/wgengine/magicsock/rebinding_conn.go @@ -12,6 +12,7 @@ "syscall" "golang.org/x/net/ipv6" + "tailscale.com/control/controlknobs" "tailscale.com/net/batching" "tailscale.com/net/netaddr" "tailscale.com/net/packet" @@ -41,9 +42,9 @@ type RebindingUDPConn struct { // nettype.PacketConn to a batchingConn when appropriate. This upgrade is // intentionally pushed closest to where read/write ops occur in order to avoid // disrupting surrounding code that assumes nettype.PacketConn is a -// *net.UDPConn. -func (c *RebindingUDPConn) setConnLocked(p nettype.PacketConn, network string, batchSize int) { - upc := batching.TryUpgradeToConn(p, network, batchSize, "magicsock_udp_rxq_overflows") +// *net.UDPConn. knobs may be nil. 
+func (c *RebindingUDPConn) setConnLocked(p nettype.PacketConn, network string, batchSize int, knobs *controlknobs.Knobs) { + upc := batching.TryUpgradeToConn(p, network, batchSize, "magicsock_udp_rxq_overflows", knobs) c.pconn = upc c.pconnAtomic.Store(&upc) c.port = uint16(c.localAddrLocked().Port) diff --git a/wgengine/userspace.go b/wgengine/userspace.go index bf6d97ddd..e064487ef 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -130,6 +130,14 @@ type userspaceEngine struct { reconfigureVPN func() error // or nil conn25PacketHooks Conn25PacketHooks // or nil + // lastAppliedDisableTUNUDPGRO and lastAppliedDisableTUNTCPGRO cache the + // controlknobs values that were last applied to the TUN device. They are + // read and updated under e.mu and only consulted when buildfeatures.HasGRO + // is true. Note: wireguard-go's GRO disablement is one-way (sticky), so + // transitions from disabled back to enabled require a client restart. + lastAppliedDisableTUNUDPGRO bool + lastAppliedDisableTUNTCPGRO bool + mu sync.Mutex // guards following; see lock order comment below netMap *netmap.NetworkMap // or nil closing bool // Close was called (even if we're still closing) @@ -564,7 +572,15 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) if err := e.router.Up(); err != nil { return nil, fmt.Errorf("router.Up: %w", err) } - tsTUNDev.SetLinkFeaturesPostUp() + tsTUNDev.SetLinkFeaturesPostUp(e.controlKnobs) + if buildfeatures.HasGRO && runtime.GOOS == "linux" && e.controlKnobs != nil { + // Seed the cached "last applied" TUN GRO knob values so the first + // netmap update doesn't spuriously call ApplyGROKnobs: + // SetLinkFeaturesPostUp above already applied these same values. We + // only do this on Linux because ApplyGROKnobs is a no-op elsewhere. 
+ e.lastAppliedDisableTUNUDPGRO = e.controlKnobs.DisableTUNUDPGRO.Load() + e.lastAppliedDisableTUNTCPGRO = e.controlKnobs.DisableTUNTCPGRO.Load() + } // It's a little pointless to apply no-op settings here (they // should already be empty?), but it at least exercises the @@ -1278,7 +1294,26 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) { func (e *userspaceEngine) SetNetworkMap(nm *netmap.NetworkMap) { e.mu.Lock() e.netMap = nm + tunGROKnobsChanged := false + var curUDP, curTCP bool + if buildfeatures.HasGRO && runtime.GOOS == "linux" && e.controlKnobs != nil { + curUDP = e.controlKnobs.DisableTUNUDPGRO.Load() + curTCP = e.controlKnobs.DisableTUNTCPGRO.Load() + // Only act on transitions toward "disabled"; wireguard-go's GRO + // disablement is sticky and cannot be reversed without restart. + if (curUDP && !e.lastAppliedDisableTUNUDPGRO) || + (curTCP && !e.lastAppliedDisableTUNTCPGRO) { + tunGROKnobsChanged = true + } + e.lastAppliedDisableTUNUDPGRO = curUDP + e.lastAppliedDisableTUNTCPGRO = curTCP + } e.mu.Unlock() + if buildfeatures.HasGRO && tunGROKnobsChanged { + e.logf("wgengine: TUN GRO knobs changed (DisableTUNUDPGRO=%v DisableTUNTCPGRO=%v); applying", + curUDP, curTCP) + e.tundev.ApplyGROKnobs(e.controlKnobs) + } if e.networkLogger.Running() { e.networkLogger.ReconfigNetworkMap(nm) }