diff --git a/control/controlknobs/controlknobs.go b/control/controlknobs/controlknobs.go
index d41b6703c..93c10f26e 100644
--- a/control/controlknobs/controlknobs.go
+++ b/control/controlknobs/controlknobs.go
@@ -130,6 +130,11 @@ type Knobs struct {
 	// DisableTUNTCPGRO disables TCP GRO on the Tailscale TUN device. See
 	// [tailcfg.NodeAttrDisableTUNTCPGRO].
 	DisableTUNTCPGRO atomic.Bool
+
+	// NeverGSOEqualTail enables a UDP GSO sentinel-tail workaround in the
+	// underlay UDP packet TX path on Linux. Applies to magicsock and peer relay
+	// UDP sockets. See [tailcfg.NodeAttrNeverGSOEqualTail].
+	NeverGSOEqualTail atomic.Bool
 }
 
 // UpdateFromNodeAttributes updates k (if non-nil) based on the provided self
@@ -164,6 +169,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
 		disableUDPGSO                        = has(tailcfg.NodeAttrDisableUDPGSO)
 		disableTUNUDPGRO                     = has(tailcfg.NodeAttrDisableTUNUDPGRO)
 		disableTUNTCPGRO                     = has(tailcfg.NodeAttrDisableTUNTCPGRO)
+		neverGSOEqualTail                    = has(tailcfg.NodeAttrNeverGSOEqualTail)
 	)
 
 	if has(tailcfg.NodeAttrOneCGNATEnable) {
@@ -196,6 +202,7 @@ func (k *Knobs) UpdateFromNodeAttributes(capMap tailcfg.NodeCapMap) {
 	k.DisableUDPGSO.Store(disableUDPGSO)
 	k.DisableTUNUDPGRO.Store(disableTUNUDPGRO)
 	k.DisableTUNTCPGRO.Store(disableTUNTCPGRO)
+	k.NeverGSOEqualTail.Store(neverGSOEqualTail)
 }
 
 // AsDebugJSON returns k as something that can be marshalled with json.Marshal
diff --git a/net/batching/conn_linux.go b/net/batching/conn_linux.go
index 1ddb08b0b..1718e98dd 100644
--- a/net/batching/conn_linux.go
+++ b/net/batching/conn_linux.go
@@ -60,6 +60,12 @@ type linuxBatchingConn struct {
 	txOffload          atomic.Bool // supports UDP GSO or similar
 	sendBatchPool      sync.Pool
 	rxqOverflowsMetric *clientmetric.Metric
+	// neverGSOEqualTail, when non-nil and true, enables a sentinel-tail
+	// workaround in the UDP GSO TX path. It points at a
+	// [controlknobs.Knobs.NeverGSOEqualTail] field so the value can be
+	// toggled live via the control plane without requiring a socket rebind.
+	// It is read once per write at the top of [linuxBatchingConn.WriteBatchTo].
+	neverGSOEqualTail *atomic.Bool
 
 	// readOpMu guards read operations that must perform accounting against
 	// rxqOverflows in single-threaded fashion. There are no concurrent usages
@@ -107,6 +113,12 @@ func (c *linuxBatchingConn) SetWriteDeadline(t time.Time) error {
 	maxIPv6PayloadLen = 1<<16 - 1 - 8
 )
 
+// neverGSOEqualTailSentinelPayload is appended by reference (never mutate it)
+// to UDP GSO batches whose datagrams are all equal in length, to work around
+// Linux kernel UDP GSO bugs. For magicsock, 0x07 is handled as WireGuard, and
+// wireguard-go drops it silently as it's less than [device.MinMessageSize].
+var neverGSOEqualTailSentinelPayload = []byte{0x07}
+
 // coalesceMessages iterates 'buffs', setting and coalescing them in 'msgs'
 // where possible while maintaining datagram order.
 //
@@ -120,20 +132,44 @@ func (c *linuxBatchingConn) SetWriteDeadline(t time.Time) error {
 //
 // All msgs[i].Buffers[0] are preceded by a Geneve header (geneve) if geneve.VNI.IsSet().
 //
+// neverGSOEqualTail, when true, enables the sentinel-tail workaround. It is
+// loaded by the caller and passed in so a single coalesceMessages call sees a
+// consistent value even if the underlying control knob flips concurrently.
+//
 // TODO(illotum) explore MSG_ZEROCOPY for large writes (>10KB).
-func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, geneve packet.GeneveHeader, buffs [][]byte, msgs []ipv6.Message, offset int) int {
+func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, geneve packet.GeneveHeader, buffs [][]byte, msgs []ipv6.Message, offset int, neverGSOEqualTail bool) int {
 	var (
-		base         = -1 // index of msg we are currently coalescing into
-		gsoSize      int  // segmentation size of msgs[base]
-		dgramCnt     int  // number of dgrams coalesced into msgs[base]
-		endBatch     bool // tracking flag to start a new batch on next iteration of buffs
-		coalescedLen int  // bytes coalesced into msgs[base]
+		base                     = -1 // index of msg we are currently coalescing into
+		gsoSize                  int  // segmentation size of msgs[base]
+		dgramCnt                 int  // number of dgrams coalesced into msgs[base]
+		endBatchDueToSmallerTail bool // tracking flag to start a new batch on next iteration of buffs
+		coalescedLen             int  // bytes coalesced into msgs[base]
 	)
 	maxPayloadLen := maxIPv4PayloadLen
 	if addr.IP.To4() == nil {
 		maxPayloadLen = maxIPv6PayloadLen
 	}
+	maxDatagramsPerGSOBatch := udpSegmentMaxDatagrams
+	if neverGSOEqualTail {
+		// If neverGSOEqualTail is set we might end up appending a sentinel 1-byte
+		// payload, so we must leave space in our accounting.
+		maxDatagramsPerGSOBatch -= 1
+		maxPayloadLen -= len(neverGSOEqualTailSentinelPayload)
+	}
 	vniIsSet := geneve.VNI.IsSet()
+
+	maybeAppendSentinelTail := func() {
+		if !neverGSOEqualTail || endBatchDueToSmallerTail {
+			// If neverGSOEqualTail is unset we should never append a sentinel
+			// payload as we are running on an unaffected kernel. Or, if we
+			// already have a smaller-than-GSO sized tail, there is no need, since
+			// the kernel bug we are avoiding only triggers when all fragments
+			// are equal in length.
+			return
+		}
+		msgs[base].Buffers = append(msgs[base].Buffers, neverGSOEqualTailSentinelPayload)
+	}
+
 	for i, buff := range buffs {
 		if vniIsSet {
 			geneve.Encode(buff)
@@ -142,32 +178,48 @@ func (c *linuxBatchingConn) coalesceMessages(addr *net.UDPAddr, geneve packet.Ge
 		}
 		if i > 0 {
 			msgLen := len(buff)
+			// okToCoalesceWithSentinel ensures we never coalesce if a sentinel
+			// 1-byte payload might be required, but gsoSize (or more specifically
+			// UDP payload length) is also 1. The whole point of appending a sentinel
+			// 1-byte payload is to append a smaller-than-GSO tail.
+			//
+			// This is defensive as a 1-byte payload, at the time of writing
+			// (2026-05-28), is unlikely to occur. The smallest WireGuard
+			// message size is 32 bytes ([device.MinMessageSize]), and the
+			// [disco.Message] header is 62 bytes.
+			//
+			// It's also overly conservative as it checks for msgLen == 1, but a
+			// msgLen of 1 on the tail where gsoSize is greater would also be fine.
+			okToCoalesceWithSentinel := !neverGSOEqualTail || msgLen > len(neverGSOEqualTailSentinelPayload)
 			if msgLen+coalescedLen <= maxPayloadLen &&
 				msgLen <= gsoSize &&
-				dgramCnt < udpSegmentMaxDatagrams &&
-				!endBatch {
+				dgramCnt < maxDatagramsPerGSOBatch &&
+				!endBatchDueToSmallerTail &&
+				okToCoalesceWithSentinel {
 				// msgs[base].Buffers[0] is set to buff[i] when a new base is set.
 				// This appends a struct iovec element in the underlying struct msghdr (scatter-gather).
 				msgs[base].Buffers = append(msgs[base].Buffers, buff)
-				if i == len(buffs)-1 {
-					setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
-				}
 				dgramCnt++
 				coalescedLen += msgLen
 				if msgLen < gsoSize {
 					// A smaller than gsoSize packet on the tail is legal, but
 					// it must end the batch.
-					endBatch = true
+					endBatchDueToSmallerTail = true
+				}
+				if i == len(buffs)-1 {
+					maybeAppendSentinelTail()
+					setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
 				}
 				continue
 			}
 		}
 		if dgramCnt > 1 {
+			maybeAppendSentinelTail()
 			setGSOSizeInControl(&msgs[base].OOB, uint16(gsoSize))
 		}
 		// Reset prior to incrementing base since we are preparing to start a
 		// new potential batch.
-		endBatch = false
+		endBatchDueToSmallerTail = false
 		base++
 		gsoSize = len(buff)
 		msgs[base].OOB = msgs[base].OOB[:0]
@@ -199,6 +251,27 @@ func (c *linuxBatchingConn) putSendBatch(batch *sendBatch) {
 	c.sendBatchPool.Put(batch)
 }
 
+// appendSentinelTailBatchSizeThreshold represents the minimum batch size
+// required to enter [linuxBatchingConn.coalesceMessages] when
+// [linuxBatchingConn.neverGSOEqualTail] is set. If a batch holds fewer packets
+// than this value, and neverGSOEqualTail is set, we avoid UDP GSO altogether.
+// Appending a sentinel packet, regardless of size, is still overhead on sender,
+// middle network, and receiver.
+//
+// Coalescing (UDP GSO) greatly improves performance for sender (and receiver if
+// they support UDP GRO), but there are diminishing returns if batches are small.
+// We attempt to balance these diminishing returns against the introduction of
+// dead-weight sentinel packets.
+//
+// The initial value of 8 is a power of 2, and in the worst case leads to ~6%
+// byte overhead if the batch is made up of minimum-sized WireGuard transport
+// messages (empty payload keepalives). Worst case is unlikely.
+//
+// 8 * (20 bytes IPv4 header + 8 byte UDP header + 32 byte WG message) = 480 bytes
+// sentinel tail is 20 byte IPv4 header + 8 byte UDP header + 1 byte payload = 29 bytes
+// 29/480 = 0.060...
+const appendSentinelTailBatchSizeThreshold = 8
+
 func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort, geneve packet.GeneveHeader, offset int) error {
 	batch := c.getSendBatch()
 	defer c.putSendBatch(batch)
@@ -212,13 +285,16 @@ func (c *linuxBatchingConn) WriteBatchTo(buffs [][]byte, addr netip.AddrPort, ge
 		batch.ua.IP = batch.ua.IP[:4]
 	}
 	batch.ua.Port = int(addr.Port())
+	// Load the control knob once per write so a single call sees a consistent
+	// value even if the knob flips concurrently.
+	neverGSOEqualTail := c.neverGSOEqualTail != nil && c.neverGSOEqualTail.Load()
 	var (
 		n       int
 		retried bool
 	)
 retry:
-	if c.txOffload.Load() {
-		n = c.coalesceMessages(batch.ua, geneve, buffs, batch.msgs, offset)
+	if c.txOffload.Load() && (!neverGSOEqualTail || len(buffs) >= appendSentinelTailBatchSizeThreshold) {
+		n = c.coalesceMessages(batch.ua, geneve, buffs, batch.msgs, offset, neverGSOEqualTail)
 	} else {
 		vniIsSet := geneve.VNI.IsSet()
 		if vniIsSet {
@@ -535,7 +611,8 @@ func TryUpgradeToConn(pconn nettype.PacketConn, network string, batchSize int, r
 	if network != "udp4" && network != "udp6" {
 		return pconn
 	}
-	if strings.HasPrefix(hostinfo.GetOSVersion(), "2.") {
+	osVer := hostinfo.GetOSVersion()
+	if strings.HasPrefix(osVer, "2.") {
 		// recvmmsg/sendmmsg were added in 2.6.33, but we support down to
 		// 2.6.32 for old NAS devices. See https://github.com/tailscale/tailscale/issues/6807.
 		// As a cheap heuristic: if the Linux kernel starts with "2", just
@@ -579,6 +656,9 @@ func TryUpgradeToConn(pconn nettype.PacketConn, network string, batchSize int, r
 	var txOffload bool
 	txOffload, b.rxOffload = tryEnableUDPOffload(uc, knobs)
 	b.txOffload.Store(txOffload)
+	if knobs != nil {
+		b.neverGSOEqualTail = &knobs.NeverGSOEqualTail
+	}
 	if len(rxqOverflowsMetricName) > 0 && tryEnableRXQOverflowsCounter(uc) {
 		// Don't register the metric unless the socket option has been
 		// successfully set, otherwise we will report a misleading zero value
diff --git a/net/batching/conn_linux_test.go b/net/batching/conn_linux_test.go
index fa4eef33c..857c3d9d7 100644
--- a/net/batching/conn_linux_test.go
+++ b/net/batching/conn_linux_test.go
@@ -140,8 +140,6 @@ func Test_linuxBatchingConn_splitCoalescedMessages(t *testing.T) {
 }
 
 func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
-	c := &linuxBatchingConn{}
-
 	withGeneveSpace := func(len, cap int) []byte {
 		return make([]byte, len+packet.GeneveFixedHeaderLength, cap+packet.GeneveFixedHeaderLength)
 	}
@@ -152,13 +150,17 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
 	geneve.VNI.Set(1)
 
 	cases := []struct {
-		name   string
-		buffs  [][]byte
-		geneve packet.GeneveHeader
+		name              string
+		buffs             [][]byte
+		geneve            packet.GeneveHeader
+		neverGSOEqualTail bool
 		// Each wantLens slice corresponds to the Buffers of a single coalesced message,
 		// and each int is the expected length of the corresponding Buffer[i].
 		wantLens [][]int
 		wantGSO  []int
+		// wantSentinelAtTail[i], when true, asserts that the tail entry of
+		// msgs[i].Buffers is the shared neverGSOEqualTailSentinelPayload slice.
+		wantSentinelAtTail []bool
 	}{
 		{
 			name: "one-message-no-coalesce",
@@ -257,10 +259,113 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
 			wantLens: [][]int{{2 + packet.GeneveFixedHeaderLength, 2 + packet.GeneveFixedHeaderLength, 2 + packet.GeneveFixedHeaderLength}},
 			wantGSO:  []int{2 + packet.GeneveFixedHeaderLength},
 		},
+		{
+			name: "two-equal-len-coalesce-neverGSOEqualTail-appends-sentinel",
+			buffs: [][]byte{
+				withGeneveSpace(3, 3),
+				withGeneveSpace(3, 3),
+			},
+			neverGSOEqualTail:  true,
+			wantLens:           [][]int{{3, 3, len(neverGSOEqualTailSentinelPayload)}},
+			wantGSO:            []int{3},
+			wantSentinelAtTail: []bool{true},
+		},
+		{
+			name: "two-equal-len-coalesce-neverGSOEqualTail-vni-isSet-appends-sentinel",
+			buffs: [][]byte{
+				withGeneveSpace(3, 3+packet.GeneveFixedHeaderLength),
+				withGeneveSpace(3, 3),
+			},
+			geneve:             geneve,
+			neverGSOEqualTail:  true,
+			wantLens:           [][]int{{3 + packet.GeneveFixedHeaderLength, 3 + packet.GeneveFixedHeaderLength, len(neverGSOEqualTailSentinelPayload)}},
+			wantGSO:            []int{3 + packet.GeneveFixedHeaderLength},
+			wantSentinelAtTail: []bool{true},
+		},
+		{
+			name: "two-unequal-len-coalesce-neverGSOEqualTail-smaller-tail-no-sentinel",
+			buffs: [][]byte{
+				withGeneveSpace(3, 3),
+				withGeneveSpace(2, 2),
+			},
+			neverGSOEqualTail: true,
+			wantLens:          [][]int{{3, 2}},
+			wantGSO:           []int{3},
+		},
+		{
+			name: "one-byte-tail-neverGSOEqualTail-not-coalesced",
+			// okToCoalesceWithSentinel is false when msgLen == 1 and
+			// neverGSOEqualTail is set; the 1-byte tail is split into
+			// its own non-coalesced singleton msg.
+			buffs: [][]byte{
+				withGeneveSpace(2, 2),
+				withGeneveSpace(1, 1),
+			},
+			neverGSOEqualTail: true,
+			wantLens:          [][]int{{2}, {1}},
+			wantGSO:           []int{0, 0},
+		},
+		{
+			name: "one-byte-tail-neverGSOEqualTail-vni-isSet-coalesced",
+			// With vniIsSet, msgLen always includes the Geneve header, so
+			// okToCoalesceWithSentinel is true even for "1-byte payloads".
+			// The naturally smaller tail short-circuits the sentinel.
+			buffs: [][]byte{
+				withGeneveSpace(2, 2+packet.GeneveFixedHeaderLength),
+				withGeneveSpace(1, 1),
+			},
+			geneve:            geneve,
+			neverGSOEqualTail: true,
+			wantLens:          [][]int{{2 + packet.GeneveFixedHeaderLength, 1 + packet.GeneveFixedHeaderLength}},
+			wantGSO:           []int{2 + packet.GeneveFixedHeaderLength},
+		},
+		{
+			name: "batch-boundary-sentinel-appended-on-prior-batch-neverGSOEqualTail",
+			// The 4th buff (length 5) is larger than gsoSize=3 so it
+			// closes the first batch. The first batch has dgramCnt > 1 and
+			// no smaller tail, so the sentinel is appended before starting
+			// the new batch.
+			buffs: [][]byte{
+				withGeneveSpace(3, 3),
+				withGeneveSpace(3, 3),
+				withGeneveSpace(3, 3),
+				withGeneveSpace(5, 5),
+			},
+			neverGSOEqualTail:  true,
+			wantLens:           [][]int{{3, 3, 3, len(neverGSOEqualTailSentinelPayload)}, {5}},
+			wantGSO:            []int{3, 0},
+			wantSentinelAtTail: []bool{true, false},
+		},
+		{
+			name: "single-buff-neverGSOEqualTail-no-sentinel",
+			// Only one datagram, no GSO happening, no sentinel.
+			buffs: [][]byte{
+				withGeneveSpace(3, 3),
+			},
+			neverGSOEqualTail: true,
+			wantLens:          [][]int{{3}},
+			wantGSO:           []int{0},
+		},
+		{
+			name: "equal-len-then-smaller-tail-then-equal-neverGSOEqualTail",
+			// The smaller tail ends the first batch with no sentinel (its
+			// length already differs from gsoSize), then a second singleton
+			// batch is started for the trailing equal-length buff.
+			buffs: [][]byte{
+				withGeneveSpace(3, 3),
+				withGeneveSpace(3, 3),
+				withGeneveSpace(2, 2),
+				withGeneveSpace(3, 3),
+			},
+			neverGSOEqualTail: true,
+			wantLens:          [][]int{{3, 3, 2}, {3}},
+			wantGSO:           []int{3, 0},
+		},
 	}
 
 	for _, tt := range cases {
 		t.Run(tt.name, func(t *testing.T) {
+			c := &linuxBatchingConn{}
 			addr := &net.UDPAddr{
 				IP:   net.ParseIP("127.0.0.1"),
 				Port: 1,
@@ -270,7 +375,7 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
 				msgs[i].Buffers = make([][]byte, 1)
 				msgs[i].OOB = make([]byte, controlMessageSize)
 			}
-			got := c.coalesceMessages(addr, tt.geneve, tt.buffs, msgs, packet.GeneveFixedHeaderLength)
+			got := c.coalesceMessages(addr, tt.geneve, tt.buffs, msgs, packet.GeneveFixedHeaderLength, tt.neverGSOEqualTail)
 			if got != len(tt.wantLens) {
 				t.Fatalf("got len %d want: %d", got, len(tt.wantLens))
 			}
@@ -288,6 +393,15 @@ func Test_linuxBatchingConn_coalesceMessages(t *testing.T) {
 					}
 				}
 
+				wantSentinel := i < len(tt.wantSentinelAtTail) && tt.wantSentinelAtTail[i]
+				if wantSentinel {
+					tail := msgs[i].Buffers[len(msgs[i].Buffers)-1]
+					if len(tail) != len(neverGSOEqualTailSentinelPayload) ||
+						&tail[0] != &neverGSOEqualTailSentinelPayload[0] {
+						t.Errorf("msgs[%d] tail buffer is not neverGSOEqualTailSentinelPayload", i)
+					}
+				}
+
 				// coalesceMessages calls setGSOSizeInControl, which uses a cmsg
 				// type of UDP_SEGMENT, and getGSOSizeInControl scans for a cmsg
 				// type of UDP_GRO. Therefore, we have to use the lower-level
diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go
index 65d4fcdd5..96ae15f5c 100644
--- a/tailcfg/tailcfg.go
+++ b/tailcfg/tailcfg.go
@@ -187,7 +187,8 @@
 //   - 138: 2026-03-31: can handle C2N /debug/tka.
 //   - 139: 2026-05-22: Client understands [NodeAttrEmitRuntimeMetrics]
 //   - 140: 2026-05-27: Client understands [NodeAttrDisableUDPGRO], [NodeAttrDisableUDPGSO], [NodeAttrDisableTUNUDPGRO], [NodeAttrDisableTUNTCPGRO]
-const CurrentCapabilityVersion CapabilityVersion = 140
+//   - 141: 2026-05-28: Client understands [NodeAttrNeverGSOEqualTail]
+const CurrentCapabilityVersion CapabilityVersion = 141
 
 // ID is an integer ID for a user, node, or login allocated by the
 // control plane.
@@ -2830,6 +2831,15 @@ func (p NodeCapabilityPrefix) ToAttribute(value string) NodeCapability {
 	// Currently only consulted on Linux; may apply to other platforms as they
 	// gain TUN TCP GRO support.
 	NodeAttrDisableTUNTCPGRO NodeCapability = "disable-tun-tcp-gro"
+
+	// NodeAttrNeverGSOEqualTail enables a sentinel-tail workaround in the
+	// underlay UDP packet TX path on Linux. Applies to magicsock and peer relay
+	// UDP sockets. The workaround avoids emitting UDP GSO batches whose
+	// fragments are all equal in length, at a small payload and packet overhead
+	// cost. It exists so control can mitigate kernel regressions that mangle
+	// UDP headers or checksums for equal-length GSO batches, without requiring
+	// a client release. See https://github.com/tailscale/tailscale/issues/19777.
+	NodeAttrNeverGSOEqualTail NodeCapability = "never-gso-equal-tail"
 )
 
 const (