diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 5d3bbd36e..54fb0dccb 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -582,7 +582,7 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo // Call our linkChange code once with the current state. // Following changes are triggered via the eventbus. - cd, err := netmon.NewChangeDelta(nil, b.interfaceState, false, false) + cd, err := netmon.NewChangeDelta(nil, b.interfaceState, 0, false) if err != nil { b.logf("[unexpected] setting initial netmon state failed: %v", err) } else { diff --git a/net/netmon/loghelper_test.go b/net/netmon/loghelper_test.go index aec520644..64231e595 100644 --- a/net/netmon/loghelper_test.go +++ b/net/netmon/loghelper_test.go @@ -64,7 +64,7 @@ func syncTestLinkChangeLogLimiter(t *testing.T) { // InjectEvent doesn't work because it's not a major event, so we // instead inject the event ourselves. injector := eventbustest.NewInjector(t, bus) - cd, err := NewChangeDelta(nil, &State{}, true, true) + cd, err := NewChangeDelta(nil, &State{}, 0, true) if err != nil { t.Fatal(err) } diff --git a/net/netmon/netmon.go b/net/netmon/netmon.go index 1d51379d8..a7120cdd3 100644 --- a/net/netmon/netmon.go +++ b/net/netmon/netmon.go @@ -13,6 +13,7 @@ "net/netip" "runtime" "slices" + "strings" "sync" "time" @@ -31,6 +32,15 @@ // us check the wall time sooner than this. const pollWallTimeInterval = 15 * time.Second +// majorTimeJumpThreshold is the minimum sleep duration that warrants +// treating a time jump as a major event requiring socket rebinding, +// even if the interface state appears unchanged. After a long sleep, +// NAT mappings are likely stale and DHCP leases may have expired +// (the renewal happens after wake, so local state may not yet reflect it). +// Short sleeps (e.g., macOS DarkWake maintenance cycles of ~55s) should +// not trigger rebinding if the network state is unchanged. +const majorTimeJumpThreshold = 10 * time.Minute + // message represents a message returned from an osMon. type message interface { // Ignore is whether we should ignore this message. @@ -67,18 +77,18 @@ type Monitor struct { stop chan struct{} // closed on Stop static bool // static Monitor that doesn't actually monitor - mu syncs.Mutex // guards all following fields - cbs set.HandleSet[ChangeFunc] - ifState *State - gwValid bool // whether gw and gwSelfIP are valid - gw netip.Addr // our gateway's IP - gwSelfIP netip.Addr // our own IP address (that corresponds to gw) - started bool - closed bool - goroutines sync.WaitGroup - wallTimer *time.Timer // nil until Started; re-armed AfterFunc per tick - lastWall time.Time - timeJumped bool // whether we need to send a changed=true after a big time jump + mu syncs.Mutex // guards all following fields + cbs set.HandleSet[ChangeFunc] + ifState *State + gwValid bool // whether gw and gwSelfIP are valid + gw netip.Addr // our gateway's IP + gwSelfIP netip.Addr // our own IP address (that corresponds to gw) + started bool + closed bool + goroutines sync.WaitGroup + wallTimer *time.Timer // nil until Started; re-armed AfterFunc per tick + lastWall time.Time + jumpDuration time.Duration // wall-clock time elapsed during detected time jump; 0 if no time jump observed since reset } // ChangeFunc is a callback function registered with Monitor that's called when the @@ -97,10 +107,12 @@ type ChangeDelta struct { // It is always non-nil. new *State - // TimeJumped is whether there was a big jump in wall time since the last - // time we checked. This is a hint that a sleeping device might have - // come out of sleep. - TimeJumped bool + // JumpDuration is non-zero when a wall-clock time jump was detected, + // indicating the machine likely just woke from sleep. It is approximately + // how long the machine was asleep (the wall-clock delta since the last + // check, not an exact sleep measurement). Use TimeJumped() to check + // whether a time jump occurred. + JumpDuration time.Duration DefaultRouteInterface string @@ -121,19 +133,28 @@ type ChangeDelta struct { RebindLikelyRequired bool } +// TimeJumped reports whether a wall-clock time jump was detected, +// indicating the machine likely just woke from sleep. When true, +// JumpDuration contains the approximate duration. +func (cd *ChangeDelta) TimeJumped() bool { + return cd.JumpDuration > 0 +} + // CurrentState returns the current (new) state after the change. func (cd *ChangeDelta) CurrentState() *State { return cd.new } // NewChangeDelta builds a ChangeDelta and eagerly computes the cached fields. +// jumpDuration, if non-zero, indicates a wall-clock time jump was detected +// (the machine likely woke from sleep) and is the approximate duration of the jump. // forceViability, if true, forces DefaultInterfaceMaybeViable to be true regardless of the // actual state of the default interface. This is useful in testing. -func NewChangeDelta(old, new *State, timeJumped bool, forceViability bool) (*ChangeDelta, error) { +func NewChangeDelta(old, new *State, jumpDuration time.Duration, forceViability bool) (*ChangeDelta, error) { cd := ChangeDelta{ - old: old, - new: new, - TimeJumped: timeJumped, + old: old, + new: new, + JumpDuration: jumpDuration, } if cd.new == nil { @@ -165,10 +186,18 @@ func NewChangeDelta(old, new *State, timeJumped bool, forceViability bool) (*Cha cd.DefaultInterfaceMaybeViable = true } - // Compute rebind requirement. The default interface needs to be viable and + // Compute rebind requirement. The default interface needs to be viable and // one of the other conditions needs to be true. + // + // Short time jumps (e.g., macOS DarkWake maintenance cycles of ~55s) are + // excluded — if the network state is unchanged after a brief sleep, there's + // no reason to rebind. However, a major time jump (over majorTimeJumpThreshold) + // warrants a rebind even if the local state looks the same, because NAT + // mappings are likely stale and DHCP leases may have changed (the renewal + // happens after wake, so local state may not yet reflect it). + majorTimeJump := cd.JumpDuration >= majorTimeJumpThreshold cd.RebindLikelyRequired = (cd.old == nil || - cd.TimeJumped || + majorTimeJump || cd.DefaultInterfaceChanged || cd.InterfaceIPsChanged || cd.IsLessExpensive || @@ -181,7 +210,39 @@ func NewChangeDelta(old, new *State, timeJumped bool, forceViability bool) (*Cha // StateDesc returns a description of the old and new states for logging. func (cd *ChangeDelta) StateDesc() string { - return fmt.Sprintf("old: %v new: %v", cd.old, cd.new) + var sb strings.Builder + fmt.Fprintf(&sb, "old: %v new: %v", cd.old, cd.new) + if cd.old != nil && cd.new != nil { + if diff := cd.old.InterfaceDiff(cd.new); diff != "" { + fmt.Fprintf(&sb, " diff: %s", diff) + } + } + if cd.RebindLikelyRequired { + var reasons []string + if cd.old == nil { + reasons = append(reasons, "initial-state") + } + if cd.TimeJumped() { + reasons = append(reasons, fmt.Sprintf("time-jumped(%v)", cd.JumpDuration.Round(time.Second))) + } + if cd.DefaultInterfaceChanged { + reasons = append(reasons, "default-if-changed") + } + if cd.InterfaceIPsChanged { + reasons = append(reasons, "ips-changed") + } + if cd.IsLessExpensive { + reasons = append(reasons, "less-expensive") + } + if cd.HasPACOrProxyConfigChanged { + reasons = append(reasons, "pac-proxy-changed") + } + if cd.AvailableProtocolsChanged { + reasons = append(reasons, "protocols-changed") + } + fmt.Fprintf(&sb, " rebind-reason=[%s]", strings.Join(reasons, ",")) + } + return sb.String() } // InterfaceIPDisappeared reports whether the given IP address exists on any interface @@ -245,7 +306,12 @@ func (cd *ChangeDelta) isInterestingInterfaceChange() bool { } newIps = filterRoutableIPs(newIps) - if !oldInterface.Equal(newInterface) || !prefixesEqual(oldIps, newIps) { + // Only consider routable IP changes and up/down state transitions + // as interesting. Transient metadata changes (Flags like FlagRunning, + // MTU, etc.) should not trigger a major link change, as they cause + // false "major" events on macOS and Windows when the OS notifies us + // of interface changes that don't affect connectivity. + if oldInterface.IsUp() != newInterface.IsUp() || !prefixesEqual(oldIps, newIps) { return true } } @@ -277,8 +343,8 @@ func (cd *ChangeDelta) isInterestingInterfaceChange() bool { } oldIps = filterRoutableIPs(oldIps) - // The interface's IPs, Name, MTU, etc have changed. This is definitely interesting. - if !newInterface.Equal(oldInterface) || !prefixesEqual(oldIps, newIps) { + // Only consider routable IP changes and up/down state transitions. + if newInterface.IsUp() != oldInterface.IsUp() || !prefixesEqual(oldIps, newIps) { return true } } @@ -574,7 +640,8 @@ func (m *Monitor) handlePotentialChange(newState *State, forceCallbacks bool) { return } - delta, err := NewChangeDelta(oldState, newState, timeJumped, false) + jumpDuration := m.jumpDuration + delta, err := NewChangeDelta(oldState, newState, jumpDuration, false) if err != nil { m.logf("[unexpected] error creating ChangeDelta: %v", err) return @@ -587,12 +654,13 @@ func (m *Monitor) handlePotentialChange(newState *State, forceCallbacks bool) { // See if we have a queued or new time jump signal. if timeJumped { m.resetTimeJumpedLocked() + m.logf("time jump detected (slept %v), probably wake from sleep", jumpDuration.Round(time.Second)) } metricChange.Add(1) if delta.RebindLikelyRequired { metricChangeMajor.Add(1) } - if delta.TimeJumped { + if delta.TimeJumped() { metricChangeTimeJump.Add(1) } m.changed.Publish(*delta) @@ -654,14 +722,14 @@ func (m *Monitor) checkWallTimeAdvanceLocked() bool { panic("unreachable") // if callers are correct } now := wallTime() - if now.Sub(m.lastWall) > pollWallTimeInterval*3/2 { - m.timeJumped = true // it is reset by debounce. + if elapsed := now.Sub(m.lastWall); elapsed > pollWallTimeInterval*3/2 { + m.jumpDuration = elapsed } m.lastWall = now - return m.timeJumped + return m.jumpDuration != 0 } // resetTimeJumpedLocked consumes the signal set by checkWallTimeAdvanceLocked. func (m *Monitor) resetTimeJumpedLocked() { - m.timeJumped = false + m.jumpDuration = 0 } diff --git a/net/netmon/netmon_test.go b/net/netmon/netmon_test.go index 97c203274..a3ba4e03e 100644 --- a/net/netmon/netmon_test.go +++ b/net/netmon/netmon_test.go @@ -473,6 +473,98 @@ func TestRebindRequired(t *testing.T) { }, want: false, }, + { + name: "interface-flags-changed-no-ip-change", + s1: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagBroadcast | net.FlagMulticast | net.FlagRunning, + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.12/24")}, + }, + HaveV4: true, + }, + s2: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagBroadcast | net.FlagMulticast, // FlagRunning removed + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.12/24")}, + }, + HaveV4: true, + }, + want: false, + }, + { + name: "interface-mtu-changed-no-ip-change", + s1: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagBroadcast | net.FlagMulticast | net.FlagRunning, + MTU: 1500, + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.12/24")}, + }, + HaveV4: true, + }, + s2: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagBroadcast | net.FlagMulticast | net.FlagRunning, + MTU: 9000, + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.12/24")}, + }, + HaveV4: true, + }, + want: false, + }, + { + name: "interface-went-down", + s1: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagBroadcast | net.FlagMulticast | net.FlagRunning, + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.12/24")}, + }, + HaveV4: true, + }, + s2: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagBroadcast | net.FlagMulticast, // FlagUp removed + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.12/24")}, + }, + HaveV4: true, + }, + want: true, + }, } withIsInterestingInterface(t, func(ni Interface, pfxs []netip.Prefix) bool { @@ -498,7 +590,7 @@ func TestRebindRequired(t *testing.T) { } SetTailscaleInterfaceProps(tt.tsIfName, 1) - cd, err := NewChangeDelta(tt.s1, tt.s2, false, true) + cd, err := NewChangeDelta(tt.s1, tt.s2, 0, true) if err != nil { t.Fatalf("NewChangeDelta error: %v", err) } @@ -510,6 +602,71 @@ func TestRebindRequired(t *testing.T) { } } +func TestTimeJumpedDoesNotTriggerRebind(t *testing.T) { + s := &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagBroadcast | net.FlagMulticast | net.FlagRunning, + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.12/24")}, + }, + HaveV4: true, + } + + // A short time jump (e.g., macOS DarkWake maintenance cycle ~55s) + // with unchanged network state should NOT trigger rebind. + cd, err := NewChangeDelta(s, s, 55*time.Second, true) + if err != nil { + t.Fatalf("NewChangeDelta error: %v", err) + } + if cd.RebindLikelyRequired { + t.Error("RebindLikelyRequired = true for short time jump with unchanged state; want false") + } + if !cd.TimeJumped() { + t.Error("TimeJumped = false; want true") + } + + // A major time jump (>10m) with unchanged state SHOULD trigger rebind, + // because NAT mappings are likely stale. + cd2, err := NewChangeDelta(s, s, 2*time.Hour, true) + if err != nil { + t.Fatalf("NewChangeDelta error: %v", err) + } + if !cd2.RebindLikelyRequired { + t.Error("RebindLikelyRequired = false for major time jump (2h); want true") + } + + // A short time jump with changed state SHOULD trigger rebind. + s2 := &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagBroadcast | net.FlagMulticast | net.FlagRunning, + }}, + }, + InterfaceIPs: map[string][]netip.Prefix{ + "en0": {netip.MustParsePrefix("10.0.0.99/24")}, // IP changed + }, + HaveV4: true, + } + + saveAndRestoreTailscaleIfaceProps(t) + SetTailscaleInterfaceProps("", 0) + + cd3, err := NewChangeDelta(s, s2, 55*time.Second, true) + if err != nil { + t.Fatalf("NewChangeDelta error: %v", err) + } + if !cd3.RebindLikelyRequired { + t.Error("RebindLikelyRequired = false for time jump with changed IP; want true") + } +} + func saveAndRestoreTailscaleIfaceProps(t *testing.T) { t.Helper() index, _ := TailscaleInterfaceIndex() @@ -612,6 +769,71 @@ func TestPrefixesEqual(t *testing.T) { } } +func TestInterfaceDiff(t *testing.T) { + tests := []struct { + name string + s1, s2 *State + wantDiff string // substring expected in diff output; "" means no diff + }{ + { + name: "equal", + s1: &State{HaveV4: true, DefaultRouteInterface: "en0"}, + s2: &State{HaveV4: true, DefaultRouteInterface: "en0"}, + wantDiff: "", + }, + { + name: "flags-changed", + s1: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp | net.FlagRunning, + }}, + }, + }, + s2: &State{ + DefaultRouteInterface: "en0", + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{ + Name: "en0", + Flags: net.FlagUp, + }}, + }, + }, + wantDiff: "flags", + }, + { + name: "mtu-changed", + s1: &State{ + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{Name: "en0", MTU: 1500}}, + }, + }, + s2: &State{ + Interface: map[string]Interface{ + "en0": {Interface: &net.Interface{Name: "en0", MTU: 9000}}, + }, + }, + wantDiff: "MTU", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.s1.InterfaceDiff(tt.s2) + if tt.wantDiff == "" { + if got != "" { + t.Errorf("InterfaceDiff = %q; want empty", got) + } + } else { + if !strings.Contains(got, tt.wantDiff) { + t.Errorf("InterfaceDiff = %q; want substring %q", got, tt.wantDiff) + } + } + }) + } +} + func TestForeachInterface(t *testing.T) { tests := []struct { name string diff --git a/net/netmon/state.go b/net/netmon/state.go index 98ed52e5e..94554dcc3 100644 --- a/net/netmon/state.go +++ b/net/netmon/state.go @@ -443,6 +443,91 @@ func (a Interface) Equal(b Interface) bool { return true } +// InterfaceDiff returns a human-readable summary of the differences between s +// and s2 that would cause Equal to return false. It returns "" if the states +// are equal. This is useful for debugging false link change events where the +// State.String() output looks identical but Equal() returns false because it +// checks fields not shown in String() (like interface Flags, MTU, HardwareAddr). +func (s *State) InterfaceDiff(s2 *State) string { + if s == nil && s2 == nil { + return "" + } + if s == nil { + return "old=nil" + } + if s2 == nil { + return "new=nil" + } + var diffs []string + if s.HaveV6 != s2.HaveV6 { + diffs = append(diffs, fmt.Sprintf("HaveV6: %v->%v", s.HaveV6, s2.HaveV6)) + } + if s.HaveV4 != s2.HaveV4 { + diffs = append(diffs, fmt.Sprintf("HaveV4: %v->%v", s.HaveV4, s2.HaveV4)) + } + if s.IsExpensive != s2.IsExpensive { + diffs = append(diffs, fmt.Sprintf("IsExpensive: %v->%v", s.IsExpensive, s2.IsExpensive)) + } + if s.DefaultRouteInterface != s2.DefaultRouteInterface { + diffs = append(diffs, fmt.Sprintf("DefaultRoute: %q->%q", s.DefaultRouteInterface, s2.DefaultRouteInterface)) + } + if s.HTTPProxy != s2.HTTPProxy { + diffs = append(diffs, fmt.Sprintf("HTTPProxy: %q->%q", s.HTTPProxy, s2.HTTPProxy)) + } + if s.PAC != s2.PAC { + diffs = append(diffs, fmt.Sprintf("PAC: %q->%q", s.PAC, s2.PAC)) + } + if len(s.Interface) != len(s2.Interface) { + diffs = append(diffs, fmt.Sprintf("numInterfaces: %d->%d", len(s.Interface), len(s2.Interface))) + } + if len(s.InterfaceIPs) != len(s2.InterfaceIPs) { + diffs = append(diffs, fmt.Sprintf("numInterfaceIPs: %d->%d", len(s.InterfaceIPs), len(s2.InterfaceIPs))) + } + for iname, i := range s.Interface { + i2, ok := s2.Interface[iname] + if !ok { + diffs = append(diffs, fmt.Sprintf("if %s: removed", iname)) + continue + } + if !i.Equal(i2) { + if i.Interface != nil && i2.Interface != nil { + if i.Flags != i2.Flags { + diffs = append(diffs, fmt.Sprintf("if %s flags: %v->%v", iname, i.Flags, i2.Flags)) + } + if i.MTU != i2.MTU { + diffs = append(diffs, fmt.Sprintf("if %s MTU: %d->%d", iname, i.MTU, i2.MTU)) + } + if i.Index != i2.Index { + diffs = append(diffs, fmt.Sprintf("if %s index: %d->%d", iname, i.Index, i2.Index)) + } + if !bytes.Equal([]byte(i.HardwareAddr), []byte(i2.HardwareAddr)) { + diffs = append(diffs, fmt.Sprintf("if %s hwaddr: %v->%v", iname, i.HardwareAddr, i2.HardwareAddr)) + } + } + if i.Desc != i2.Desc { + diffs = append(diffs, fmt.Sprintf("if %s desc: %q->%q", iname, i.Desc, i2.Desc)) + } + } + } + for iname := range s2.Interface { + if _, ok := s.Interface[iname]; !ok { + diffs = append(diffs, fmt.Sprintf("if %s: added", iname)) + } + } + for iname, vv := range s.InterfaceIPs { + vv2 := s2.InterfaceIPs[iname] + if !slices.Equal(vv, vv2) { + diffs = append(diffs, fmt.Sprintf("ips %s: %v->%v", iname, vv, vv2)) + } + } + for iname := range s2.InterfaceIPs { + if _, ok := s.InterfaceIPs[iname]; !ok { + diffs = append(diffs, fmt.Sprintf("ips %s: added %v", iname, s2.InterfaceIPs[iname])) + } + } + return strings.Join(diffs, "; ") +} + func (s *State) HasPAC() bool { return s != nil && s.PAC != "" } // AnyInterfaceUp reports whether any interface seems like it has Internet access.