From a694ff682b90bb46321b2815e83e73fcf750d52e Mon Sep 17 00:00:00 2001 From: chaosinthecrd Date: Thu, 2 Apr 2026 14:07:50 +0100 Subject: [PATCH] kube/authkey,kube/state,cmd/containerboot: preserve device_id across restarts Stop clearing device_id, device_fqdn, and device_ips from state on startup. These keys are now preserved across restarts so the operator can track device identity. Expand ClearReissueAuthKey to clear device state and tailscaled profile data when performing a full auth key reissue. Updates #14080 Signed-off-by: chaosinthecrd --- cmd/containerboot/kube.go | 3 +++ cmd/containerboot/kube_test.go | 12 ------------ kube/authkey/authkey.go | 19 ++++++++++++++++++- kube/authkey/authkey_test.go | 19 ++++++++++++++++++- kube/state/state.go | 13 +------------ kube/state/state_test.go | 6 +++--- 6 files changed, 43 insertions(+), 29 deletions(-) diff --git a/cmd/containerboot/kube.go b/cmd/containerboot/kube.go index c7a66efc2..3e97710da 100644 --- a/cmd/containerboot/kube.go +++ b/cmd/containerboot/kube.go @@ -127,6 +127,9 @@ func (kc *kubeClient) deleteAuthKey(ctx context.Context) error { // resetContainerbootState resets state from previous runs of containerboot to // ensure the operator doesn't use stale state when a Pod is first recreated. +// +// Device identity keys (device_id, device_fqdn, device_ips) are preserved so +// the operator can clean up the old device from the control plane. func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string, tailscaledConfigAuthkey string) error { existingSecret, err := kc.GetSecret(ctx, kc.stateSecret) switch { diff --git a/cmd/containerboot/kube_test.go b/cmd/containerboot/kube_test.go index 6acaa60e1..b6b750334 100644 --- a/cmd/containerboot/kube_test.go +++ b/cmd/containerboot/kube_test.go @@ -303,9 +303,6 @@ func TestResetContainerbootState(t *testing.T) { kubetypes.KeyCapVer: capver, kubetypes.KeyPodUID: []byte("1234"), // Cleared keys. - kubetypes.KeyDeviceID: nil, - kubetypes.KeyDeviceFQDN: nil, - kubetypes.KeyDeviceIPs: nil, kubetypes.KeyHTTPSEndpoint: nil, egressservices.KeyEgressServices: nil, ingressservices.IngressConfigKey: nil, @@ -321,9 +318,6 @@ func TestResetContainerbootState(t *testing.T) { kubetypes.KeyCapVer: capver, kubetypes.KeyReissueAuthkey: nil, // Cleared keys. - kubetypes.KeyDeviceID: nil, - kubetypes.KeyDeviceFQDN: nil, - kubetypes.KeyDeviceIPs: nil, kubetypes.KeyHTTPSEndpoint: nil, egressservices.KeyEgressServices: nil, ingressservices.IngressConfigKey: nil, @@ -338,9 +332,6 @@ func TestResetContainerbootState(t *testing.T) { kubetypes.KeyCapVer: capver, // reissue_authkey not cleared. // Cleared keys. - kubetypes.KeyDeviceID: nil, - kubetypes.KeyDeviceFQDN: nil, - kubetypes.KeyDeviceIPs: nil, kubetypes.KeyHTTPSEndpoint: nil, egressservices.KeyEgressServices: nil, ingressservices.IngressConfigKey: nil, @@ -355,9 +346,6 @@ func TestResetContainerbootState(t *testing.T) { kubetypes.KeyCapVer: capver, // reissue_authkey not cleared. // Cleared keys. - kubetypes.KeyDeviceID: nil, - kubetypes.KeyDeviceFQDN: nil, - kubetypes.KeyDeviceIPs: nil, kubetypes.KeyHTTPSEndpoint: nil, egressservices.KeyEgressServices: nil, ingressservices.IngressConfigKey: nil, diff --git a/kube/authkey/authkey.go b/kube/authkey/authkey.go index 5698f55f0..c56301b12 100644 --- a/kube/authkey/authkey.go +++ b/kube/authkey/authkey.go @@ -19,6 +19,7 @@ "log" "time" + "tailscale.com/ipn" "tailscale.com/ipn/conffile" "tailscale.com/kube/kubeapi" "tailscale.com/kube/kubeclient" @@ -46,11 +47,27 @@ func SetReissueAuthKey(ctx context.Context, kc kubeclient.Client, stateSecretNam // ClearReissueAuthKey removes the reissue_authkey marker from the state Secret // to signal to the operator that we've successfully received the new key. func ClearReissueAuthKey(ctx context.Context, kc kubeclient.Client, stateSecretName string, fieldManager string) error { + existing, err := kc.GetSecret(ctx, stateSecretName) + if err != nil { + return fmt.Errorf("error getting state secret: %w", err) + } + s := &kubeapi.Secret{ Data: map[string][]byte{ - kubetypes.KeyReissueAuthkey: nil, + kubetypes.KeyReissueAuthkey: nil, + kubetypes.KeyDeviceID: nil, + kubetypes.KeyDeviceFQDN: nil, + kubetypes.KeyDeviceIPs: nil, + string(ipn.MachineKeyStateKey): nil, + string(ipn.CurrentProfileStateKey): nil, + string(ipn.KnownProfilesStateKey): nil, }, } + + if profileKey := string(existing.Data["_current-profile"]); profileKey != "" { + s.Data[profileKey] = nil + } + return kc.StrategicMergePatchSecret(ctx, stateSecretName, s, fieldManager) } diff --git a/kube/authkey/authkey_test.go b/kube/authkey/authkey_test.go index bb01b6a44..268bc46d6 100644 --- a/kube/authkey/authkey_test.go +++ b/kube/authkey/authkey_test.go @@ -12,6 +12,7 @@ "testing" "github.com/google/go-cmp/cmp" + "tailscale.com/ipn" "tailscale.com/kube/kubeapi" "tailscale.com/kube/kubeclient" "tailscale.com/kube/kubetypes" @@ -42,6 +43,15 @@ func TestSetReissueAuthKey(t *testing.T) { func TestClearReissueAuthKey(t *testing.T) { var patched map[string][]byte kc := &kubeclient.FakeClient{ + GetSecretImpl: func(ctx context.Context, name string) (*kubeapi.Secret, error) { + return &kubeapi.Secret{ + Data: map[string][]byte{ + "_current-profile": []byte("profile-abc1"), + "profile-abc1": []byte("some-profile-data"), + "_machinekey": []byte("machine-key-data"), + }, + }, nil + }, StrategicMergePatchSecretImpl: func(ctx context.Context, name string, secret *kubeapi.Secret, _ string) error { patched = secret.Data return nil @@ -54,7 +64,14 @@ func TestClearReissueAuthKey(t *testing.T) { } want := map[string][]byte{ - kubetypes.KeyReissueAuthkey: nil, + kubetypes.KeyReissueAuthkey: nil, + kubetypes.KeyDeviceID: nil, + kubetypes.KeyDeviceFQDN: nil, + kubetypes.KeyDeviceIPs: nil, + string(ipn.MachineKeyStateKey): nil, + string(ipn.CurrentProfileStateKey): nil, + string(ipn.KnownProfilesStateKey): nil, + "profile-abc1": nil, } if diff := cmp.Diff(want, patched); diff != "" { t.Errorf("ClearReissueAuthKey() mismatch (-want +got):\n%s", diff) diff --git a/kube/state/state.go b/kube/state/state.go index ebedb2f72..a7f00b7f2 100644 --- a/kube/state/state.go +++ b/kube/state/state.go @@ -30,19 +30,8 @@ keyDeviceFQDN = ipn.StateKey(kubetypes.KeyDeviceFQDN) ) -// SetInitialKeys sets Pod UID and cap ver and clears tailnet device state -// keys to help stop the operator using stale tailnet device state. +// SetInitialKeys sets Pod UID and cap ver. func SetInitialKeys(store ipn.StateStore, podUID string) error { - // Clear device state keys first so the operator knows if the pod UID - // matches, the other values are definitely not stale. - for _, key := range []ipn.StateKey{keyDeviceID, keyDeviceFQDN, keyDeviceIPs} { - if _, err := store.ReadState(key); err == nil { - if err := store.WriteState(key, nil); err != nil { - return fmt.Errorf("error writing %q to state store: %w", key, err) - } - } - } - if err := store.WriteState(keyPodUID, []byte(podUID)); err != nil { return fmt.Errorf("error writing pod UID to state store: %w", err) } diff --git a/kube/state/state_test.go b/kube/state/state_test.go index 9b2ce69be..b5603acb5 100644 --- a/kube/state/state_test.go +++ b/kube/state/state_test.go @@ -58,9 +58,9 @@ func TestSetInitialStateKeys(t *testing.T) { expected: map[ipn.StateKey][]byte{ keyPodUID: podUID, keyCapVer: expectedCapVer, - keyDeviceID: nil, - keyDeviceFQDN: nil, - keyDeviceIPs: nil, + keyDeviceID: []byte("existing-device-id"), + keyDeviceFQDN: []byte("existing-device-fqdn"), + keyDeviceIPs: []byte(`["1.2.3.4"]`), }, }, } {