kube/authkey,kube/state,cmd/containerboot: preserve device_id across restarts

Stop clearing device_id, device_fqdn, and device_ips from state on startup.
These keys are now preserved across restarts so the operator can track
device identity. Expand ClearReissueAuthKey to clear device state and
tailscaled profile data when performing a full auth key reissue.

Updates #14080

Signed-off-by: chaosinthecrd <tom@tmlabs.co.uk>
This commit is contained in:
chaosinthecrd
2026-04-02 14:07:50 +01:00
parent f07c5204ad
commit a694ff682b
6 changed files with 43 additions and 29 deletions

View File

@@ -127,6 +127,9 @@ func (kc *kubeClient) deleteAuthKey(ctx context.Context) error {
// resetContainerbootState resets state from previous runs of containerboot to
// ensure the operator doesn't use stale state when a Pod is first recreated.
//
// Device identity keys (device_id, device_fqdn, device_ips) are preserved so
// the operator can clean up the old device from the control plane.
func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string, tailscaledConfigAuthkey string) error {
existingSecret, err := kc.GetSecret(ctx, kc.stateSecret)
switch {

View File

@@ -303,9 +303,6 @@ func TestResetContainerbootState(t *testing.T) {
kubetypes.KeyCapVer: capver,
kubetypes.KeyPodUID: []byte("1234"),
// Cleared keys.
kubetypes.KeyDeviceID: nil,
kubetypes.KeyDeviceFQDN: nil,
kubetypes.KeyDeviceIPs: nil,
kubetypes.KeyHTTPSEndpoint: nil,
egressservices.KeyEgressServices: nil,
ingressservices.IngressConfigKey: nil,
@@ -321,9 +318,6 @@ func TestResetContainerbootState(t *testing.T) {
kubetypes.KeyCapVer: capver,
kubetypes.KeyReissueAuthkey: nil,
// Cleared keys.
kubetypes.KeyDeviceID: nil,
kubetypes.KeyDeviceFQDN: nil,
kubetypes.KeyDeviceIPs: nil,
kubetypes.KeyHTTPSEndpoint: nil,
egressservices.KeyEgressServices: nil,
ingressservices.IngressConfigKey: nil,
@@ -338,9 +332,6 @@ func TestResetContainerbootState(t *testing.T) {
kubetypes.KeyCapVer: capver,
// reissue_authkey not cleared.
// Cleared keys.
kubetypes.KeyDeviceID: nil,
kubetypes.KeyDeviceFQDN: nil,
kubetypes.KeyDeviceIPs: nil,
kubetypes.KeyHTTPSEndpoint: nil,
egressservices.KeyEgressServices: nil,
ingressservices.IngressConfigKey: nil,
@@ -355,9 +346,6 @@ func TestResetContainerbootState(t *testing.T) {
kubetypes.KeyCapVer: capver,
// reissue_authkey not cleared.
// Cleared keys.
kubetypes.KeyDeviceID: nil,
kubetypes.KeyDeviceFQDN: nil,
kubetypes.KeyDeviceIPs: nil,
kubetypes.KeyHTTPSEndpoint: nil,
egressservices.KeyEgressServices: nil,
ingressservices.IngressConfigKey: nil,

View File

@@ -19,6 +19,7 @@
"log"
"time"
"tailscale.com/ipn"
"tailscale.com/ipn/conffile"
"tailscale.com/kube/kubeapi"
"tailscale.com/kube/kubeclient"
@@ -46,11 +47,27 @@ func SetReissueAuthKey(ctx context.Context, kc kubeclient.Client, stateSecretNam
// ClearReissueAuthKey removes the reissue_authkey marker from the state Secret
// to signal to the operator that we've successfully received the new key.
//
// As part of a full auth key reissue it also clears, in the same patch, the
// device identity keys (device_id, device_fqdn, device_ips) and the tailscaled
// machine key, current-profile and known-profiles entries. If the existing
// Secret has a non-empty current-profile entry, the key named by its value is
// cleared as well.
//
// It returns an error if the state Secret cannot be read or patched.
func ClearReissueAuthKey(ctx context.Context, kc kubeclient.Client, stateSecretName string, fieldManager string) error {
	existing, err := kc.GetSecret(ctx, stateSecretName)
	if err != nil {
		return fmt.Errorf("error getting state secret: %w", err)
	}

	s := &kubeapi.Secret{
		Data: map[string][]byte{
			kubetypes.KeyReissueAuthkey:        nil,
			kubetypes.KeyDeviceID:              nil,
			kubetypes.KeyDeviceFQDN:            nil,
			kubetypes.KeyDeviceIPs:             nil,
			string(ipn.MachineKeyStateKey):     nil,
			string(ipn.CurrentProfileStateKey): nil,
			string(ipn.KnownProfilesStateKey):  nil,
		},
	}

	// The value stored under the current-profile key is treated as the name
	// of another Secret key (the profile's own data), which only the existing
	// Secret can tell us. Guard against a nil Secret so a client that returns
	// (nil, nil) cannot cause a panic here.
	if existing != nil {
		if profileKey := string(existing.Data[string(ipn.CurrentProfileStateKey)]); profileKey != "" {
			s.Data[profileKey] = nil
		}
	}

	return kc.StrategicMergePatchSecret(ctx, stateSecretName, s, fieldManager)
}

View File

@@ -12,6 +12,7 @@
"testing"
"github.com/google/go-cmp/cmp"
"tailscale.com/ipn"
"tailscale.com/kube/kubeapi"
"tailscale.com/kube/kubeclient"
"tailscale.com/kube/kubetypes"
@@ -42,6 +43,15 @@ func TestSetReissueAuthKey(t *testing.T) {
func TestClearReissueAuthKey(t *testing.T) {
var patched map[string][]byte
kc := &kubeclient.FakeClient{
GetSecretImpl: func(ctx context.Context, name string) (*kubeapi.Secret, error) {
return &kubeapi.Secret{
Data: map[string][]byte{
"_current-profile": []byte("profile-abc1"),
"profile-abc1": []byte("some-profile-data"),
"_machinekey": []byte("machine-key-data"),
},
}, nil
},
StrategicMergePatchSecretImpl: func(ctx context.Context, name string, secret *kubeapi.Secret, _ string) error {
patched = secret.Data
return nil
@@ -54,7 +64,14 @@ func TestClearReissueAuthKey(t *testing.T) {
}
want := map[string][]byte{
kubetypes.KeyReissueAuthkey: nil,
kubetypes.KeyReissueAuthkey: nil,
kubetypes.KeyDeviceID: nil,
kubetypes.KeyDeviceFQDN: nil,
kubetypes.KeyDeviceIPs: nil,
string(ipn.MachineKeyStateKey): nil,
string(ipn.CurrentProfileStateKey): nil,
string(ipn.KnownProfilesStateKey): nil,
"profile-abc1": nil,
}
if diff := cmp.Diff(want, patched); diff != "" {
t.Errorf("ClearReissueAuthKey() mismatch (-want +got):\n%s", diff)

View File

@@ -30,19 +30,8 @@
keyDeviceFQDN = ipn.StateKey(kubetypes.KeyDeviceFQDN)
)
// SetInitialKeys sets Pod UID and cap ver and clears tailnet device state
// keys to help stop the operator using stale tailnet device state.
// SetInitialKeys sets Pod UID and cap ver.
func SetInitialKeys(store ipn.StateStore, podUID string) error {
// Clear device state keys first so the operator knows if the pod UID
// matches, the other values are definitely not stale.
for _, key := range []ipn.StateKey{keyDeviceID, keyDeviceFQDN, keyDeviceIPs} {
if _, err := store.ReadState(key); err == nil {
if err := store.WriteState(key, nil); err != nil {
return fmt.Errorf("error writing %q to state store: %w", key, err)
}
}
}
if err := store.WriteState(keyPodUID, []byte(podUID)); err != nil {
return fmt.Errorf("error writing pod UID to state store: %w", err)
}

View File

@@ -58,9 +58,9 @@ func TestSetInitialStateKeys(t *testing.T) {
expected: map[ipn.StateKey][]byte{
keyPodUID: podUID,
keyCapVer: expectedCapVer,
keyDeviceID: nil,
keyDeviceFQDN: nil,
keyDeviceIPs: nil,
keyDeviceID: []byte("existing-device-id"),
keyDeviceFQDN: []byte("existing-device-fqdn"),
keyDeviceIPs: []byte(`["1.2.3.4"]`),
},
},
} {