From 9522619031287c6dcda68cda11dbcf2baf326ed3 Mon Sep 17 00:00:00 2001 From: David Bond Date: Tue, 10 Mar 2026 10:33:55 +0000 Subject: [PATCH] cmd/k8s-operator: use correct tailnet client for L7 & L3 ingresses (#18749) * cmd/k8s-operator: use correct tailnet client for L7 & L3 ingresses This commit fixes a bug when using multi-tailnet within the operator to spin up L7 & L3 ingresses where the client used to create the tailscale services was not switching depending on the tailnet used by the proxygroup backing the service/ingress. Updates: https://github.com/tailscale/corp/issues/34561 Signed-off-by: David Bond * cmd/k8s-operator: adding server url to proxygroups when a custom tailnet has been specified Signed-off-by: chaosinthecrd (cherry picked from commit 3b21ac5504e713e32dfcd43d9ee21e7e712ac200) --------- Signed-off-by: David Bond Signed-off-by: chaosinthecrd Co-authored-by: chaosinthecrd --- cmd/k8s-operator/api-server-proxy-pg.go | 57 ++++--- cmd/k8s-operator/api-server-proxy-pg_test.go | 47 +++--- cmd/k8s-operator/ingress-for-pg.go | 154 ++++++++----------- cmd/k8s-operator/ingress-for-pg_test.go | 105 ++++++++----- cmd/k8s-operator/operator.go | 3 - cmd/k8s-operator/proxygroup.go | 73 +++++---- cmd/k8s-operator/sts.go | 48 ++++-- cmd/k8s-operator/svc-for-pg.go | 144 +++++++++-------- cmd/k8s-operator/svc-for-pg_test.go | 14 +- cmd/k8s-operator/tailnet.go | 23 ++- cmd/k8s-operator/testutils_test.go | 17 +- cmd/k8s-operator/tsrecorder.go | 40 +++-- 12 files changed, 403 insertions(+), 322 deletions(-) diff --git a/cmd/k8s-operator/api-server-proxy-pg.go b/cmd/k8s-operator/api-server-proxy-pg.go index ff04d553a..0900fd0aa 100644 --- a/cmd/k8s-operator/api-server-proxy-pg.go +++ b/cmd/k8s-operator/api-server-proxy-pg.go @@ -23,6 +23,7 @@ "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/internal/client/tailscale" tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" @@ -52,7 +53,6 @@ type KubeAPIServerTSServiceReconciler struct { logger *zap.SugaredLogger tsClient tsClient tsNamespace string - lc localClient defaultTags []string operatorID string // stableID of the operator's Tailscale device @@ -78,9 +78,14 @@ func (r *KubeAPIServerTSServiceReconciler) Reconcile(ctx context.Context, req re serviceName := serviceNameForAPIServerProxy(pg) logger = logger.With("Tailscale Service", serviceName) + tailscaleClient, err := r.getClient(ctx, pg.Spec.Tailnet) + if err != nil { + return res, fmt.Errorf("failed to get tailscale client: %w", err) + } + if markedForDeletion(pg) { logger.Debugf("ProxyGroup is being deleted, ensuring any created resources are cleaned up") - if err = r.maybeCleanup(ctx, serviceName, pg, logger); err != nil && strings.Contains(err.Error(), optimisticLockErrorMsg) { + if err = r.maybeCleanup(ctx, serviceName, pg, logger, tailscaleClient); err != nil && strings.Contains(err.Error(), optimisticLockErrorMsg) { logger.Infof("optimistic lock error, retrying: %s", err) return res, nil } @@ -88,7 +93,7 @@ func (r *KubeAPIServerTSServiceReconciler) Reconcile(ctx context.Context, req re return res, err } - err = r.maybeProvision(ctx, serviceName, pg, logger) + err = r.maybeProvision(ctx, serviceName, pg, logger, tailscaleClient) if err != nil { if strings.Contains(err.Error(), optimisticLockErrorMsg) { logger.Infof("optimistic lock error, retrying: %s", err) @@ -100,11 +105,27 @@ func (r *KubeAPIServerTSServiceReconciler) Reconcile(ctx context.Context, req re return reconcile.Result{}, nil } +// getClient returns the appropriate Tailscale client for the given tailnet. +// If no tailnet is specified, returns the default client. +func (r *KubeAPIServerTSServiceReconciler) getClient(ctx context.Context, tailnetName string) (tsClient, + error) { + if tailnetName == "" { + return r.tsClient, nil + } + + tc, _, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tailnetName) + if err != nil { + return nil, err + } + + return tc, nil +} + // maybeProvision ensures that a Tailscale Service for this ProxyGroup exists // and is up to date. // // Returns true if the operation resulted in a Tailscale Service update. -func (r *KubeAPIServerTSServiceReconciler) maybeProvision(ctx context.Context, serviceName tailcfg.ServiceName, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (err error) { +func (r *KubeAPIServerTSServiceReconciler) maybeProvision(ctx context.Context, serviceName tailcfg.ServiceName, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger, tsClient tsClient) (err error) { var dnsName string oldPGStatus := pg.Status.DeepCopy() defer func() { @@ -156,7 +177,7 @@ func (r *KubeAPIServerTSServiceReconciler) maybeProvision(ctx context.Context, s // 1. Check there isn't a Tailscale Service with the same hostname // already created and not owned by this ProxyGroup. - existingTSSvc, err := r.tsClient.GetVIPService(ctx, serviceName) + existingTSSvc, err := tsClient.GetVIPService(ctx, serviceName) if err != nil && !isErrorTailscaleServiceNotFound(err) { return fmt.Errorf("error getting Tailscale Service %q: %w", serviceName, err) } @@ -198,17 +219,17 @@ func (r *KubeAPIServerTSServiceReconciler) maybeProvision(ctx context.Context, s !ownersAreSetAndEqual(tsSvc, existingTSSvc) || !slices.Equal(tsSvc.Ports, existingTSSvc.Ports) { logger.Infof("Ensuring Tailscale Service exists and is up to date") - if err := r.tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil { + if err = tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil { return fmt.Errorf("error creating Tailscale Service: %w", err) } } // 3. Ensure that TLS Secret and RBAC exists. - tcd, err := tailnetCertDomain(ctx, r.lc) + dnsName, err = dnsNameForService(ctx, r.Client, serviceName, pg, r.tsNamespace) if err != nil { - return fmt.Errorf("error determining DNS name base: %w", err) + return fmt.Errorf("error determining service DNS name: %w", err) } - dnsName = serviceName.WithoutPrefix() + "." + tcd + if err = r.ensureCertResources(ctx, pg, dnsName); err != nil { return fmt.Errorf("error ensuring cert resources: %w", err) } @@ -219,7 +240,7 @@ func (r *KubeAPIServerTSServiceReconciler) maybeProvision(ctx context.Context, s } // 5. Clean up any stale Tailscale Services from previous resource versions. - if err = r.maybeDeleteStaleServices(ctx, pg, logger); err != nil { + if err = r.maybeDeleteStaleServices(ctx, pg, logger, tsClient); err != nil { return fmt.Errorf("failed to delete stale Tailscale Services: %w", err) } @@ -230,7 +251,7 @@ func (r *KubeAPIServerTSServiceReconciler) maybeProvision(ctx context.Context, s // Service is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup- the Tailscale Service is only // deleted if it does not contain any other owner references. If it does, the cleanup only removes the owner reference // corresponding to this Service. -func (r *KubeAPIServerTSServiceReconciler) maybeCleanup(ctx context.Context, serviceName tailcfg.ServiceName, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (err error) { +func (r *KubeAPIServerTSServiceReconciler) maybeCleanup(ctx context.Context, serviceName tailcfg.ServiceName, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger, tsClient tsClient) (err error) { ix := slices.Index(pg.Finalizers, proxyPGFinalizerName) if ix < 0 { logger.Debugf("no finalizer, nothing to do") @@ -244,11 +265,11 @@ func (r *KubeAPIServerTSServiceReconciler) maybeCleanup(ctx context.Context, ser } }() - if _, err = cleanupTailscaleService(ctx, r.tsClient, serviceName, r.operatorID, logger); err != nil { + if _, err = cleanupTailscaleService(ctx, tsClient, serviceName, r.operatorID, logger); err != nil { return fmt.Errorf("error deleting Tailscale Service: %w", err) } - if err = cleanupCertResources(ctx, r.Client, r.lc, r.tsNamespace, pg.Name, serviceName); err != nil { + if err = cleanupCertResources(ctx, r.Client, r.tsNamespace, serviceName, pg); err != nil { return fmt.Errorf("failed to clean up cert resources: %w", err) } @@ -257,10 +278,10 @@ func (r *KubeAPIServerTSServiceReconciler) maybeCleanup(ctx context.Context, ser // maybeDeleteStaleServices deletes Services that have previously been created for // this ProxyGroup but are no longer needed. -func (r *KubeAPIServerTSServiceReconciler) maybeDeleteStaleServices(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) error { +func (r *KubeAPIServerTSServiceReconciler) maybeDeleteStaleServices(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger, tsClient tsClient) error { serviceName := serviceNameForAPIServerProxy(pg) - svcs, err := r.tsClient.ListVIPServices(ctx) + svcs, err := tsClient.ListVIPServices(ctx) if err != nil { return fmt.Errorf("error listing Tailscale Services: %w", err) } @@ -285,11 +306,11 @@ func (r *KubeAPIServerTSServiceReconciler) maybeDeleteStaleServices(ctx context. } logger.Infof("Deleting Tailscale Service %s", svc.Name) - if err := r.tsClient.DeleteVIPService(ctx, svc.Name); err != nil && !isErrorTailscaleServiceNotFound(err) { + if err = tsClient.DeleteVIPService(ctx, svc.Name); err != nil && !isErrorTailscaleServiceNotFound(err) { return fmt.Errorf("error deleting Tailscale Service %s: %w", svc.Name, err) } - if err = cleanupCertResources(ctx, r.Client, r.lc, r.tsNamespace, pg.Name, svc.Name); err != nil { + if err = cleanupCertResources(ctx, r.Client, r.tsNamespace, svc.Name, pg); err != nil { return fmt.Errorf("failed to clean up cert resources: %w", err) } } @@ -343,7 +364,7 @@ func (r *KubeAPIServerTSServiceReconciler) maybeAdvertiseServices(ctx context.Co // Only advertise a Tailscale Service once the TLS certs required for // serving it are available. - shouldBeAdvertised, err := hasCerts(ctx, r.Client, r.lc, r.tsNamespace, serviceName) + shouldBeAdvertised, err := hasCerts(ctx, r.Client, r.tsNamespace, serviceName, pg) if err != nil { return fmt.Errorf("error checking TLS credentials provisioned for Tailscale Service %q: %w", serviceName, err) } diff --git a/cmd/k8s-operator/api-server-proxy-pg_test.go b/cmd/k8s-operator/api-server-proxy-pg_test.go index 8fb18c818..52dda93e5 100644 --- a/cmd/k8s-operator/api-server-proxy-pg_test.go +++ b/cmd/k8s-operator/api-server-proxy-pg_test.go @@ -16,8 +16,8 @@ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "tailscale.com/internal/client/tailscale" - "tailscale.com/ipn/ipnstate" tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/kube/k8s-proxy/conf" @@ -107,14 +107,6 @@ func TestAPIServerProxyReconciler(t *testing.T) { } ft.CreateOrUpdateVIPService(t.Context(), ingressTSSvc) - lc := &fakeLocalClient{ - status: &ipnstate.Status{ - CurrentTailnet: &ipnstate.TailnetStatus{ - MagicDNSSuffix: "ts.net", - }, - }, - } - r := &KubeAPIServerTSServiceReconciler{ Client: fc, tsClient: ft, @@ -122,7 +114,6 @@ func TestAPIServerProxyReconciler(t *testing.T) { tsNamespace: ns, logger: zap.Must(zap.NewDevelopment()).Sugar(), recorder: record.NewFakeRecorder(10), - lc: lc, clock: tstest.NewClock(tstest.ClockOpts{}), operatorID: "self-id", } @@ -147,6 +138,20 @@ func TestAPIServerProxyReconciler(t *testing.T) { if err := ft.DeleteVIPService(t.Context(), "svc:"+pgName); err != nil { t.Fatalf("deleting initial Tailscale Service: %v", err) } + + // Create the state secret for the ProxyGroup without services being advertised. + mustCreate(t, fc, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pg-0", + Namespace: ns, + Labels: pgSecretLabels(pgName, kubetypes.LabelSecretTypeState), + }, + Data: map[string][]byte{ + "_current-profile": []byte("test"), + "test": []byte(`{"Config":{"NodeID":"node-foo", "UserProfile": {"LoginName": "test-pg.ts.net" }}}`), + }, + }) + expectReconciled(t, r, "", pgName) tsSvc, err := ft.GetVIPService(t.Context(), "svc:"+pgName) @@ -190,17 +195,19 @@ func TestAPIServerProxyReconciler(t *testing.T) { expectEqual(t, fc, pg, omitPGStatusConditionMessages) // Unchanged status. // Simulate Pod prefs updated with advertised services; should see Configured condition updated to true. - mustCreate(t, fc, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pg-0", - Namespace: ns, - Labels: pgSecretLabels(pgName, kubetypes.LabelSecretTypeState), - }, - Data: map[string][]byte{ - "_current-profile": []byte("profile-foo"), - "profile-foo": []byte(`{"AdvertiseServices":["svc:test-pg"],"Config":{"NodeID":"node-foo"}}`), - }, + mustUpdate(t, fc, ns, "test-pg-0", func(o *corev1.Secret) { + var p prefs + if err = json.Unmarshal(o.Data["test"], &p); err != nil { + t.Errorf("failed to unmarshal preferences: %v", err) + } + + p.AdvertiseServices = []string{"svc:test-pg"} + o.Data["test"], err = json.Marshal(p) + if err != nil { + t.Errorf("failed to marshal preferences: %v", err) + } }) + expectReconciled(t, r, "", pgName) tsoperator.SetProxyGroupCondition(pg, tsapi.KubeAPIServerProxyConfigured, metav1.ConditionTrue, reasonKubeAPIServerProxyConfigured, "", 1, r.clock, r.logger) pg.Status.URL = "https://" + defaultDomain diff --git a/cmd/k8s-operator/ingress-for-pg.go b/cmd/k8s-operator/ingress-for-pg.go index 4b140a8ae..28a836e97 100644 --- a/cmd/k8s-operator/ingress-for-pg.go +++ b/cmd/k8s-operator/ingress-for-pg.go @@ -33,7 +33,6 @@ "tailscale.com/internal/client/tailscale" "tailscale.com/ipn" - "tailscale.com/ipn/ipnstate" tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/kube/kubetypes" @@ -45,22 +44,15 @@ ) const ( - serveConfigKey = "serve-config.json" - TailscaleSvcOwnerRef = "tailscale.com/k8s-operator:owned-by:%s" + serveConfigKey = "serve-config.json" // FinalizerNamePG is the finalizer used by the IngressPGReconciler - FinalizerNamePG = "tailscale.com/ingress-pg-finalizer" - + FinalizerNamePG = "tailscale.com/ingress-pg-finalizer" indexIngressProxyGroup = ".metadata.annotations.ingress-proxy-group" // annotationHTTPEndpoint can be used to configure the Ingress to expose an HTTP endpoint to tailnet (as // well as the default HTTPS endpoint). - annotationHTTPEndpoint = "tailscale.com/http-endpoint" - - labelDomain = "tailscale.com/domain" - msgFeatureFlagNotEnabled = "Tailscale Service feature flag is not enabled for this tailnet, skipping provisioning. " + - "Please contact Tailscale support through https://tailscale.com/contact/support to enable the feature flag, then recreate the operator's Pod." - - warningTailscaleServiceFeatureFlagNotEnabled = "TailscaleServiceFeatureFlagNotEnabled" - managedTSServiceComment = "This Tailscale Service is managed by the Tailscale Kubernetes Operator, do not modify" + annotationHTTPEndpoint = "tailscale.com/http-endpoint" + labelDomain = "tailscale.com/domain" + managedTSServiceComment = "This Tailscale Service is managed by the Tailscale Kubernetes Operator, do not modify" ) var gaugePGIngressResources = clientmetric.NewGauge(kubetypes.MetricIngressPGResourceCount) @@ -75,7 +67,6 @@ type HAIngressReconciler struct { tsClient tsClient tsnetServer tsnetServer tsNamespace string - lc localClient defaultTags []string operatorID string // stableID of the operator's Tailscale device ingressClassName string @@ -109,11 +100,12 @@ func (r *HAIngressReconciler) Reconcile(ctx context.Context, req reconcile.Reque ing := new(networkingv1.Ingress) err = r.Get(ctx, req.NamespacedName, ing) - if apierrors.IsNotFound(err) { + switch { + case apierrors.IsNotFound(err): // Request object not found, could have been deleted after reconcile request. logger.Debugf("Ingress not found, assuming it was deleted") return res, nil - } else if err != nil { + case err != nil: return res, fmt.Errorf("failed to get Ingress: %w", err) } @@ -123,6 +115,23 @@ func (r *HAIngressReconciler) Reconcile(ctx context.Context, req reconcile.Reque hostname := hostnameForIngress(ing) logger = logger.With("hostname", hostname) + pgName := ing.Annotations[AnnotationProxyGroup] + pg := &tsapi.ProxyGroup{} + + err = r.Get(ctx, client.ObjectKey{Name: pgName}, pg) + switch { + case apierrors.IsNotFound(err): + logger.Infof("ProxyGroup %q does not exist, it may have been deleted. Reconciliation for ingress %q will be skipped until the ProxyGroup is found", pgName, ing.Name) + return res, nil + case err != nil: + return res, fmt.Errorf("getting ProxyGroup %q: %w", pgName, err) + } + + tailscaleClient, err := clientFromProxyGroup(ctx, r.Client, pg, r.tsNamespace, r.tsClient) + if err != nil { + return res, fmt.Errorf("failed to get tailscale client: %w", err) + } + // needsRequeue is set to true if the underlying Tailscale Service has // changed as a result of this reconcile. If that is the case, we // reconcile the Ingress one more time to ensure that concurrent updates @@ -130,9 +139,9 @@ func (r *HAIngressReconciler) Reconcile(ctx context.Context, req reconcile.Reque // resulted in another actor overwriting our Tailscale Service update. needsRequeue := false if !ing.DeletionTimestamp.IsZero() || !r.shouldExpose(ing) { - needsRequeue, err = r.maybeCleanup(ctx, hostname, ing, logger) + needsRequeue, err = r.maybeCleanup(ctx, hostname, ing, logger, tailscaleClient, pg) } else { - needsRequeue, err = r.maybeProvision(ctx, hostname, ing, logger) + needsRequeue, err = r.maybeProvision(ctx, hostname, ing, logger, tailscaleClient, pg) } if err != nil { return res, err @@ -151,16 +160,16 @@ func (r *HAIngressReconciler) Reconcile(ctx context.Context, req reconcile.Reque // If a Tailscale Service exists, but does not have an owner reference from any operator, we error // out assuming that this is an owner reference created by an unknown actor. // Returns true if the operation resulted in a Tailscale Service update. -func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcsChanged bool, err error) { +func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger, tsClient tsClient, pg *tsapi.ProxyGroup) (svcsChanged bool, err error) { // Currently (2025-05) Tailscale Services are behind an alpha feature flag that // needs to be explicitly enabled for a tailnet to be able to use them. serviceName := tailcfg.ServiceName("svc:" + hostname) - existingTSSvc, err := r.tsClient.GetVIPService(ctx, serviceName) + existingTSSvc, err := tsClient.GetVIPService(ctx, serviceName) if err != nil && !isErrorTailscaleServiceNotFound(err) { return false, fmt.Errorf("error getting Tailscale Service %q: %w", hostname, err) } - if err := validateIngressClass(ctx, r.Client, r.ingressClassName); err != nil { + if err = validateIngressClass(ctx, r.Client, r.ingressClassName); err != nil { logger.Infof("error validating tailscale IngressClass: %v.", err) return false, nil } @@ -172,14 +181,6 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin } logger = logger.With("ProxyGroup", pgName) - pg := &tsapi.ProxyGroup{} - if err := r.Get(ctx, client.ObjectKey{Name: pgName}, pg); err != nil { - if apierrors.IsNotFound(err) { - logger.Infof("ProxyGroup does not exist") - return false, nil - } - return false, fmt.Errorf("getting ProxyGroup %q: %w", pgName, err) - } if !tsoperator.ProxyGroupAvailable(pg) { logger.Infof("ProxyGroup is not (yet) ready") return false, nil @@ -220,7 +221,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin // that in edge cases (a single update changed both hostname and removed // ProxyGroup annotation) the Tailscale Service is more likely to be // (eventually) removed. - svcsChanged, err = r.maybeCleanupProxyGroup(ctx, pgName, logger) + svcsChanged, err = r.maybeCleanupProxyGroup(ctx, logger, tsClient, pg) if err != nil { return false, fmt.Errorf("failed to cleanup Tailscale Service resources for ProxyGroup: %w", err) } @@ -245,12 +246,12 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin return false, nil } // 3. Ensure that TLS Secret and RBAC exists - tcd, err := tailnetCertDomain(ctx, r.lc) + dnsName, err := dnsNameForService(ctx, r.Client, serviceName, pg, r.tsNamespace) if err != nil { - return false, fmt.Errorf("error determining DNS name base: %w", err) + return false, fmt.Errorf("error determining DNS name for service: %w", err) } - dnsName := hostname + "." + tcd - if err := r.ensureCertResources(ctx, pg, dnsName, ing); err != nil { + + if err = r.ensureCertResources(ctx, pg, dnsName, ing); err != nil { return false, fmt.Errorf("error ensuring cert resources: %w", err) } @@ -358,7 +359,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin !reflect.DeepEqual(tsSvc.Ports, existingTSSvc.Ports) || !ownersAreSetAndEqual(tsSvc, existingTSSvc) { logger.Infof("Ensuring Tailscale Service exists and is up to date") - if err := r.tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil { + if err := tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil { return false, fmt.Errorf("error creating Tailscale Service: %w", err) } } @@ -369,7 +370,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin if isHTTPEndpointEnabled(ing) || isHTTPRedirectEnabled(ing) { mode = serviceAdvertisementHTTPAndHTTPS } - if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg.Name, serviceName, mode, logger); err != nil { + if err = r.maybeUpdateAdvertiseServicesConfig(ctx, serviceName, mode, pg); err != nil { return false, fmt.Errorf("failed to update tailscaled config: %w", err) } @@ -386,7 +387,7 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin ing.Status.LoadBalancer.Ingress = nil default: var ports []networkingv1.IngressPortStatus - hasCerts, err := hasCerts(ctx, r.Client, r.lc, r.tsNamespace, serviceName) + hasCerts, err := hasCerts(ctx, r.Client, r.tsNamespace, serviceName, pg) if err != nil { return false, fmt.Errorf("error checking TLS credentials provisioned for Ingress: %w", err) } @@ -426,9 +427,10 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin logger.Infof("%s. %d Pod(s) advertising Tailscale Service", prefix, count) } - if err := r.Status().Update(ctx, ing); err != nil { + if err = r.Status().Update(ctx, ing); err != nil { return false, fmt.Errorf("failed to update Ingress status: %w", err) } + return svcsChanged, nil } @@ -438,9 +440,9 @@ func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname strin // operator instances, else the owner reference is cleaned up. Returns true if // the operation resulted in an existing Tailscale Service updates (owner // reference removal). -func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger) (svcsChanged bool, err error) { +func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, logger *zap.SugaredLogger, tsClient tsClient, pg *tsapi.ProxyGroup) (svcsChanged bool, err error) { // Get serve config for the ProxyGroup - cm, cfg, err := r.proxyGroupServeConfig(ctx, proxyGroupName) + cm, cfg, err := r.proxyGroupServeConfig(ctx, pg.Name) if err != nil { return false, fmt.Errorf("getting serve config: %w", err) } @@ -468,7 +470,7 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG if !found { logger.Infof("Tailscale Service %q is not owned by any Ingress, cleaning up", tsSvcName) - tsService, err := r.tsClient.GetVIPService(ctx, tsSvcName) + tsService, err := tsClient.GetVIPService(ctx, tsSvcName) if isErrorTailscaleServiceNotFound(err) { return false, nil } @@ -477,22 +479,24 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG } // Delete the Tailscale Service from control if necessary. - svcsChanged, err = r.cleanupTailscaleService(ctx, tsService, logger) + svcsChanged, err = r.cleanupTailscaleService(ctx, tsService, logger, tsClient) if err != nil { return false, fmt.Errorf("deleting Tailscale Service %q: %w", tsSvcName, err) } // Make sure the Tailscale Service is not advertised in tailscaled or serve config. - if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, tsSvcName, serviceAdvertisementOff, logger); err != nil { + if err = r.maybeUpdateAdvertiseServicesConfig(ctx, tsSvcName, serviceAdvertisementOff, pg); err != nil { return false, fmt.Errorf("failed to update tailscaled config services: %w", err) } + _, ok := cfg.Services[tsSvcName] if ok { logger.Infof("Removing Tailscale Service %q from serve config", tsSvcName) delete(cfg.Services, tsSvcName) serveConfigChanged = true } - if err := cleanupCertResources(ctx, r.Client, r.lc, r.tsNamespace, proxyGroupName, tsSvcName); err != nil { + + if err = cleanupCertResources(ctx, r.Client, r.tsNamespace, tsSvcName, pg); err != nil { return false, fmt.Errorf("failed to clean up cert resources: %w", err) } } @@ -515,7 +519,7 @@ func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG // Ingress is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup- the Tailscale Service is only // deleted if it does not contain any other owner references. If it does the cleanup only removes the owner reference // corresponding to this Ingress. -func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcChanged bool, err error) { +func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger, tsClient tsClient, pg *tsapi.ProxyGroup) (svcChanged bool, err error) { logger.Debugf("Ensuring any resources for Ingress are cleaned up") ix := slices.Index(ing.Finalizers, FinalizerNamePG) if ix < 0 { @@ -524,7 +528,7 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, } logger.Infof("Ensuring that Tailscale Service %q configuration is cleaned up", hostname) serviceName := tailcfg.ServiceName("svc:" + hostname) - svc, err := r.tsClient.GetVIPService(ctx, serviceName) + svc, err := tsClient.GetVIPService(ctx, serviceName) if err != nil && !isErrorTailscaleServiceNotFound(err) { return false, fmt.Errorf("error getting Tailscale Service: %w", err) } @@ -538,8 +542,7 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, }() // 1. Check if there is a Tailscale Service associated with this Ingress. - pg := ing.Annotations[AnnotationProxyGroup] - cm, cfg, err := r.proxyGroupServeConfig(ctx, pg) + cm, cfg, err := r.proxyGroupServeConfig(ctx, pg.Name) if err != nil { return false, fmt.Errorf("error getting ProxyGroup serve config: %w", err) } @@ -553,13 +556,13 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, } // 2. Clean up the Tailscale Service resources. - svcChanged, err = r.cleanupTailscaleService(ctx, svc, logger) + svcChanged, err = r.cleanupTailscaleService(ctx, svc, logger, tsClient) if err != nil { return false, fmt.Errorf("error deleting Tailscale Service: %w", err) } // 3. Clean up any cluster resources - if err := cleanupCertResources(ctx, r.Client, r.lc, r.tsNamespace, pg, serviceName); err != nil { + if err = cleanupCertResources(ctx, r.Client, r.tsNamespace, serviceName, pg); err != nil { return false, fmt.Errorf("failed to clean up cert resources: %w", err) } @@ -568,12 +571,12 @@ func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, } // 4. Unadvertise the Tailscale Service in tailscaled config. - if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, serviceAdvertisementOff, logger); err != nil { + if err = r.maybeUpdateAdvertiseServicesConfig(ctx, serviceName, serviceAdvertisementOff, pg); err != nil { return false, fmt.Errorf("failed to update tailscaled config services: %w", err) } // 5. Remove the Tailscale Service from the serve config for the ProxyGroup. - logger.Infof("Removing TailscaleService %q from serve config for ProxyGroup %q", hostname, pg) + logger.Infof("Removing TailscaleService %q from serve config for ProxyGroup %q", hostname, pg.Name) delete(cfg.Services, serviceName) cfgBytes, err := json.Marshal(cfg) if err != nil { @@ -631,19 +634,6 @@ func (r *HAIngressReconciler) proxyGroupServeConfig(ctx context.Context, pg stri return cm, cfg, nil } -type localClient interface { - StatusWithoutPeers(ctx context.Context) (*ipnstate.Status, error) -} - -// tailnetCertDomain returns the base domain (TCD) of the current tailnet. -func tailnetCertDomain(ctx context.Context, lc localClient) (string, error) { - st, err := lc.StatusWithoutPeers(ctx) - if err != nil { - return "", fmt.Errorf("error getting tailscale status: %w", err) - } - return st.CurrentTailnet.MagicDNSSuffix, nil -} - // shouldExpose returns true if the Ingress should be exposed over Tailscale in HA mode (on a ProxyGroup). func (r *HAIngressReconciler) shouldExpose(ing *networkingv1.Ingress) bool { isTSIngress := ing != nil && @@ -708,7 +698,7 @@ func (r *HAIngressReconciler) validateIngress(ctx context.Context, ing *networki // If a Tailscale Service is found, but contains other owner references, only removes this operator's owner reference. // If a Tailscale Service by the given name is not found or does not contain this operator's owner reference, do nothing. // It returns true if an existing Tailscale Service was updated to remove owner reference, as well as any error that occurred. -func (r *HAIngressReconciler) cleanupTailscaleService(ctx context.Context, svc *tailscale.VIPService, logger *zap.SugaredLogger) (updated bool, _ error) { +func (r *HAIngressReconciler) cleanupTailscaleService(ctx context.Context, svc *tailscale.VIPService, logger *zap.SugaredLogger, tsClient tsClient) (updated bool, _ error) { if svc == nil { return false, nil } @@ -731,7 +721,7 @@ func (r *HAIngressReconciler) cleanupTailscaleService(ctx context.Context, svc * } if len(o.OwnerRefs) == 1 { logger.Infof("Deleting Tailscale Service %q", svc.Name) - if err = r.tsClient.DeleteVIPService(ctx, svc.Name); err != nil && !isErrorTailscaleServiceNotFound(err) { + if err = tsClient.DeleteVIPService(ctx, svc.Name); err != nil && !isErrorTailscaleServiceNotFound(err) { return false, err } @@ -745,7 +735,7 @@ func (r *HAIngressReconciler) cleanupTailscaleService(ctx context.Context, svc * return false, fmt.Errorf("error marshalling updated Tailscale Service owner reference: %w", err) } svc.Annotations[ownerAnnotation] = string(json) - return true, r.tsClient.CreateOrUpdateVIPService(ctx, svc) + return true, tsClient.CreateOrUpdateVIPService(ctx, svc) } // isHTTPEndpointEnabled returns true if the Ingress has been configured to expose an HTTP endpoint to tailnet. @@ -765,10 +755,10 @@ func isHTTPEndpointEnabled(ing *networkingv1.Ingress) bool { serviceAdvertisementHTTPAndHTTPS // Both ports 80 and 443 should be advertised ) -func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, pgName string, serviceName tailcfg.ServiceName, mode serviceAdvertisementMode, logger *zap.SugaredLogger) (err error) { +func (r *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, serviceName tailcfg.ServiceName, mode serviceAdvertisementMode, pg *tsapi.ProxyGroup) (err error) { // Get all config Secrets for this ProxyGroup. secrets := &corev1.SecretList{} - if err := a.List(ctx, secrets, client.InNamespace(a.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, kubetypes.LabelSecretTypeConfig))); err != nil { + if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pg.Name, kubetypes.LabelSecretTypeConfig))); err != nil { return fmt.Errorf("failed to list config Secrets: %w", err) } @@ -780,7 +770,7 @@ func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con // The only exception is Ingresses with an HTTP endpoint enabled - if an // Ingress has an HTTP endpoint enabled, it will be advertised even if the // TLS cert is not yet provisioned. - hasCert, err := hasCerts(ctx, a.Client, a.lc, a.tsNamespace, serviceName) + hasCert, err := hasCerts(ctx, r.Client, r.tsNamespace, serviceName, pg) if err != nil { return fmt.Errorf("error checking TLS credentials provisioned for service %q: %w", serviceName, err) } @@ -820,7 +810,7 @@ func (a *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con } if updated { - if err := a.Update(ctx, &secret); err != nil { + if err := r.Update(ctx, &secret); err != nil { return fmt.Errorf("error updating ProxyGroup config Secret: %w", err) } } @@ -978,12 +968,12 @@ func (r *HAIngressReconciler) ensureCertResources(ctx context.Context, pg *tsapi // cleanupCertResources ensures that the TLS Secret and associated RBAC // resources that allow proxies to read/write to the Secret are deleted. -func cleanupCertResources(ctx context.Context, cl client.Client, lc localClient, tsNamespace, pgName string, serviceName tailcfg.ServiceName) error { - domainName, err := dnsNameForService(ctx, lc, serviceName) +func cleanupCertResources(ctx context.Context, cl client.Client, tsNamespace string, serviceName tailcfg.ServiceName, pg *tsapi.ProxyGroup) error { + domainName, err := dnsNameForService(ctx, cl, serviceName, pg, tsNamespace) if err != nil { return fmt.Errorf("error getting DNS name for Tailscale Service %s: %w", serviceName, err) } - labels := certResourceLabels(pgName, domainName) + labels := certResourceLabels(pg.Name, domainName) if err := cl.DeleteAllOf(ctx, &rbacv1.RoleBinding{}, client.InNamespace(tsNamespace), client.MatchingLabels(labels)); err != nil { return fmt.Errorf("error deleting RoleBinding for domain name %s: %w", domainName, err) } @@ -1093,19 +1083,9 @@ func certResourceLabels(pgName, domain string) map[string]string { } } -// dnsNameForService returns the DNS name for the given Tailscale Service's name. -func dnsNameForService(ctx context.Context, lc localClient, svc tailcfg.ServiceName) (string, error) { - s := svc.WithoutPrefix() - tcd, err := tailnetCertDomain(ctx, lc) - if err != nil { - return "", fmt.Errorf("error determining DNS name base: %w", err) - } - return s + "." + tcd, nil -} - // hasCerts checks if the TLS Secret for the given service has non-zero cert and key data. -func hasCerts(ctx context.Context, cl client.Client, lc localClient, ns string, svc tailcfg.ServiceName) (bool, error) { - domain, err := dnsNameForService(ctx, lc, svc) +func hasCerts(ctx context.Context, cl client.Client, ns string, svc tailcfg.ServiceName, pg *tsapi.ProxyGroup) (bool, error) { + domain, err := dnsNameForService(ctx, cl, svc, pg, ns) if err != nil { return false, fmt.Errorf("failed to get DNS name for service: %w", err) } diff --git a/cmd/k8s-operator/ingress-for-pg_test.go b/cmd/k8s-operator/ingress-for-pg_test.go index 480e6a26e..33e27ef37 100644 --- a/cmd/k8s-operator/ingress-for-pg_test.go +++ b/cmd/k8s-operator/ingress-for-pg_test.go @@ -28,7 +28,6 @@ "tailscale.com/internal/client/tailscale" "tailscale.com/ipn" - "tailscale.com/ipn/ipnstate" tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/kube/kubetypes" @@ -562,16 +561,18 @@ func TestIngressPGReconciler_HTTPEndpoint(t *testing.T) { } // Add the Tailscale Service to prefs to have the Ingress recognised as ready. - mustCreate(t, fc, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pg-0", - Namespace: "operator-ns", - Labels: pgSecretLabels("test-pg", kubetypes.LabelSecretTypeState), - }, - Data: map[string][]byte{ - "_current-profile": []byte("profile-foo"), - "profile-foo": []byte(`{"AdvertiseServices":["svc:my-svc"],"Config":{"NodeID":"node-foo"}}`), - }, + mustUpdate(t, fc, "operator-ns", "test-pg-0", func(o *corev1.Secret) { + var p prefs + var err error + if err = json.Unmarshal(o.Data["test"], &p); err != nil { + t.Errorf("failed to unmarshal preferences: %v", err) + } + + p.AdvertiseServices = []string{"svc:my-svc"} + o.Data["test"], err = json.Marshal(p) + if err != nil { + t.Errorf("failed to marshal preferences: %v", err) + } }) // Reconcile and re-fetch Ingress. @@ -685,17 +686,19 @@ func TestIngressPGReconciler_HTTPRedirect(t *testing.T) { t.Fatal(err) } - // Add the Tailscale Service to prefs to have the Ingress recognised as ready - mustCreate(t, fc, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pg-0", - Namespace: "operator-ns", - Labels: pgSecretLabels("test-pg", kubetypes.LabelSecretTypeState), - }, - Data: map[string][]byte{ - "_current-profile": []byte("profile-foo"), - "profile-foo": []byte(`{"AdvertiseServices":["svc:my-svc"],"Config":{"NodeID":"node-foo"}}`), - }, + // Add the Tailscale Service to prefs to have the Ingress recognised as ready. + mustUpdate(t, fc, "operator-ns", "test-pg-0", func(o *corev1.Secret) { + var p prefs + var err error + if err = json.Unmarshal(o.Data["test"], &p); err != nil { + t.Errorf("failed to unmarshal preferences: %v", err) + } + + p.AdvertiseServices = []string{"svc:my-svc"} + o.Data["test"], err = json.Marshal(p) + if err != nil { + t.Errorf("failed to marshal preferences: %v", err) + } }) // Reconcile and re-fetch Ingress @@ -818,17 +821,19 @@ func TestIngressPGReconciler_HTTPEndpointAndRedirectConflict(t *testing.T) { t.Fatal(err) } - // Add the Tailscale Service to prefs to have the Ingress recognised as ready - mustCreate(t, fc, &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pg-0", - Namespace: "operator-ns", - Labels: pgSecretLabels("test-pg", kubetypes.LabelSecretTypeState), - }, - Data: map[string][]byte{ - "_current-profile": []byte("profile-foo"), - "profile-foo": []byte(`{"AdvertiseServices":["svc:my-svc"],"Config":{"NodeID":"node-foo"}}`), - }, + // Add the Tailscale Service to prefs to have the Ingress recognised as ready. + mustUpdate(t, fc, "operator-ns", "test-pg-0", func(o *corev1.Secret) { + var p prefs + var err error + if err = json.Unmarshal(o.Data["test"], &p); err != nil { + t.Errorf("failed to unmarshal preferences: %v", err) + } + + p.AdvertiseServices = []string{"svc:my-svc"} + o.Data["test"], err = json.Marshal(p) + if err != nil { + t.Errorf("failed to marshal preferences: %v", err) + } }) // Reconcile and re-fetch Ingress @@ -1109,6 +1114,7 @@ func verifyTailscaledConfig(t *testing.T, fc client.Client, pgName string, expec func createPGResources(t *testing.T, fc client.Client, pgName string) { t.Helper() + // Pre-create the ProxyGroup pg := &tsapi.ProxyGroup{ ObjectMeta: metav1.ObjectMeta{ @@ -1145,6 +1151,30 @@ func createPGResources(t *testing.T, fc client.Client, pgName string) { }, } mustCreate(t, fc, pgCfgSecret) + + pr := prefs{} + pr.Config.UserProfile.LoginName = "test.ts.net" + pr.Config.NodeID = "test" + + p, err := json.Marshal(pr) + if err != nil { + t.Fatalf("marshaling prefs: %v", err) + } + + // Pre-create a state secret for the ProxyGroup + pgStateSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: pgStateSecretName(pgName, 0), + Namespace: "operator-ns", + Labels: pgSecretLabels(pgName, kubetypes.LabelSecretTypeState), + }, + Data: map[string][]byte{ + currentProfileKey: []byte("test"), + "test": p, + }, + } + mustCreate(t, fc, pgStateSecret) + pg.Status.Conditions = []metav1.Condition{ { Type: string(tsapi.ProxyGroupAvailable), @@ -1179,14 +1209,6 @@ func setupIngressTest(t *testing.T) (*HAIngressReconciler, client.Client, *fakeT t.Fatal(err) } - lc := &fakeLocalClient{ - status: &ipnstate.Status{ - CurrentTailnet: &ipnstate.TailnetStatus{ - MagicDNSSuffix: "ts.net", - }, - }, - } - ingPGR := &HAIngressReconciler{ Client: fc, tsClient: ft, @@ -1195,7 +1217,6 @@ func setupIngressTest(t *testing.T) (*HAIngressReconciler, client.Client, *fakeT tsnetServer: fakeTsnetServer, logger: zl.Sugar(), recorder: record.NewFakeRecorder(10), - lc: lc, ingressClassName: tsIngressClass.Name, } diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go index 81f62d477..ef55d2748 100644 --- a/cmd/k8s-operator/operator.go +++ b/cmd/k8s-operator/operator.go @@ -441,7 +441,6 @@ func runReconcilers(opts reconcilerOpts) { defaultTags: strings.Split(opts.proxyTags, ","), Client: mgr.GetClient(), logger: opts.log.Named("ingress-pg-reconciler"), - lc: lc, operatorID: id, tsNamespace: opts.tailscaleNamespace, ingressClassName: opts.ingressClassName, @@ -467,7 +466,6 @@ func runReconcilers(opts reconcilerOpts) { defaultTags: strings.Split(opts.proxyTags, ","), Client: mgr.GetClient(), logger: opts.log.Named("service-pg-reconciler"), - lc: lc, clock: tstime.DefaultClock{}, operatorID: id, tsNamespace: opts.tailscaleNamespace, @@ -686,7 +684,6 @@ func runReconcilers(opts reconcilerOpts) { logger: opts.log.Named("kube-apiserver-ts-service-reconciler"), tsClient: opts.tsClient, tsNamespace: opts.tailscaleNamespace, - lc: lc, defaultTags: strings.Split(opts.proxyTags, ","), operatorID: id, clock: tstime.DefaultClock{}, diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go index 200782498..538933f14 100644 --- a/cmd/k8s-operator/proxygroup.go +++ b/cmd/k8s-operator/proxygroup.go @@ -118,20 +118,15 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err) } - tailscaleClient := r.tsClient - if pg.Spec.Tailnet != "" { - tc, err := clientForTailnet(ctx, r.Client, r.tsNamespace, pg.Spec.Tailnet) - if err != nil { - oldPGStatus := pg.Status.DeepCopy() - nrr := ¬ReadyReason{ - reason: reasonProxyGroupTailnetUnavailable, - message: err.Error(), - } - - return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, make(map[string][]netip.AddrPort))) + tailscaleClient, loginUrl, err := r.getClientAndLoginURL(ctx, pg.Spec.Tailnet) + if err != nil { + oldPGStatus := pg.Status.DeepCopy() + nrr := ¬ReadyReason{ + reason: reasonProxyGroupTailnetUnavailable, + message: fmt.Errorf("failed to get tailscale client and loginUrl: %w", err).Error(), } - tailscaleClient = tc + return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, make(map[string][]netip.AddrPort))) } if markedForDeletion(pg) { @@ -161,7 +156,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ } oldPGStatus := pg.Status.DeepCopy() - staticEndpoints, nrr, err := r.reconcilePG(ctx, tailscaleClient, pg, logger) + staticEndpoints, nrr, err := r.reconcilePG(ctx, tailscaleClient, loginUrl, pg, logger) return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, staticEndpoints)) } @@ -169,7 +164,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ // for deletion. It is separated out from Reconcile to make a clear separation // between reconciling the ProxyGroup, and posting the status of its created // resources onto the ProxyGroup status field. -func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) { +func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient tsClient, loginUrl string, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) { if !slices.Contains(pg.Finalizers, FinalizerName) { // This log line is printed exactly once during initial provisioning, // because once the finalizer is in place this block gets skipped. So, @@ -210,7 +205,7 @@ func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient return notReady(reasonProxyGroupInvalid, fmt.Sprintf("invalid ProxyGroup spec: %v", err)) } - staticEndpoints, nrr, err := r.maybeProvision(ctx, tailscaleClient, pg, proxyClass) + staticEndpoints, nrr, err := r.maybeProvision(ctx, tailscaleClient, loginUrl, pg, proxyClass) if err != nil { return nil, nrr, err } @@ -296,7 +291,7 @@ func (r *ProxyGroupReconciler) validate(ctx context.Context, pg *tsapi.ProxyGrou return errors.Join(errs...) } -func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) { +func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, loginUrl string, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) { logger := r.logger(pg.Name) r.mu.Lock() r.ensureAddedToGaugeForProxyGroup(pg) @@ -318,7 +313,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClie } } - staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, tailscaleClient, pg, proxyClass, svcToNodePorts) + staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, tailscaleClient, loginUrl, pg, proxyClass, svcToNodePorts) if err != nil { if _, ok := errors.AsType[*FindStaticEndpointErr](err); ok { reason := reasonProxyGroupCreationFailed @@ -628,7 +623,7 @@ func (r *ProxyGroupReconciler) ensureNodePortServiceCreated(ctx context.Context, // tailnet devices when the number of replicas specified is reduced. func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error { logger := r.logger(pg.Name) - metadata, err := r.getNodeMetadata(ctx, pg) + metadata, err := getNodeMetadata(ctx, pg, r.Client, r.tsNamespace) if err != nil { return err } @@ -686,7 +681,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tai func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup) (bool, error) { logger := r.logger(pg.Name) - metadata, err := r.getNodeMetadata(ctx, pg) + metadata, err := getNodeMetadata(ctx, pg, r.Client, r.tsNamespace) if err != nil { return false, err } @@ -731,6 +726,7 @@ func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, tailscal func (r *ProxyGroupReconciler) ensureConfigSecretsCreated( ctx context.Context, tailscaleClient tsClient, + loginUrl string, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass, svcToNodePorts map[string]uint16, @@ -866,8 +862,8 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated( } } - if r.loginServer != "" { - cfg.ServerURL = &r.loginServer + if loginUrl != "" { + cfg.ServerURL = new(loginUrl) } if proxyClass != nil && proxyClass.Spec.TailscaleConfig != nil { @@ -895,7 +891,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated( return nil, err } - configs, err := pgTailscaledConfig(pg, proxyClass, i, authKey, endpoints[nodePortSvcName], existingAdvertiseServices, r.loginServer) + configs, err := pgTailscaledConfig(pg, loginUrl, proxyClass, i, authKey, endpoints[nodePortSvcName], existingAdvertiseServices) if err != nil { return nil, fmt.Errorf("error creating tailscaled config: %w", err) } @@ -1052,7 +1048,7 @@ func (r *ProxyGroupReconciler) ensureRemovedFromGaugeForProxyGroup(pg *tsapi.Pro gaugeAPIServerProxyGroupResources.Set(int64(r.apiServerProxyGroups.Len())) } -func pgTailscaledConfig(pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass, idx int32, authKey *string, staticEndpoints []netip.AddrPort, oldAdvertiseServices []string, loginServer string) (tailscaledConfigs, error) { +func pgTailscaledConfig(pg *tsapi.ProxyGroup, loginServer string, pc *tsapi.ProxyClass, idx int32, authKey *string, staticEndpoints []netip.AddrPort, oldAdvertiseServices []string) (tailscaledConfigs, error) { conf := &ipn.ConfigVAlpha{ Version: "alpha0", AcceptDNS: "false", @@ -1103,10 +1099,10 @@ func extractAdvertiseServicesConfig(cfgSecret *corev1.Secret) ([]string, error) // some pods have failed to write state. // // The returned metadata will contain an entry for each state Secret that exists. -func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.ProxyGroup) (metadata []nodeMetadata, _ error) { +func getNodeMetadata(ctx context.Context, pg *tsapi.ProxyGroup, cl client.Client, tsNamespace string) (metadata []nodeMetadata, _ error) { // List all state Secrets owned by this ProxyGroup. secrets := &corev1.SecretList{} - if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pg.Name, kubetypes.LabelSecretTypeState))); err != nil { + if err := cl.List(ctx, secrets, client.InNamespace(tsNamespace), client.MatchingLabels(pgSecretLabels(pg.Name, kubetypes.LabelSecretTypeState))); err != nil { return nil, fmt.Errorf("failed to list state Secrets: %w", err) } for _, secret := range secrets.Items { @@ -1130,7 +1126,7 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr } pod := &corev1.Pod{} - if err := r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: fmt.Sprintf("%s-%d", pg.Name, ordinal)}, pod); err != nil && !apierrors.IsNotFound(err) { + if err := cl.Get(ctx, client.ObjectKey{Namespace: tsNamespace, Name: fmt.Sprintf("%s-%d", pg.Name, ordinal)}, pod); err != nil && !apierrors.IsNotFound(err) { return nil, err } else if err == nil { nm.podUID = string(pod.UID) @@ -1149,7 +1145,7 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr // getRunningProxies will return status for all proxy Pods whose state Secret // has an up to date Pod UID and at least a hostname. func (r *ProxyGroupReconciler) getRunningProxies(ctx context.Context, pg *tsapi.ProxyGroup, staticEndpoints map[string][]netip.AddrPort) (devices []tsapi.TailnetDevice, _ error) { - metadata, err := r.getNodeMetadata(ctx, pg) + metadata, err := getNodeMetadata(ctx, pg, r.Client, r.tsNamespace) if err != nil { return nil, err } @@ -1193,6 +1189,29 @@ func (r *ProxyGroupReconciler) getRunningProxies(ctx context.Context, pg *tsapi. return devices, nil } +// getClientAndLoginURL returns the appropriate Tailscale client and resolved login URL +// for the given tailnet name. If no tailnet is specified, returns the default client +// and login server. Applies fallback to the operator's login server if the tailnet +// doesn't specify a custom login URL. +func (r *ProxyGroupReconciler) getClientAndLoginURL(ctx context.Context, tailnetName string) (tsClient, + string, error) { + if tailnetName == "" { + return r.tsClient, r.loginServer, nil + } + + tc, loginUrl, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tailnetName) + if err != nil { + return nil, "", err + } + + // Apply fallback if tailnet doesn't specify custom login URL + if loginUrl == "" { + loginUrl = r.loginServer + } + + return tc, loginUrl, nil +} + type nodeMetadata struct { ordinal int stateSecret *corev1.Secret diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go index c88a6df17..5f33a9490 100644 --- a/cmd/k8s-operator/sts.go +++ b/cmd/k8s-operator/sts.go @@ -198,14 +198,9 @@ func IsHTTPSEnabledOnTailnet(tsnetServer tsnetServer) bool { // Provision ensures that the StatefulSet for the given service is running and // up to date. func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig) (*corev1.Service, error) { - tailscaleClient := a.tsClient - if sts.Tailnet != "" { - tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, sts.Tailnet) - if err != nil { - return nil, err - } - - tailscaleClient = tc + tailscaleClient, loginUrl, err := a.getClientAndLoginURL(ctx, sts.Tailnet) + if err != nil { + return nil, fmt.Errorf("failed to get tailscale client and loginUrl: %w", err) } // Do full reconcile. @@ -227,7 +222,7 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga } sts.ProxyClass = proxyClass - secretNames, err := a.provisionSecrets(ctx, tailscaleClient, logger, sts, hsvc) + secretNames, err := a.provisionSecrets(ctx, tailscaleClient, loginUrl, sts, hsvc, logger) if err != nil { return nil, fmt.Errorf("failed to create or get API key secret: %w", err) } @@ -248,13 +243,36 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga return hsvc, nil } +// getClientAndLoginURL returns the appropriate Tailscale client and resolved login URL +// for the given tailnet name. If no tailnet is specified, returns the default client +// and login server. Applies fallback to the operator's login server if the tailnet +// doesn't specify a custom login URL. +func (a *tailscaleSTSReconciler) getClientAndLoginURL(ctx context.Context, tailnetName string) (tsClient, + string, error) { + if tailnetName == "" { + return a.tsClient, a.loginServer, nil + } + + tc, loginUrl, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, tailnetName) + if err != nil { + return nil, "", err + } + + // Apply fallback if tailnet doesn't specify custom login URL + if loginUrl == "" { + loginUrl = a.loginServer + } + + return tc, loginUrl, nil +} + // Cleanup removes all resources associated that were created by Provision with // the given labels. It returns true when all resources have been removed, // otherwise it returns false and the caller should retry later. func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, tailnet string, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) { tailscaleClient := a.tsClient if tailnet != "" { - tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, tailnet) + tc, _, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, tailnet) if err != nil { logger.Errorf("failed to get tailscale client: %v", err) return false, nil @@ -384,7 +402,7 @@ func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, l return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec }) } -func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, tailscaleClient tsClient, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) ([]string, error) { +func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, tailscaleClient tsClient, loginUrl string, stsC *tailscaleSTSConfig, hsvc *corev1.Service, logger *zap.SugaredLogger) ([]string, error) { secretNames := make([]string, stsC.Replicas) // Start by ensuring we have Secrets for the desired number of replicas. This will handle both creating and scaling @@ -433,7 +451,7 @@ func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, tailscale } } - configs, err := tailscaledConfig(stsC, authKey, orig, hostname) + configs, err := tailscaledConfig(stsC, loginUrl, authKey, orig, hostname) if err != nil { return nil, fmt.Errorf("error creating tailscaled config: %w", err) } @@ -1062,7 +1080,7 @@ func isMainContainer(c *corev1.Container) bool { // tailscaledConfig takes a proxy config, a newly generated auth key if generated and a Secret with the previous proxy // state and auth key and returns tailscaled config files for currently supported proxy versions. -func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *corev1.Secret, hostname string) (tailscaledConfigs, error) { +func tailscaledConfig(stsC *tailscaleSTSConfig, loginUrl string, newAuthkey string, oldSecret *corev1.Secret, hostname string) (tailscaledConfigs, error) { conf := &ipn.ConfigVAlpha{ Version: "alpha0", AcceptDNS: "false", @@ -1101,6 +1119,10 @@ func tailscaledConfig(stsC *tailscaleSTSConfig, newAuthkey string, oldSecret *co conf.AuthKey = key } + if loginUrl != "" { + conf.ServerURL = new(loginUrl) + } + capVerConfigs := make(map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha) capVerConfigs[107] = *conf diff --git a/cmd/k8s-operator/svc-for-pg.go b/cmd/k8s-operator/svc-for-pg.go index 7cbbaebaa..e1891a4a9 100644 --- a/cmd/k8s-operator/svc-for-pg.go +++ b/cmd/k8s-operator/svc-for-pg.go @@ -27,6 +27,7 @@ "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/internal/client/tailscale" "tailscale.com/ipn" tsoperator "tailscale.com/k8s-operator" @@ -41,13 +42,10 @@ ) const ( - svcPGFinalizerName = "tailscale.com/service-pg-finalizer" - + svcPGFinalizerName = "tailscale.com/service-pg-finalizer" reasonIngressSvcInvalid = "IngressSvcInvalid" - reasonIngressSvcValid = "IngressSvcValid" reasonIngressSvcConfigured = "IngressSvcConfigured" reasonIngressSvcNoBackendsConfigured = "IngressSvcNoBackendsConfigured" - reasonIngressSvcCreationFailed = "IngressSvcCreationFailed" ) var gaugePGServiceResources = clientmetric.NewGauge(kubetypes.MetricServicePGResourceCount) @@ -61,7 +59,6 @@ type HAServiceReconciler struct { logger *zap.SugaredLogger tsClient tsClient tsNamespace string - lc localClient defaultTags []string operatorID string // stableID of the operator's Tailscale device @@ -100,12 +97,41 @@ func (r *HAServiceReconciler) Reconcile(ctx context.Context, req reconcile.Reque return res, fmt.Errorf("failed to get Service: %w", err) } + pgName := svc.Annotations[AnnotationProxyGroup] + if pgName == "" { + logger.Infof("[unexpected] no ProxyGroup annotation, skipping Tailscale Service provisioning") + return res, nil + } + + logger = logger.With("ProxyGroup", pgName) + + pg := &tsapi.ProxyGroup{} + err = r.Get(ctx, client.ObjectKey{Name: pgName}, pg) + switch { + case apierrors.IsNotFound(err): + logger.Infof("ProxyGroup %q does not exist, it may have been deleted. Reconciliation for service %q will be skipped until the ProxyGroup is found", pgName, svc.Name) + r.recorder.Event(svc, corev1.EventTypeWarning, "ProxyGroupNotFound", "ProxyGroup not found") + return res, nil + case err != nil: + return res, fmt.Errorf("getting ProxyGroup %q: %w", pgName, err) + } + + if !tsoperator.ProxyGroupAvailable(pg) { + logger.Infof("ProxyGroup is not (yet) ready") + return res, nil + } + + tailscaleClient, err := clientFromProxyGroup(ctx, r.Client, pg, r.tsNamespace, r.tsClient) + if err != nil { + return res, fmt.Errorf("failed to get tailscale client: %w", err) + } + hostname := nameForService(svc) logger = logger.With("hostname", hostname) if !svc.DeletionTimestamp.IsZero() || !r.isTailscaleService(svc) { logger.Debugf("Service is being deleted or is (no longer) referring to Tailscale ingress/egress, ensuring any created resources are cleaned up") - _, err = r.maybeCleanup(ctx, hostname, svc, logger) + _, err = r.maybeCleanup(ctx, hostname, svc, logger, tailscaleClient) return res, err } @@ -113,7 +139,7 @@ func (r *HAServiceReconciler) Reconcile(ctx context.Context, req reconcile.Reque // is the case, we reconcile the Ingress one more time to ensure that concurrent updates to the Tailscale Service in a // multi-cluster Ingress setup have not resulted in another actor overwriting our Tailscale Service update. needsRequeue := false - needsRequeue, err = r.maybeProvision(ctx, hostname, svc, logger) + needsRequeue, err = r.maybeProvision(ctx, hostname, svc, pg, logger, tailscaleClient) if err != nil { if strings.Contains(err.Error(), optimisticLockErrorMsg) { logger.Infof("optimistic lock error, retrying: %s", err) @@ -136,7 +162,7 @@ func (r *HAServiceReconciler) Reconcile(ctx context.Context, req reconcile.Reque // If a Tailscale Service exists, but does not have an owner reference from any operator, we error // out assuming that this is an owner reference created by an unknown actor. // Returns true if the operation resulted in a Tailscale Service update. -func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname string, svc *corev1.Service, logger *zap.SugaredLogger) (svcsChanged bool, err error) { +func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname string, svc *corev1.Service, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger, tsClient tsClient) (svcsChanged bool, err error) { oldSvcStatus := svc.Status.DeepCopy() defer func() { if !apiequality.Semantic.DeepEqual(oldSvcStatus, &svc.Status) { @@ -145,30 +171,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin } }() - pgName := svc.Annotations[AnnotationProxyGroup] - if pgName == "" { - logger.Infof("[unexpected] no ProxyGroup annotation, skipping Tailscale Service provisioning") - return false, nil - } - - logger = logger.With("ProxyGroup", pgName) - - pg := &tsapi.ProxyGroup{} - if err := r.Get(ctx, client.ObjectKey{Name: pgName}, pg); err != nil { - if apierrors.IsNotFound(err) { - msg := fmt.Sprintf("ProxyGroup %q does not exist", pgName) - logger.Warnf(msg) - r.recorder.Event(svc, corev1.EventTypeWarning, "ProxyGroupNotFound", msg) - return false, nil - } - return false, fmt.Errorf("getting ProxyGroup %q: %w", pgName, err) - } - if !tsoperator.ProxyGroupAvailable(pg) { - logger.Infof("ProxyGroup is not (yet) ready") - return false, nil - } - - if err := r.validateService(ctx, svc, pg); err != nil { + if err = r.validateService(ctx, svc, pg); err != nil { r.recorder.Event(svc, corev1.EventTypeWarning, reasonIngressSvcInvalid, err.Error()) tsoperator.SetServiceCondition(svc, tsapi.IngressSvcValid, metav1.ConditionFalse, reasonIngressSvcInvalid, err.Error(), r.clock, logger) return false, nil @@ -198,7 +201,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin // that in edge cases (a single update changed both hostname and removed // ProxyGroup annotation) the Tailscale Service is more likely to be // (eventually) removed. - svcsChanged, err = r.maybeCleanupProxyGroup(ctx, pgName, logger) + svcsChanged, err = r.maybeCleanupProxyGroup(ctx, pg.Name, logger, tsClient) if err != nil { return false, fmt.Errorf("failed to cleanup Tailscale Service resources for ProxyGroup: %w", err) } @@ -206,7 +209,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin // 2. Ensure that there isn't a Tailscale Service with the same hostname // already created and not owned by this Service. serviceName := tailcfg.ServiceName("svc:" + hostname) - existingTSSvc, err := r.tsClient.GetVIPService(ctx, serviceName) + existingTSSvc, err := tsClient.GetVIPService(ctx, serviceName) if err != nil && !isErrorTailscaleServiceNotFound(err) { return false, fmt.Errorf("error getting Tailscale Service %q: %w", hostname, err) } @@ -248,13 +251,13 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin !reflect.DeepEqual(tsSvc.Tags, existingTSSvc.Tags) || !ownersAreSetAndEqual(tsSvc, existingTSSvc) { logger.Infof("Ensuring Tailscale Service exists and is up to date") - if err := r.tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil { + if err := tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil { return false, fmt.Errorf("error creating Tailscale Service: %w", err) } existingTSSvc = tsSvc } - cm, cfgs, err := ingressSvcsConfigs(ctx, r.Client, pgName, r.tsNamespace) + cm, cfgs, err := ingressSvcsConfigs(ctx, r.Client, pg.Name, r.tsNamespace) if err != nil { return false, fmt.Errorf("error retrieving ingress services configuration: %w", err) } @@ -264,7 +267,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin } if existingTSSvc.Addrs == nil { - existingTSSvc, err = r.tsClient.GetVIPService(ctx, tsSvc.Name) + existingTSSvc, err = tsClient.GetVIPService(ctx, tsSvc.Name) if err != nil { return false, fmt.Errorf("error getting Tailscale Service: %w", err) } @@ -329,7 +332,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin return false, fmt.Errorf("failed to update tailscaled config: %w", err) } - count, err := r.numberPodsAdvertising(ctx, pgName, serviceName) + count, err := r.numberPodsAdvertising(ctx, pg.Name, serviceName) if err != nil { return false, fmt.Errorf("failed to get number of advertised Pods: %w", err) } @@ -345,7 +348,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin conditionReason := reasonIngressSvcNoBackendsConfigured conditionMessage := fmt.Sprintf("%d/%d proxy backends ready and advertising", count, pgReplicas(pg)) if count != 0 { - dnsName, err := r.dnsNameForService(ctx, serviceName) + dnsName, err := dnsNameForService(ctx, r.Client, serviceName, pg, r.tsNamespace) if err != nil { return false, fmt.Errorf("error getting DNS name for Service: %w", err) } @@ -371,7 +374,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin // Service is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup- the Tailscale Service is only // deleted if it does not contain any other owner references. If it does the cleanup only removes the owner reference // corresponding to this Service. -func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string, svc *corev1.Service, logger *zap.SugaredLogger) (svcChanged bool, err error) { +func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string, svc *corev1.Service, logger *zap.SugaredLogger, tsClient tsClient) (svcChanged bool, err error) { logger.Debugf("Ensuring any resources for Service are cleaned up") ix := slices.Index(svc.Finalizers, svcPGFinalizerName) if ix < 0 { @@ -389,7 +392,7 @@ func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string, serviceName := tailcfg.ServiceName("svc:" + hostname) // 1. Clean up the Tailscale Service. - svcChanged, err = cleanupTailscaleService(ctx, r.tsClient, serviceName, r.operatorID, logger) + svcChanged, err = cleanupTailscaleService(ctx, tsClient, serviceName, r.operatorID, logger) if err != nil { return false, fmt.Errorf("error deleting Tailscale Service: %w", err) } @@ -422,14 +425,14 @@ func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string, // Tailscale Services that are associated with the provided ProxyGroup and no longer managed this operator's instance are deleted, if not owned by other operator instances, else the owner reference is cleaned up. // Returns true if the operation resulted in existing Tailscale Service updates (owner reference removal). -func (r *HAServiceReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger) (svcsChanged bool, err error) { +func (r *HAServiceReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger, tsClient tsClient) (svcsChanged bool, err error) { cm, config, err := ingressSvcsConfigs(ctx, r.Client, proxyGroupName, r.tsNamespace) if err != nil { return false, fmt.Errorf("failed to get ingress service config: %s", err) } svcList := &corev1.ServiceList{} - if err := r.Client.List(ctx, svcList, client.MatchingFields{indexIngressProxyGroup: proxyGroupName}); err != nil { + if err = r.Client.List(ctx, svcList, client.MatchingFields{indexIngressProxyGroup: proxyGroupName}); err != nil { return false, fmt.Errorf("failed to find Services for ProxyGroup %q: %w", proxyGroupName, err) } @@ -450,7 +453,7 @@ func (r *HAServiceReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG return false, fmt.Errorf("failed to update tailscaled config services: %w", err) } - svcsChanged, err = cleanupTailscaleService(ctx, r.tsClient, tailcfg.ServiceName(tsSvcName), r.operatorID, logger) + svcsChanged, err = cleanupTailscaleService(ctx, tsClient, tailcfg.ServiceName(tsSvcName), r.operatorID, logger) if err != nil { return false, fmt.Errorf("deleting Tailscale Service %q: %w", tsSvcName, err) } @@ -510,15 +513,6 @@ func (r *HAServiceReconciler) shouldExposeClusterIP(svc *corev1.Service) bool { return isTailscaleLoadBalancerService(svc, r.isDefaultLoadBalancer) || hasExposeAnnotation(svc) } -// tailnetCertDomain returns the base domain (TCD) of the current tailnet. -func (r *HAServiceReconciler) tailnetCertDomain(ctx context.Context) (string, error) { - st, err := r.lc.StatusWithoutPeers(ctx) - if err != nil { - return "", fmt.Errorf("error getting tailscale status: %w", err) - } - return st.CurrentTailnet.MagicDNSSuffix, nil -} - // cleanupTailscaleService deletes any Tailscale Service by the provided name if it is not owned by operator instances other than this one. // If a Tailscale Service is found, but contains other owner references, only removes this operator's owner reference. // If a Tailscale Service by the given name is not found or does not contain this operator's owner reference, do nothing. @@ -570,10 +564,10 @@ func cleanupTailscaleService(ctx context.Context, tsClient tsClient, name tailcf return true, tsClient.CreateOrUpdateVIPService(ctx, svc) } -func (a *HAServiceReconciler) backendRoutesSetup(ctx context.Context, serviceName, replicaName, pgName string, wantsCfg *ingressservices.Config, logger *zap.SugaredLogger) (bool, error) { +func (r *HAServiceReconciler) backendRoutesSetup(ctx context.Context, serviceName, replicaName string, wantsCfg *ingressservices.Config, logger *zap.SugaredLogger) (bool, error) { logger.Debugf("checking backend routes for service '%s'", serviceName) pod := &corev1.Pod{} - err := a.Get(ctx, client.ObjectKey{Namespace: a.tsNamespace, Name: replicaName}, pod) + err := r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: replicaName}, pod) if apierrors.IsNotFound(err) { logger.Debugf("Pod %q not found", replicaName) return false, nil @@ -582,7 +576,7 @@ func (a *HAServiceReconciler) backendRoutesSetup(ctx context.Context, serviceNam return false, fmt.Errorf("failed to get Pod: %w", err) } secret := &corev1.Secret{} - err = a.Get(ctx, client.ObjectKey{Namespace: a.tsNamespace, Name: replicaName}, secret) + err = r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: replicaName}, secret) if apierrors.IsNotFound(err) { logger.Debugf("Secret %q not found", replicaName) return false, nil @@ -637,17 +631,17 @@ func isCurrentStatus(gotCfgs ingressservices.Status, pod *corev1.Pod, logger *za return true, nil } -func (a *HAServiceReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, svc *corev1.Service, pgName string, serviceName tailcfg.ServiceName, cfg *ingressservices.Config, shouldBeAdvertised bool, logger *zap.SugaredLogger) (err error) { +func (r *HAServiceReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, svc *corev1.Service, pgName string, serviceName tailcfg.ServiceName, cfg *ingressservices.Config, shouldBeAdvertised bool, logger *zap.SugaredLogger) (err error) { logger.Debugf("checking advertisement for service '%s'", serviceName) // Get all config Secrets for this ProxyGroup. // Get all Pods secrets := &corev1.SecretList{} - if err := a.List(ctx, secrets, client.InNamespace(a.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, kubetypes.LabelSecretTypeConfig))); err != nil { + if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, kubetypes.LabelSecretTypeConfig))); err != nil { return fmt.Errorf("failed to list config Secrets: %w", err) } if svc != nil && shouldBeAdvertised { - shouldBeAdvertised, err = a.checkEndpointsReady(ctx, svc, logger) + shouldBeAdvertised, err = r.checkEndpointsReady(ctx, svc, logger) if err != nil { return fmt.Errorf("failed to check readiness of Service '%s' endpoints: %w", svc.Name, err) } @@ -679,7 +673,7 @@ func (a *HAServiceReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con logger.Infof("[unexpected] unable to determine replica name from config Secret name %q, unable to determine if backend routing has been configured", secret.Name) return nil } - ready, err := a.backendRoutesSetup(ctx, serviceName.String(), replicaName, pgName, cfg, logger) + ready, err := r.backendRoutesSetup(ctx, serviceName.String(), replicaName, cfg, logger) if err != nil { return fmt.Errorf("error checking backend routes: %w", err) } @@ -698,7 +692,7 @@ func (a *HAServiceReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con updated = true } if updated { - if err := a.Update(ctx, &secret); err != nil { + if err := r.Update(ctx, &secret); err != nil { return fmt.Errorf("error updating ProxyGroup config Secret: %w", err) } } @@ -706,10 +700,10 @@ func (a *HAServiceReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Con return nil } -func (a *HAServiceReconciler) numberPodsAdvertising(ctx context.Context, pgName string, serviceName tailcfg.ServiceName) (int, error) { +func (r *HAServiceReconciler) numberPodsAdvertising(ctx context.Context, pgName string, serviceName tailcfg.ServiceName) (int, error) { // Get all state Secrets for this ProxyGroup. secrets := &corev1.SecretList{} - if err := a.List(ctx, secrets, client.InNamespace(a.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, kubetypes.LabelSecretTypeState))); err != nil { + if err := r.List(ctx, secrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, kubetypes.LabelSecretTypeState))); err != nil { return 0, fmt.Errorf("failed to list ProxyGroup %q state Secrets: %w", pgName, err) } @@ -731,12 +725,28 @@ func (a *HAServiceReconciler) numberPodsAdvertising(ctx context.Context, pgName } // dnsNameForService returns the DNS name for the given Tailscale Service name. -func (r *HAServiceReconciler) dnsNameForService(ctx context.Context, svc tailcfg.ServiceName) (string, error) { +func dnsNameForService(ctx context.Context, cl client.Client, svc tailcfg.ServiceName, pg *tsapi.ProxyGroup, namespace string) (string, error) { s := svc.WithoutPrefix() - tcd, err := r.tailnetCertDomain(ctx) - if err != nil { - return "", fmt.Errorf("error determining DNS name base: %w", err) + + md, err := getNodeMetadata(ctx, pg, cl, namespace) + switch { + case err != nil: + return "", fmt.Errorf("error getting node metadata: %w", err) + case len(md) == 0: + return "", fmt.Errorf("failed to find node metadata for ProxyGroup %q", pg.Name) } + + // To determine the appropriate magic DNS name we take the first dns name we can find that is not empty and + // contains a period. + idx := slices.IndexFunc(md, func(metadata nodeMetadata) bool { + return metadata.dnsName != "" && strings.ContainsRune(metadata.dnsName, '.') + }) + if idx == -1 { + return "", fmt.Errorf("failed to find dns name for ProxyGroup %q", pg.Name) + } + + tcd := strings.SplitN(md[idx].dnsName, ".", 2)[1] + return s + "." + tcd, nil } diff --git a/cmd/k8s-operator/svc-for-pg_test.go b/cmd/k8s-operator/svc-for-pg_test.go index 3c478a90c..7a767a9b8 100644 --- a/cmd/k8s-operator/svc-for-pg_test.go +++ b/cmd/k8s-operator/svc-for-pg_test.go @@ -22,7 +22,7 @@ "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "tailscale.com/ipn/ipnstate" + tsoperator "tailscale.com/k8s-operator" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/kube/ingressservices" @@ -194,14 +194,6 @@ func setupServiceTest(t *testing.T) (*HAServiceReconciler, *corev1.Secret, clien t.Fatal(err) } - lc := &fakeLocalClient{ - status: &ipnstate.Status{ - CurrentTailnet: &ipnstate.TailnetStatus{ - MagicDNSSuffix: "ts.net", - }, - }, - } - cl := tstest.NewClock(tstest.ClockOpts{}) svcPGR := &HAServiceReconciler{ Client: fc, @@ -211,7 +203,6 @@ func setupServiceTest(t *testing.T) (*HAServiceReconciler, *corev1.Secret, clien tsNamespace: "operator-ns", logger: zl.Sugar(), recorder: record.NewFakeRecorder(10), - lc: lc, } return svcPGR, pgStateSecret, fc, ft, cl @@ -279,15 +270,12 @@ func TestValidateService(t *testing.T) { func TestServicePGReconciler_MultiCluster(t *testing.T) { var ft *fakeTSClient - var lc localClient for i := 0; i <= 10; i++ { pgr, stateSecret, fc, fti, _ := setupServiceTest(t) if i == 0 { ft = fti - lc = pgr.lc } else { pgr.tsClient = ft - pgr.lc = lc } svc, _ := setupTestService(t, "test-multi-cluster", "", "4.3.2.1", fc, stateSecret) diff --git a/cmd/k8s-operator/tailnet.go b/cmd/k8s-operator/tailnet.go index 57c749bec..439489f75 100644 --- a/cmd/k8s-operator/tailnet.go +++ b/cmd/k8s-operator/tailnet.go @@ -20,19 +20,19 @@ tsapi "tailscale.com/k8s-operator/apis/v1alpha1" ) -func clientForTailnet(ctx context.Context, cl client.Client, namespace, name string) (tsClient, error) { +func clientForTailnet(ctx context.Context, cl client.Client, namespace, name string) (tsClient, string, error) { var tn tsapi.Tailnet if err := cl.Get(ctx, client.ObjectKey{Name: name}, &tn); err != nil { - return nil, fmt.Errorf("failed to get tailnet %q: %w", name, err) + return nil, "", fmt.Errorf("failed to get tailnet %q: %w", name, err) } if !operatorutils.TailnetIsReady(&tn) { - return nil, fmt.Errorf("tailnet %q is not ready", name) + return nil, "", fmt.Errorf("tailnet %q is not ready", name) } var secret corev1.Secret if err := cl.Get(ctx, client.ObjectKey{Name: tn.Spec.Credentials.SecretName, Namespace: namespace}, &secret); err != nil { - return nil, fmt.Errorf("failed to get Secret %q in namespace %q: %w", tn.Spec.Credentials.SecretName, namespace, err) + return nil, "", fmt.Errorf("failed to get Secret %q in namespace %q: %w", tn.Spec.Credentials.SecretName, namespace, err) } baseURL := ipn.DefaultControlURL @@ -54,5 +54,18 @@ func clientForTailnet(ctx context.Context, cl client.Client, namespace, name str ts.HTTPClient = httpClient ts.BaseURL = baseURL - return ts, nil + return ts, baseURL, nil +} + +func clientFromProxyGroup(ctx context.Context, cl client.Client, pg *tsapi.ProxyGroup, namespace string, def tsClient) (tsClient, error) { + if pg.Spec.Tailnet == "" { + return def, nil + } + + tailscaleClient, _, err := clientForTailnet(ctx, cl, namespace, pg.Spec.Tailnet) + if err != nil { + return nil, err + } + + return tailscaleClient, nil } diff --git a/cmd/k8s-operator/testutils_test.go b/cmd/k8s-operator/testutils_test.go index 191a31723..d418f0128 100644 --- a/cmd/k8s-operator/testutils_test.go +++ b/cmd/k8s-operator/testutils_test.go @@ -31,9 +31,9 @@ "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "tailscale.com/internal/client/tailscale" "tailscale.com/ipn" - "tailscale.com/ipn/ipnstate" tsapi "tailscale.com/k8s-operator/apis/v1alpha1" "tailscale.com/kube/kubetypes" "tailscale.com/tailcfg" @@ -985,18 +985,3 @@ func (c *fakeTSClient) DeleteVIPService(ctx context.Context, name tailcfg.Servic } return nil } - -type fakeLocalClient struct { - status *ipnstate.Status -} - -func (f *fakeLocalClient) StatusWithoutPeers(ctx context.Context) (*ipnstate.Status, error) { - if f.status == nil { - return &ipnstate.Status{ - Self: &ipnstate.PeerStatus{ - DNSName: "test-node.test.ts.net.", - }, - }, nil - } - return f.status, nil -} diff --git a/cmd/k8s-operator/tsrecorder.go b/cmd/k8s-operator/tsrecorder.go index 0a497a46e..0a7dbda58 100644 --- a/cmd/k8s-operator/tsrecorder.go +++ b/cmd/k8s-operator/tsrecorder.go @@ -99,14 +99,9 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques return reconcile.Result{}, nil } - tailscaleClient := r.tsClient - if tsr.Spec.Tailnet != "" { - tc, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tsr.Spec.Tailnet) - if err != nil { - return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderTailnetUnavailable, err.Error()) - } - - tailscaleClient = tc + tailscaleClient, loginUrl, err := r.getClientAndLoginURL(ctx, tsr.Spec.Tailnet) + if err != nil { + return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderTailnetUnavailable, err.Error()) } if markedForDeletion(tsr) { @@ -149,7 +144,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message) } - if err = r.maybeProvision(ctx, tailscaleClient, tsr); err != nil { + if err = r.maybeProvision(ctx, tailscaleClient, loginUrl, tsr); err != nil { reason := reasonRecorderCreationFailed message := fmt.Sprintf("failed creating Recorder: %s", err) if strings.Contains(err.Error(), optimisticLockErrorMsg) { @@ -167,7 +162,30 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques return setStatusReady(tsr, metav1.ConditionTrue, reasonRecorderCreated, reasonRecorderCreated) } -func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error { +// getClientAndLoginURL returns the appropriate Tailscale client and resolved login URL +// for the given tailnet name. If no tailnet is specified, returns the default client +// and login server. Applies fallback to the operator's login server if the tailnet +// doesn't specify a custom login URL. +func (r *RecorderReconciler) getClientAndLoginURL(ctx context.Context, tailnetName string) (tsClient, + string, error) { + if tailnetName == "" { + return r.tsClient, r.loginServer, nil + } + + tc, loginUrl, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tailnetName) + if err != nil { + return nil, "", err + } + + // Apply fallback if tailnet doesn't specify custom login URL + if loginUrl == "" { + loginUrl = r.loginServer + } + + return tc, loginUrl, nil +} + +func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, loginUrl string, tsr *tsapi.Recorder) error { logger := r.logger(tsr.Name) r.mu.Lock() @@ -234,7 +252,7 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient return fmt.Errorf("error creating RoleBinding: %w", err) } - ss := tsrStatefulSet(tsr, r.tsNamespace, r.loginServer) + ss := tsrStatefulSet(tsr, r.tsNamespace, loginUrl) _, err = createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) { s.ObjectMeta.Labels = ss.ObjectMeta.Labels s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations