package controllers import ( "crypto/rand" "crypto/rsa" "crypto/x509" "crypto/x509/pkix" "encoding/base64" "encoding/pem" "fmt" "math/big" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" bootstrapv1 "github.com/rancher/cluster-api-provider-rke2/bootstrap/api/v1beta1" controlplanev1 "github.com/rancher/cluster-api-provider-rke2/controlplane/api/v1beta1" "github.com/rancher/cluster-api-provider-rke2/pkg/rke2" "github.com/rancher/cluster-api-provider-rke2/pkg/secret" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/certs" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/kubeconfig" "sigs.k8s.io/controller-runtime/pkg/client" ) var _ = Describe("Rotate kubeconfig cert", func() { var ( err error ns *corev1.Namespace rcp *controlplanev1.RKE2ControlPlane caSecret *corev1.Secret ccaSecret *corev1.Secret kubeconfigSecret *corev1.Secret clusterKey client.ObjectKey ) BeforeEach(func() { kubeconfigSecret = &corev1.Secret{} ns, err = testEnv.CreateNamespace(ctx, "rotate-kubeconfig-cert") Expect(err).ToNot(HaveOccurred()) clusterKey = client.ObjectKey{Namespace: ns.Name, Name: "rotate-kubeconfig-cert"} rcp = &controlplanev1.RKE2ControlPlane{ ObjectMeta: metav1.ObjectMeta{ Name: "test", Namespace: ns.Name, UID: "foobar", }, TypeMeta: metav1.TypeMeta{ APIVersion: controlplanev1.GroupVersion.String(), Kind: rke2ControlPlaneKind, }, } // Generate new Secret Cluster CA certPEM, _, err := generateCertAndKey(time.Now().Add(3650 * 24 * time.Hour)) // 10 years from now Expect(err).ShouldNot(HaveOccurred()) caSecret = &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: secret.Name(clusterKey.Name, secret.ClusterCA), Namespace: ns.Name, }, StringData: map[string]string{ secret.TLSCrtDataName: string(certPEM), }, } Expect(testEnv.Client.Create(ctx, caSecret)).Should(Succeed()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(caSecret), caSecret)).Should(Succeed()) // Generate new Secret Client Cluster CA certPEM, keyPEM, err := generateCertAndKey(time.Now().Add(3650 * 24 * time.Hour)) // 10 years from now Expect(err).ShouldNot(HaveOccurred()) ccaSecret = &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: secret.Name(clusterKey.Name, secret.ClientClusterCA), Namespace: ns.Name, }, StringData: map[string]string{ secret.TLSCrtDataName: string(certPEM), secret.TLSKeyDataName: string(keyPEM), }, } Expect(testEnv.Client.Create(ctx, ccaSecret)).Should(Succeed()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(ccaSecret), ccaSecret)).Should(Succeed()) }) AfterEach(func() { testEnv.Cleanup(ctx, kubeconfigSecret, ccaSecret, caSecret, ns) }) It("Should rotate kubeconfig secret if needed", func() { By("Creating the first kubeconfig if not existing yet") r := &RKE2ControlPlaneReconciler{ Client: testEnv.GetClient(), Scheme: testEnv.GetScheme(), managementCluster: &rke2.Management{Client: testEnv.GetClient(), SecretCachingClient: testEnv.GetClient()}, managementClusterUncached: &rke2.Management{Client: testEnv.GetClient()}, } endpoint := clusterv1.APIEndpoint{Host: "1.2.3.4", Port: 6443} // Trigger first reconcile to generate a new Kubeconfig Secret Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(caSecret), caSecret)).Should(Succeed()) _, err = r.reconcileKubeconfig(ctx, clusterKey, endpoint, rcp) Expect(err).ToNot(HaveOccurred()) // Fetch the original Kubeconfig Secret Expect(testEnv.Get(ctx, types.NamespacedName{ Namespace: ns.Name, Name: secret.Name(clusterKey.Name, secret.Kubeconfig), }, kubeconfigSecret)).Should(Succeed()) originalSecret := kubeconfigSecret.DeepCopy() By("Overriding the kubeconfig secret with short expiry") shortExpiryDate := time.Now().Add(24 * time.Hour) // 1 day from now Expect(updateKubeconfigSecret(kubeconfigSecret, shortExpiryDate)).Should(Succeed()) Expect(testEnv.Update(ctx, kubeconfigSecret)).To(Succeed()) By("Checking that rotation is needed") needsRotation, err := kubeconfig.NeedsClientCertRotation(kubeconfigSecret, certs.ClientCertificateRenewalDuration) Expect(err).ToNot(HaveOccurred()) Expect(needsRotation).To(BeTrue()) By("Rotating kubeconfig secret") _, err = r.reconcileKubeconfig(ctx, clusterKey, endpoint, rcp) Expect(err).ToNot(HaveOccurred()) Expect(testEnv.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: kubeconfigSecret.Name}, kubeconfigSecret)).To(Succeed()) Expect(kubeconfigSecret.StringData[secret.KubeconfigDataName]).Should(Equal(originalSecret.StringData[secret.KubeconfigDataName]), "Kubeconfig data must have been updated") By("Override the kubeconfig secret with a long expiry") longExpiryDate := time.Now().Add(365 * 24 * time.Hour) // 1 year from now Expect(updateKubeconfigSecret(kubeconfigSecret, longExpiryDate)).Should(Succeed()) Expect(testEnv.Update(ctx, kubeconfigSecret)).To(Succeed()) By("Checking that rotation is not needed") needsRotation, err = kubeconfig.NeedsClientCertRotation(kubeconfigSecret, certs.ClientCertificateRenewalDuration) Expect(err).ToNot(HaveOccurred()) Expect(needsRotation).To(BeFalse()) By("Fetching the overridden kubeconfig Secret") Expect(testEnv.Get(ctx, types.NamespacedName{ Namespace: ns.Name, Name: secret.Name(clusterKey.Name, secret.Kubeconfig), }, kubeconfigSecret)).Should(Succeed()) updatedSecret := kubeconfigSecret.DeepCopy() By("Ensuring no rotation occurs") _, err = r.reconcileKubeconfig(ctx, clusterKey, endpoint, rcp) Expect(err).ToNot(HaveOccurred()) Expect(testEnv.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: kubeconfigSecret.Name}, kubeconfigSecret)).To(Succeed()) Expect(kubeconfigSecret.StringData[secret.KubeconfigDataName]).Should(Equal(updatedSecret.StringData[secret.KubeconfigDataName]), "Kubeconfig data must stay the same") }) }) var _ = Describe("Reconcile control plane conditions", func() { var ( err error cp *rke2.ControlPlane rcp *controlplanev1.RKE2ControlPlane ns *corev1.Namespace nodeName = "node1" node *corev1.Node nodeByRef *corev1.Node orphanedNode *corev1.Node machine *clusterv1.Machine machineWithRef *clusterv1.Machine config *bootstrapv1.RKE2Config ) BeforeEach(func() { ns, err = testEnv.CreateNamespace(ctx, "ns") Expect(err).ToNot(HaveOccurred()) annotations := map[string]string{ "test": "true", } config = &bootstrapv1.RKE2Config{ObjectMeta: metav1.ObjectMeta{ Name: "config", Namespace: ns.Name, }, Spec: bootstrapv1.RKE2ConfigSpec{ AgentConfig: bootstrapv1.RKE2AgentConfig{ NodeAnnotations: annotations, }, }} Expect(testEnv.Create(ctx, config)).To(Succeed()) node = &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, Labels: map[string]string{ "node-role.kubernetes.io/master": "true", }, Annotations: map[string]string{ clusterv1.MachineAnnotation: nodeName, }, }, Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{{ Type: corev1.NodeReady, Status: corev1.ConditionTrue, }}, }, } Expect(testEnv.Create(ctx, node.DeepCopy())).To(Succeed()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(node), node)).Should(Succeed()) Expect(testEnv.Status().Update(ctx, node.DeepCopy())).To(Succeed()) nodeRefName := "ref-node" machineWithRef = &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ Name: "machine-with-ref", Namespace: ns.Name, }, Spec: clusterv1.MachineSpec{ ClusterName: "cluster", Bootstrap: clusterv1.Bootstrap{ ConfigRef: &corev1.ObjectReference{ Kind: "RKE2Config", APIVersion: bootstrapv1.GroupVersion.String(), Name: config.Name, Namespace: config.Namespace, }, }, InfrastructureRef: corev1.ObjectReference{ Kind: "Pod", APIVersion: "v1", Name: "stub", Namespace: ns.Name, }, }, Status: clusterv1.MachineStatus{ NodeRef: &corev1.ObjectReference{ Kind: "Node", APIVersion: "v1", Name: nodeRefName, }, Conditions: clusterv1.Conditions{ clusterv1.Condition{ Type: clusterv1.ReadyCondition, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now(), }, }, }, } ml := clusterv1.MachineList{Items: []clusterv1.Machine{*machineWithRef.DeepCopy()}} updatedMachine := machineWithRef.DeepCopy() Expect(testEnv.Create(ctx, updatedMachine)).To(Succeed()) updatedMachine.Status = *machineWithRef.Status.DeepCopy() machineWithRef = updatedMachine.DeepCopy() Expect(testEnv.Status().Update(ctx, machineWithRef)).To(Succeed()) nodeByRef = &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: nodeRefName, Labels: map[string]string{ "node-role.kubernetes.io/master": "true", }, Annotations: map[string]string{ clusterv1.MachineAnnotation: machineWithRef.Name, }, }, Status: corev1.NodeStatus{ Conditions: []corev1.NodeCondition{{ Type: corev1.NodeReady, Status: corev1.ConditionTrue, }}, }, } Expect(testEnv.Create(ctx, nodeByRef.DeepCopy())).To(Succeed()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(nodeByRef), nodeByRef)).Should(Succeed()) Expect(testEnv.Status().Update(ctx, nodeByRef.DeepCopy())).To(Succeed()) orphanedNode = &corev1.Node{ObjectMeta: metav1.ObjectMeta{ Name: "missing-machine", Labels: map[string]string{ "node-role.kubernetes.io/master": "true", }, }} Expect(testEnv.Create(ctx, orphanedNode)).To(Succeed()) machine = &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ Name: nodeName, Namespace: ns.Name, }, Spec: clusterv1.MachineSpec{ ClusterName: "cluster", Bootstrap: clusterv1.Bootstrap{ ConfigRef: &corev1.ObjectReference{ Kind: "RKE2Config", APIVersion: bootstrapv1.GroupVersion.String(), Name: config.Name, Namespace: config.Namespace, }, }, InfrastructureRef: corev1.ObjectReference{ Kind: "Pod", APIVersion: "v1", Name: "stub", Namespace: ns.Name, }, }, Status: clusterv1.MachineStatus{ NodeRef: &corev1.ObjectReference{ Kind: "Node", Name: nodeName, UID: node.GetUID(), Namespace: "", }, Conditions: clusterv1.Conditions{ clusterv1.Condition{ Type: clusterv1.ReadyCondition, Status: corev1.ConditionTrue, LastTransitionTime: metav1.Now(), }, }, }, } ml.Items = append(ml.Items, *machine.DeepCopy()) updatedMachine = machine.DeepCopy() Expect(testEnv.Create(ctx, updatedMachine)).To(Succeed()) updatedMachine.Status = *machine.Status.DeepCopy() machine = updatedMachine.DeepCopy() Expect(testEnv.Status().Update(ctx, machine)).To(Succeed()) cluster := &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: "test", Namespace: ns.Name, }, } Expect(testEnv.Client.Create(ctx, cluster)).To(Succeed()) rcp = &controlplanev1.RKE2ControlPlane{ Status: controlplanev1.RKE2ControlPlaneStatus{ Initialized: true, }, } m := &rke2.Management{ Client: testEnv, SecretCachingClient: testEnv, } cp, err = rke2.NewControlPlane(ctx, m, testEnv.GetClient(), cluster, rcp, collections.FromMachineList(&ml)) Expect(err).ToNot(HaveOccurred()) ref := metav1.OwnerReference{ APIVersion: clusterv1.GroupVersion.String(), Kind: clusterv1.ClusterKind, UID: cp.Cluster.GetUID(), Name: cp.Cluster.GetName(), } Expect(testEnv.Client.Create(ctx, kubeconfig.GenerateSecretWithOwner( client.ObjectKeyFromObject(cp.Cluster), kubeconfig.FromEnvTestConfig(testEnv.Config, cp.Cluster), ref))).To(Succeed()) }) AfterEach(func() { Expect(testEnv.DeleteAllOf(ctx, node)).To(Succeed()) testEnv.Cleanup(ctx, node, ns) }) It("should reconcile cp and machine conditions successfully", func() { r := &RKE2ControlPlaneReconciler{ Client: testEnv.GetClient(), Scheme: testEnv.GetScheme(), managementCluster: &rke2.Management{Client: testEnv.GetClient(), SecretCachingClient: testEnv.GetClient()}, managementClusterUncached: &rke2.Management{Client: testEnv.GetClient()}, } _, err := r.reconcileControlPlaneConditions(ctx, cp) Expect(err).ToNot(HaveOccurred()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(machine), machine)).To(Succeed()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(machineWithRef), machineWithRef)).To(Succeed()) Expect(conditions.IsTrue(machine, controlplanev1.NodeMetadataUpToDate)).To(BeTrue()) Expect(conditions.IsTrue(machineWithRef, controlplanev1.NodeMetadataUpToDate)).To(BeTrue()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(node), node)).To(Succeed()) Expect(testEnv.Get(ctx, client.ObjectKeyFromObject(nodeByRef), nodeByRef)).To(Succeed()) Expect(node.GetAnnotations()).To(HaveKeyWithValue("test", "true")) Expect(nodeByRef.GetAnnotations()).To(HaveKeyWithValue("test", "true")) Expect(conditions.IsFalse(rcp, controlplanev1.ControlPlaneComponentsHealthyCondition)).To(BeTrue()) Expect(conditions.GetMessage(rcp, controlplanev1.ControlPlaneComponentsHealthyCondition)).To(Equal( "Control plane node missing-machine does not have a corresponding machine")) }) }) // generateCertAndKey generates a self-signed certificate and private key. func generateCertAndKey(expiryDate time.Time) ([]byte, []byte, error) { priv, err := rsa.GenerateKey(rand.Reader, 2048) if err != nil { return nil, nil, err } template := x509.Certificate{ SerialNumber: big.NewInt(1), Subject: pkix.Name{ Organization: []string{"Test Org"}, }, NotBefore: time.Now(), NotAfter: expiryDate, KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, BasicConstraintsValid: true, } certDER, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv) if err != nil { return nil, nil, err } certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certDER}) keyPEM := pem.EncodeToMemory(&pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(priv)}) return certPEM, keyPEM, nil } // updateKubeconfigSecret updates a Kubernetes secret with a kubeconfig containing a client certificate and key. func updateKubeconfigSecret(configSecret *corev1.Secret, expiryDate time.Time) error { certPEM, keyPEM, err := generateCertAndKey(expiryDate) if err != nil { return fmt.Errorf("Generating Cert and Key: %w", err) } configSecret.Data[secret.KubeconfigDataName] = []byte(fmt.Sprintf(` apiVersion: v1 kind: Config clusters: - cluster: server: https://1.2.3.4:6443 name: test-cluster contexts: - context: cluster: test-cluster user: test-user name: test-context current-context: test-context users: - name: test-user user: client-certificate-data: %s client-key-data: %s `, base64.StdEncoding.EncodeToString(certPEM), base64.StdEncoding.EncodeToString(keyPEM))) return nil }