From cc15357999cb96017dbe4bf501334917d752a4cc Mon Sep 17 00:00:00 2001 From: Rafael da Fonseca Date: Mon, 4 Nov 2024 16:05:06 +0000 Subject: [PATCH 1/2] Automatically preserve kubelet supported version skew on worker nodes, while control plane is being updated Co-authored-by: Peter Rifel --- cmd/kops/update_cluster.go | 81 ++++++++++++++++++++++----- pkg/apis/kops/model/instance_group.go | 24 ++++++-- pkg/assets/builder.go | 5 ++ pkg/nodemodel/nodeupconfigbuilder.go | 6 ++ upup/pkg/fi/cloudup/apply_cluster.go | 6 ++ 5 files changed, 103 insertions(+), 19 deletions(-) diff --git a/cmd/kops/update_cluster.go b/cmd/kops/update_cluster.go index 01238a152c..9b938f9392 100644 --- a/cmd/kops/update_cluster.go +++ b/cmd/kops/update_cluster.go @@ -27,11 +27,14 @@ import ( "github.com/spf13/cobra" "github.com/spf13/viper" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" "k8s.io/klog/v2" "k8s.io/kops/cmd/kops/util" "k8s.io/kops/pkg/apis/kops" + apisutil "k8s.io/kops/pkg/apis/kops/util" "k8s.io/kops/pkg/assets" "k8s.io/kops/pkg/commands/commandutils" "k8s.io/kops/pkg/kubeconfig" @@ -67,6 +70,10 @@ type UpdateClusterOptions struct { SSHPublicKey string RunTasksOptions fi.RunTasksOptions AllowKopsDowngrade bool + // Bypasses kubelet vs control plane version skew checks, + // which by default prevent non-control plane instancegroups + // from being updated to a version greater than the control plane + IgnoreKubeletVersionSkew bool // GetAssets is whether this is invoked from the CmdGetAssets. GetAssets bool @@ -103,6 +110,8 @@ func (o *UpdateClusterOptions) InitDefaults() { o.Target = "direct" o.SSHPublicKey = "" o.OutDir = "" + // By default we enforce the version skew between control plane and worker nodes + o.IgnoreKubeletVersionSkew = false // By default we export a kubecfg, but it doesn't have a static/eternal credential in it any more. 
o.CreateKubecfg = true @@ -163,6 +172,7 @@ func NewCmdUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command { cmd.RegisterFlagCompletionFunc("lifecycle-overrides", completeLifecycleOverrides) cmd.Flags().BoolVar(&options.Prune, "prune", options.Prune, "Delete old revisions of cloud resources that were needed during an upgrade") + cmd.Flags().BoolVar(&options.IgnoreKubeletVersionSkew, "ignore-kubelet-version-skew", options.IgnoreKubeletVersionSkew, "Setting this to true will force updating the kubernetes version on all instance groups, regardless of which control plane version is running") return cmd } @@ -318,20 +328,30 @@ func RunUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer, c *Up return nil, err } + minControlPlaneRunningVersion := cluster.Spec.KubernetesVersion + if !c.IgnoreKubeletVersionSkew { + minControlPlaneRunningVersion, err = checkControlPlaneRunningVersion(ctx, cluster.ObjectMeta.Name, minControlPlaneRunningVersion) + if err != nil { + klog.Warningf("error checking control plane running version, assuming no k8s upgrade in progress: %v", err) + } else { + klog.V(2).Infof("successfully checked control plane running version: %v", minControlPlaneRunningVersion) + } + } applyCmd := &cloudup.ApplyClusterCmd{ - Cloud: cloud, - Clientset: clientset, - Cluster: cluster, - DryRun: isDryrun, - AllowKopsDowngrade: c.AllowKopsDowngrade, - RunTasksOptions: &c.RunTasksOptions, - OutDir: c.OutDir, - InstanceGroupFilter: predicates.AllOf(instanceGroupFilters...), - Phase: phase, - TargetName: targetName, - LifecycleOverrides: lifecycleOverrideMap, - GetAssets: c.GetAssets, - DeletionProcessing: deletionProcessing, + Cloud: cloud, + Clientset: clientset, + Cluster: cluster, + DryRun: isDryrun, + AllowKopsDowngrade: c.AllowKopsDowngrade, + RunTasksOptions: &c.RunTasksOptions, + OutDir: c.OutDir, + InstanceGroupFilter: predicates.AllOf(instanceGroupFilters...), + Phase: phase, + TargetName: targetName, + LifecycleOverrides: lifecycleOverrideMap, 
+ GetAssets: c.GetAssets, + DeletionProcessing: deletionProcessing, + ControlPlaneRunningVersion: minControlPlaneRunningVersion, } applyResults, err := applyCmd.Run(ctx) @@ -575,3 +595,38 @@ func matchInstanceGroupRoles(roles []string) predicates.Predicate[*kops.Instance return false } } + +// checkControlPlaneRunningVersion returns the minimum control plane running version +func checkControlPlaneRunningVersion(ctx context.Context, clusterName string, version string) (string, error) { + configLoadingRules := clientcmd.NewDefaultClientConfigLoadingRules() + config, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( + configLoadingRules, + &clientcmd.ConfigOverrides{CurrentContext: clusterName}).ClientConfig() + if err != nil { + return version, fmt.Errorf("cannot load kubecfg settings for %q: %v", clusterName, err) + } + + k8sClient, err := kubernetes.NewForConfig(config) + if err != nil { + return version, fmt.Errorf("cannot build kubernetes api client for %q: %v", clusterName, err) + } + + parsedVersion, err := apisutil.ParseKubernetesVersion(version) + if err != nil { + return version, fmt.Errorf("cannot parse kubernetes version %q: %v", version, err) + } + nodeList, err := k8sClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{ + LabelSelector: "node-role.kubernetes.io/control-plane", + }) + if err != nil { + return version, fmt.Errorf("cannot list nodes in cluster %q: %v", clusterName, err) + } + for _, node := range nodeList.Items { + if apisutil.IsKubernetesGTE(node.Status.NodeInfo.KubeletVersion, *parsedVersion) { + version = node.Status.NodeInfo.KubeletVersion + parsedVersion, _ = apisutil.ParseKubernetesVersion(version) + } + + } + return strings.TrimPrefix(version, "v"), nil +} diff --git a/pkg/apis/kops/model/instance_group.go b/pkg/apis/kops/model/instance_group.go index 9475f520b4..b7c8a42c6e 100644 --- a/pkg/apis/kops/model/instance_group.go +++ b/pkg/apis/kops/model/instance_group.go @@ -34,17 +34,20 @@ type InstanceGroup interface { 
// RawClusterSpec returns the cluster spec for the instance group. // If possible, prefer abstracted methods over accessing this data directly. RawClusterSpec() *kops.ClusterSpec + + // ForceKubernetesVersion overrides the Kubernetes version for this instance group. + // (The default is to use the cluster-wide Kubernetes version, but this allows + // us to override it for the nodes to respect the node skew policy.) + ForceKubernetesVersion(version string) error } // ForInstanceGroup creates an InstanceGroup model for the given cluster and instance group. func ForInstanceGroup(cluster *kops.Cluster, ig *kops.InstanceGroup) (InstanceGroup, error) { - kubernetesVersionString := cluster.Spec.KubernetesVersion - kubernetesVersion, err := ParseKubernetesVersion(kubernetesVersionString) - if err != nil { - return nil, fmt.Errorf("error parsing Kubernetes version %q: %v", kubernetesVersionString, err) + m := &instanceGroupModel{cluster: cluster, ig: ig} + if err := m.ForceKubernetesVersion(cluster.Spec.KubernetesVersion); err != nil { + return nil, err } - - return &instanceGroupModel{cluster: cluster, ig: ig, kubernetesVersion: kubernetesVersion}, nil + return m, nil } // instanceGroupModel is a concrete implementation of InstanceGroup. 
@@ -67,3 +70,12 @@ func (m *instanceGroupModel) GetCloudProvider() kops.CloudProviderID { func (m *instanceGroupModel) RawClusterSpec() *kops.ClusterSpec { return &m.cluster.Spec } + +func (m *instanceGroupModel) ForceKubernetesVersion(kubernetesVersionString string) error { + kubernetesVersion, err := ParseKubernetesVersion(kubernetesVersionString) + if err != nil { + return fmt.Errorf("error parsing Kubernetes version %q: %v", kubernetesVersionString, err) + } + m.kubernetesVersion = kubernetesVersion + return nil +} diff --git a/pkg/assets/builder.go b/pkg/assets/builder.go index ec7834eb6d..977102c525 100644 --- a/pkg/assets/builder.go +++ b/pkg/assets/builder.go @@ -51,6 +51,11 @@ type AssetBuilder struct { // KubernetesVersion is the version of kubernetes we are installing KubernetesVersion semver.Version + // KubeletSupportedVersion is the max version of kubelet that we are currently allowed to run on worker nodes. + // This is used to avoid violating the kubelet supported version skew policy, + // (we are not allowed to run a newer kubelet on a worker node than the control plane) + KubeletSupportedVersion string + // StaticManifests records manifests used by nodeup: // * e.g. 
sidecar manifests for static pods run by kubelet StaticManifests []*StaticManifest diff --git a/pkg/nodemodel/nodeupconfigbuilder.go b/pkg/nodemodel/nodeupconfigbuilder.go index 7f0ee4126d..41f9610b15 100644 --- a/pkg/nodemodel/nodeupconfigbuilder.go +++ b/pkg/nodemodel/nodeupconfigbuilder.go @@ -224,6 +224,12 @@ func (n *nodeUpConfigBuilder) BuildConfig(ig *kops.InstanceGroup, wellKnownAddre return nil, nil, fmt.Errorf("building instance group model: %w", err) } + if !hasAPIServer && n.assetBuilder.KubeletSupportedVersion != "" { + if err := igModel.ForceKubernetesVersion(n.assetBuilder.KubeletSupportedVersion); err != nil { + return nil, nil, err + } + } + kubernetesAssets, err := BuildKubernetesFileAssets(igModel, n.assetBuilder) if err != nil { return nil, nil, err diff --git a/upup/pkg/fi/cloudup/apply_cluster.go b/upup/pkg/fi/cloudup/apply_cluster.go index 492dc45dad..cc09d99ab5 100644 --- a/upup/pkg/fi/cloudup/apply_cluster.go +++ b/upup/pkg/fi/cloudup/apply_cluster.go @@ -137,6 +137,9 @@ type ApplyClusterCmd struct { // InstanceGroupFilter is a predicate that restricts which instance groups we will update. InstanceGroupFilter predicates.Predicate[*kops.InstanceGroup] + + // ControlPlaneRunningVersion is the current oldest version of control plane nodes, defaults to version defined in cluster spec if IgnoreKubeletVersionSkew was set + ControlPlaneRunningVersion string } // ApplyResults holds information about an ApplyClusterCmd operation. 
@@ -239,6 +242,9 @@ func (c *ApplyClusterCmd) Run(ctx context.Context) (*ApplyResults, error) { } assetBuilder := assets.NewAssetBuilder(c.Clientset.VFSContext(), c.Cluster.Spec.Assets, c.Cluster.Spec.KubernetesVersion, c.GetAssets) + if len(c.ControlPlaneRunningVersion) > 0 && c.ControlPlaneRunningVersion != c.Cluster.Spec.KubernetesVersion { + assetBuilder.KubeletSupportedVersion = c.ControlPlaneRunningVersion + } err = c.upgradeSpecs(ctx, assetBuilder) if err != nil { return nil, err From e4a0ef6c1f49d85f1b531d60f17a4f0d9f279877 Mon Sep 17 00:00:00 2001 From: Rafael da Fonseca Date: Mon, 4 Nov 2024 18:35:48 +0000 Subject: [PATCH 2/2] update cli-docs --- docs/cli/kops_update_cluster.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cli/kops_update_cluster.md b/docs/cli/kops_update_cluster.md index ddc4e24841..332fe909d0 100644 --- a/docs/cli/kops_update_cluster.md +++ b/docs/cli/kops_update_cluster.md @@ -29,6 +29,7 @@ kops update cluster [CLUSTER] [flags] --allow-kops-downgrade Allow an older version of kOps to update the cluster than last used --create-kube-config Will control automatically creating the kube config file on your local filesystem (default true) -h, --help help for cluster + --ignore-kubelet-version-skew Setting this to true will force updating the kubernetes version on all instance groups, regardless of which control plane version is running --instance-group strings Instance groups to update (defaults to all if not specified) --instance-group-roles strings Instance group roles to update (control-plane,apiserver,node,bastion) --internal Use the cluster's internal DNS name. Implies --create-kube-config