diff --git a/cmd/kops/rollingupdatecluster.go b/cmd/kops/rollingupdatecluster.go index 67734dcdef..8d12f22f60 100644 --- a/cmd/kops/rollingupdatecluster.go +++ b/cmd/kops/rollingupdatecluster.go @@ -47,7 +47,8 @@ var ( This command updates a kubernetes cluster to match the cloud and kops specifications. To perform a rolling update, you need to update the cloud resources first with the command - ` + pretty.Bash("kops update cluster") + `. + ` + pretty.Bash("kops update cluster") + `. Nodes may be additionally marked for update by placing a + ` + pretty.Bash("kops.k8s.io/needs-update") + ` annotation on them. If rolling-update does not report that the cluster needs to be rolled, you can force the cluster to be rolled with the force flag. Rolling update drains and validates the cluster by default. A cluster is @@ -318,6 +319,30 @@ func RunRollingUpdateCluster(f *util.Factory, out io.Writer, options *RollingUpd return err } + d := &instancegroups.RollingUpdateCluster{ + MasterInterval: options.MasterInterval, + NodeInterval: options.NodeInterval, + BastionInterval: options.BastionInterval, + Interactive: options.Interactive, + Force: options.Force, + Cloud: cloud, + K8sClient: k8sClient, + FailOnDrainError: options.FailOnDrainError, + FailOnValidate: options.FailOnValidate, + CloudOnly: options.CloudOnly, + ClusterName: options.ClusterName, + PostDrainDelay: options.PostDrainDelay, + ValidationTimeout: options.ValidationTimeout, + // TODO should we expose this to the UI? + ValidateTickDuration: 30 * time.Second, + ValidateSuccessDuration: 10 * time.Second, + } + + err = d.AdjustNeedUpdate(groups, cluster, list) + if err != nil { + return err + } + { t := &tables.Table{} t.AddColumn("NAME", func(r *cloudinstances.CloudInstanceGroup) string { @@ -391,24 +416,7 @@ func RunRollingUpdateCluster(f *util.Factory, out io.Writer, options *RollingUpd return fmt.Errorf("cannot create cluster validator: %v", err) } } - d := &instancegroups.RollingUpdateCluster{ - MasterInterval: options.MasterInterval, - NodeInterval: options.NodeInterval, - BastionInterval: options.BastionInterval, - Interactive: options.Interactive, - Force: options.Force, - Cloud: cloud, - K8sClient: k8sClient, - ClusterValidator: clusterValidator, - FailOnDrainError: options.FailOnDrainError, - FailOnValidate: options.FailOnValidate, - CloudOnly: options.CloudOnly, - ClusterName: options.ClusterName, - PostDrainDelay: options.PostDrainDelay, - ValidationTimeout: options.ValidationTimeout, - // TODO should we expose this to the UI? - ValidateTickDuration: 30 * time.Second, - ValidateSuccessDuration: 10 * time.Second, - } + d.ClusterValidator = clusterValidator + return d.RollingUpdate(groups, cluster, list) } diff --git a/docs/cli/kops_rolling-update.md b/docs/cli/kops_rolling-update.md index 23d44b6222..2db99e2cfd 100644 --- a/docs/cli/kops_rolling-update.md +++ b/docs/cli/kops_rolling-update.md @@ -10,7 +10,8 @@ Rolling update a cluster. This command updates a kubernetes cluster to match the cloud and kops specifications. To perform a rolling update, you need to update the cloud resources first with the command -`kops update cluster`. +`kops update cluster`. Nodes may be additionally marked for update by placing a +`kops.k8s.io/needs-update` annotation on them. If rolling-update does not report that the cluster needs to be rolled, you can force the cluster to be rolled with the force flag. Rolling update drains and validates the cluster by default. A cluster is diff --git a/docs/cli/kops_rolling-update_cluster.md b/docs/cli/kops_rolling-update_cluster.md index c3bfddb2c2..7e939c0745 100644 --- a/docs/cli/kops_rolling-update_cluster.md +++ b/docs/cli/kops_rolling-update_cluster.md @@ -10,7 +10,8 @@ Rolling update a cluster. This command updates a kubernetes cluster to match the cloud and kops specifications. To perform a rolling update, you need to update the cloud resources first with the command -`kops update cluster`. +`kops update cluster`. Nodes may be additionally marked for update by placing a +`kops.k8s.io/needs-update` annotation on them. If rolling-update does not report that the cluster needs to be rolled, you can force the cluster to be rolled with the force flag. Rolling update drains and validates the cluster by default. A cluster is diff --git a/docs/operations/rolling-update.md b/docs/operations/rolling-update.md index 0c9e77d6c6..22ba50dcc3 100644 --- a/docs/operations/rolling-update.md +++ b/docs/operations/rolling-update.md @@ -14,6 +14,7 @@ Cloud instances are chosen to be updated (replaced) if at least one of the follo * The instance was created with a specification that is older than that generated by the last `kops update cluster`. * The instance was detached for surging by a previous (failed or interrupted) rolling update. +* The node has a `kops.k8s.io/needs-update` annotation. * The `--force` flag was given to the `kops rolling-update cluster` command. ## Order of instance groups diff --git a/pkg/instancegroups/rollingupdate.go b/pkg/instancegroups/rollingupdate.go index 35e790fcbb..e16d9e795c 100644 --- a/pkg/instancegroups/rollingupdate.go +++ b/pkg/instancegroups/rollingupdate.go @@ -69,6 +69,31 @@ type RollingUpdateCluster struct { ValidateSuccessDuration time.Duration } +// AdjustNeedUpdate adjusts the set of instances that need updating, using factors outside those known by the cloud implementation +func (c *RollingUpdateCluster) AdjustNeedUpdate(groups map[string]*cloudinstances.CloudInstanceGroup, cluster *api.Cluster, instanceGroups *api.InstanceGroupList) error { + for _, group := range groups { + if group.Ready != nil { + var newReady []*cloudinstances.CloudInstanceGroupMember + for _, member := range group.Ready { + makeNotReady := false + if member.Node != nil && member.Node.Annotations != nil { + if _, ok := member.Node.Annotations["kops.k8s.io/needs-update"]; ok { + makeNotReady = true + } + } + + if makeNotReady { + group.NeedUpdate = append(group.NeedUpdate, member) + } else { + newReady = append(newReady, member) + } + } + group.Ready = newReady + } + } + return nil +} + // RollingUpdate performs a rolling update on a K8s Cluster. func (c *RollingUpdateCluster) RollingUpdate(groups map[string]*cloudinstances.CloudInstanceGroup, cluster *api.Cluster, instanceGroups *api.InstanceGroupList) error { if len(groups) == 0 { diff --git a/pkg/instancegroups/rollingupdate_test.go b/pkg/instancegroups/rollingupdate_test.go index 12117e4f5f..3635ea7c44 100644 --- a/pkg/instancegroups/rollingupdate_test.go +++ b/pkg/instancegroups/rollingupdate_test.go @@ -572,6 +572,103 @@ func TestRollingUpdateValidatesAfterBastion(t *testing.T) { assertGroupInstanceCount(t, cloud, "bastion-1", 0) } +func addNeedsUpdateAnnotation(group *cloudinstances.CloudInstanceGroup, node string) { + for _, igm := range group.Ready { + if igm.ID == node { + if igm.Node.Annotations == nil { + igm.Node.Annotations = map[string]string{} + } + igm.Node.Annotations["kops.k8s.io/needs-update"] = "somevalue" + return + } + } + for _, igm := range group.NeedUpdate { + if igm.ID == node { + if igm.Node.Annotations == nil { + igm.Node.Annotations = map[string]string{} + } + igm.Node.Annotations["kops.k8s.io/needs-update"] = "somevalue" + return + } + } + panic("did not find node " + node) +} + +func TestAddAnnotatedNodesToNeedsUpdate(t *testing.T) { + c, cloud, cluster := getTestSetup() + + groups := make(map[string]*cloudinstances.CloudInstanceGroup) + makeGroup(groups, c.K8sClient, cloud, "master-1", kopsapi.InstanceGroupRoleMaster, 2, 1) + makeGroup(groups, c.K8sClient, cloud, "node-1", kopsapi.InstanceGroupRoleNode, 2, 1) + makeGroup(groups, c.K8sClient, cloud, "node-2", kopsapi.InstanceGroupRoleNode, 2, 1) + + addNeedsUpdateAnnotation(groups["node-1"], "node-1b") + addNeedsUpdateAnnotation(groups["node-2"], "node-2a") + addNeedsUpdateAnnotation(groups["master-1"], "master-1b") + + err := c.AdjustNeedUpdate(groups, cluster, &kopsapi.InstanceGroupList{}) + assert.NoError(t, err, "AddAnnotatedNodesToGroups") + + assertGroupNeedUpdate(t, groups, "node-1", "node-1a", "node-1b") + assertGroupNeedUpdate(t, groups, "node-2", "node-2a") + assertGroupNeedUpdate(t, groups, "master-1", "master-1a", "master-1b") +} + +func TestAddAnnotatedNodesToNeedsUpdateCloudonly(t *testing.T) { + c, cloud, cluster := getTestSetup() + + groups := make(map[string]*cloudinstances.CloudInstanceGroup) + makeGroup(groups, c.K8sClient, cloud, "master-1", kopsapi.InstanceGroupRoleMaster, 2, 1) + makeGroup(groups, c.K8sClient, cloud, "node-1", kopsapi.InstanceGroupRoleNode, 2, 1) + makeGroup(groups, c.K8sClient, cloud, "node-2", kopsapi.InstanceGroupRoleNode, 2, 1) + + addNeedsUpdateAnnotation(groups["node-1"], "node-1b") + addNeedsUpdateAnnotation(groups["node-2"], "node-2a") + addNeedsUpdateAnnotation(groups["master-1"], "master-1b") + + c.CloudOnly = true + c.ClusterValidator = &assertNotCalledClusterValidator{T: t} + + err := c.AdjustNeedUpdate(groups, cluster, &kopsapi.InstanceGroupList{}) + assert.NoError(t, err, "AddAnnotatedNodesToGroups") + + assertGroupNeedUpdate(t, groups, "node-1", "node-1a", "node-1b") + assertGroupNeedUpdate(t, groups, "node-2", "node-2a") + assertGroupNeedUpdate(t, groups, "master-1", "master-1a", "master-1b") +} + +func TestAddAnnotatedNodesToNeedsUpdateNodesMissing(t *testing.T) { + c, cloud, cluster := getTestSetup() + + groups := make(map[string]*cloudinstances.CloudInstanceGroup) + makeGroup(groups, c.K8sClient, cloud, "node-1", kopsapi.InstanceGroupRoleNode, 2, 1) + + groups["node-1"].Ready[0].Node = nil + groups["node-1"].NeedUpdate[0].Node = nil + + err := c.AdjustNeedUpdate(groups, cluster, &kopsapi.InstanceGroupList{}) + assert.NoError(t, err, "AddAnnotatedNodesToGroups") +} + +func assertGroupNeedUpdate(t *testing.T, groups map[string]*cloudinstances.CloudInstanceGroup, groupName string, nodes ...string) { + notFound := map[string]bool{} + for _, node := range nodes { + notFound[node] = true + } + for _, node := range groups[groupName].NeedUpdate { + if notFound[node.ID] { + notFound[node.ID] = false + } else { + t.Errorf("node %s of group %s is unexpectedly in NeedUpdate", node.ID, groupName) + } + } + for nodeID, v := range notFound { + if v { + t.Errorf("node %s of group %s is missing from NeedUpdate", nodeID, groupName) + } + } +} + func TestRollingUpdateTaintAllButOneNeedUpdate(t *testing.T) { c, cloud, cluster := getTestSetup()