Remove optionality and exit when specific error prefix is matched

commit 6efd68f428 (parent f9ea9b3ef8)
Author: Jack Andersen <jandersen@plaid.com>
Date:   2022-11-25 08:03:55 -08:00
Signed-off-by: Jack Andersen <jandersen@plaid.com>
4 changed files with 15 additions and 19 deletions

cmd/kops/rollingupdatecluster.go

@@ -107,10 +107,6 @@ type RollingUpdateOptions struct {
 	// does not validate, after a validation period.
 	FailOnValidate bool
-	// ExitOnFirstError exits the rolling update when a single instancegroup's
-	// rolling update experiences an error instead of retrying all instancegroups.
-	ExitOnFirstError bool
 	// DrainTimeout is the maximum time to wait while draining a node.
 	DrainTimeout time.Duration
@@ -155,7 +151,6 @@ func (o *RollingUpdateOptions) InitDefaults() {
 	o.CloudOnly = false
 	o.FailOnDrainError = false
 	o.FailOnValidate = true
-	o.ExitOnFirstError = false
 	o.ControlPlaneInterval = 15 * time.Second
 	o.NodeInterval = 15 * time.Second
@@ -213,7 +208,6 @@ func NewCmdRollingUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
 	cmd.Flags().BoolVar(&options.FailOnDrainError, "fail-on-drain-error", true, "Fail if draining a node fails")
 	cmd.Flags().BoolVar(&options.FailOnValidate, "fail-on-validate-error", true, "Fail if the cluster fails to validate")
-	cmd.Flags().BoolVar(&options.ExitOnFirstError, "exit-on-first-error", false, "Exit on the first node or apiserver instancegroup error")
 	cmd.Flags().SetNormalizeFunc(func(f *pflag.FlagSet, name string) pflag.NormalizedName {
 		switch name {
@@ -368,7 +362,6 @@ func RunRollingUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer
 		ValidationTimeout: options.ValidationTimeout,
 		ValidateCount:     int(options.ValidateCount),
 		DrainTimeout:      options.DrainTimeout,
-		ExitOnFirstError:  options.ExitOnFirstError,
 		// TODO should we expose this to the UI?
 		ValidateTickDuration:    30 * time.Second,
 		ValidateSuccessDuration: 10 * time.Second,

docs/cli/kops_rolling-update_cluster.md

@@ -58,7 +58,6 @@ kops rolling-update cluster [CLUSTER] [flags]
 ```
       --bastion-interval duration         Time to wait between restarting bastions (default 15s)
       --cloudonly                         Perform rolling update without confirming progress with Kubernetes
       --control-plane-interval duration   Time to wait between restarting control plane nodes (default 15s)
       --drain-timeout duration            Maximum time to wait for a node to drain (default 15m0s)
-      --exit-on-first-error               Exit on the first node or apiserver instancegroup error
       --fail-on-drain-error               Fail if draining a node fails (default true)

pkg/instancegroups/rollingupdate.go

@@ -20,6 +20,7 @@ import (
 	"context"
 	"fmt"
 	"sort"
+	"strings"
 	"sync"
 	"time"
@@ -82,11 +83,6 @@ type RollingUpdateCluster struct {
 	// DrainTimeout is the maximum amount of time to wait while draining a node.
 	DrainTimeout time.Duration
-	// ExitOnFirstError ensures the rolling update stops on the first error returned by any
-	// node or apiserver instancegroup. The default is `false` which will try to roll every instance
-	// group in serial and then return any errors.
-	ExitOnFirstError bool
 	// Options holds user-specified options
 	Options RollingUpdateOptions
 }
@@ -192,7 +188,7 @@ func (c *RollingUpdateCluster) RollingUpdate(groups map[string]*cloudinstances.C
 	for _, k := range sortGroups(apiServerGroups) {
 		err := c.rollingUpdateInstanceGroup(apiServerGroups[k], c.NodeInterval)
-		if err != nil && c.ExitOnFirstError {
+		if err != nil && exitableError(err) {
 			return err
 		}
@@ -214,7 +210,7 @@ func (c *RollingUpdateCluster) RollingUpdate(groups map[string]*cloudinstances.C
 	for _, k := range sortGroups(nodeGroups) {
 		err := c.rollingUpdateInstanceGroup(nodeGroups[k], c.NodeInterval)
-		if err != nil && c.ExitOnFirstError {
+		if err != nil && exitableError(err) {
 			return err
 		}
@@ -241,3 +237,13 @@ func sortGroups(groupMap map[string]*cloudinstances.CloudInstanceGroup) []string
 	sort.Strings(groups)
 	return groups
 }
+
+// exitableError inspects an error to determine if the error is
+// fatal enough that the rolling update cannot continue.
+//
+// For example, if a cluster is unable to be validated by the deadline, then it
+// is unlikely that it will validate on the next instance roll, so an early exit as a
+// warning to the user is more appropriate.
+func exitableError(err error) bool {
+	return strings.HasPrefix(err.Error(), "error validating cluster")
+}
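
The prefix match above is deliberately narrow: only validation failures abort the update, while other per-group errors keep the pre-existing roll-everything-then-report behavior. A minimal table-driven sketch of how one might exercise that boundary (a hypothetical test, not part of this commit; the error messages are invented, and it assumes placement alongside exitableError in the same package):

```go
package instancegroups

import (
	"errors"
	"fmt"
	"testing"
)

// Hypothetical coverage for exitableError: only errors whose message
// begins with "error validating cluster" should stop the rolling update.
func TestExitableError(t *testing.T) {
	cases := []struct {
		err  error
		want bool
	}{
		// Matches the prefix, so the rolling update should exit early.
		{fmt.Errorf("error validating cluster: masters not ready"), true},
		// Anything else keeps the old behavior: roll the remaining groups.
		{errors.New(`error draining node "node-1"`), false},
		{errors.New("unexpected EOF"), false},
	}
	for _, c := range cases {
		if got := exitableError(c.err); got != c.want {
			t.Errorf("exitableError(%v) = %v, want %v", c.err, got, c.want)
		}
	}
}
```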

pkg/instancegroups/rollingupdate_test.go

@@ -562,22 +562,20 @@ func TestRollingUpdateValidationErrorInstanceGroupNil(t *testing.T) {
 	assertGroupInstanceCount(t, cloud, "bastion-1", 1)
 }
 
-func TestRollingUpdateValidationErrorInstanceGroupExitFirstFailure(t *testing.T) {
+func TestRollingUpdateValidationErrorInstanceGroupExitableError(t *testing.T) {
 	c, cloud := getTestSetup()
 
 	groups := make(map[string]*cloudinstances.CloudInstanceGroup)
 	makeGroup(groups, c.K8sClient, cloud, "node-1", kopsapi.InstanceGroupRoleNode, 3, 3)
 	makeGroup(groups, c.K8sClient, cloud, "node-2", kopsapi.InstanceGroupRoleNode, 3, 3)
 	makeGroup(groups, c.K8sClient, cloud, "node-3", kopsapi.InstanceGroupRoleNode, 3, 3)
-	makeGroup(groups, c.K8sClient, cloud, "master-1", kopsapi.InstanceGroupRoleMaster, 2, 0)
+	makeGroup(groups, c.K8sClient, cloud, "master-1", kopsapi.InstanceGroupRoleControlPlane, 2, 0)
 	makeGroup(groups, c.K8sClient, cloud, "bastion-1", kopsapi.InstanceGroupRoleBastion, 1, 0)
 
 	c.ClusterValidator = &instanceGroupNodeSpecificErrorClusterValidator{
 		InstanceGroup: groups["node-2"].InstanceGroup,
 	}
-	c.ExitOnFirstError = true
 
 	err := c.RollingUpdate(groups, &kopsapi.InstanceGroupList{})
 	assert.Error(t, err, "rolling update")
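
For readers tracing why this test now fails fast without setting any option: the cluster-validation path (not shown in this diff) surfaces failures with messages beginning with "error validating cluster", which is exactly what exitableError keys on. A self-contained sketch of that interaction, under that assumption (the exact wording in kops may differ; the error text below is invented):

```go
package main

import (
	"errors"
	"fmt"
	"strings"
)

// exitableError, as added in this commit.
func exitableError(err error) bool {
	return strings.HasPrefix(err.Error(), "error validating cluster")
}

func main() {
	// Hypothetical wrapped failure from the validation loop.
	valErr := errors.New(`InstanceGroup "node-2" did not pass validation`)
	err := fmt.Errorf("error validating cluster after removing a node: %w", valErr)

	fmt.Println(exitableError(err))                           // true: RollingUpdate returns this error
	fmt.Println(exitableError(errors.New("drain timed out"))) // false: remaining groups still roll
}
```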