mirror of https://github.com/kubernetes/kops.git
Remove optionality and exit when specific error prefix is matched
Signed-off-by: Jack Andersen <jandersen@plaid.com>
This commit is contained in:
parent
f9ea9b3ef8
commit
6efd68f428
|
|
@ -107,10 +107,6 @@ type RollingUpdateOptions struct {
|
||||||
// does not validate, after a validation period.
|
// does not validate, after a validation period.
|
||||||
FailOnValidate bool
|
FailOnValidate bool
|
||||||
|
|
||||||
// ExitOnFirstError exits the rolling update when a single instancegroup's
|
|
||||||
// rolling update experiences an error instead of retrying all instancegroups.
|
|
||||||
ExitOnFirstError bool
|
|
||||||
|
|
||||||
// DrainTimeout is the maximum time to wait while draining a node.
|
// DrainTimeout is the maximum time to wait while draining a node.
|
||||||
DrainTimeout time.Duration
|
DrainTimeout time.Duration
|
||||||
|
|
||||||
|
|
@ -155,7 +151,6 @@ func (o *RollingUpdateOptions) InitDefaults() {
|
||||||
o.CloudOnly = false
|
o.CloudOnly = false
|
||||||
o.FailOnDrainError = false
|
o.FailOnDrainError = false
|
||||||
o.FailOnValidate = true
|
o.FailOnValidate = true
|
||||||
o.ExitOnFirstError = false
|
|
||||||
|
|
||||||
o.ControlPlaneInterval = 15 * time.Second
|
o.ControlPlaneInterval = 15 * time.Second
|
||||||
o.NodeInterval = 15 * time.Second
|
o.NodeInterval = 15 * time.Second
|
||||||
|
|
@ -213,7 +208,6 @@ func NewCmdRollingUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
|
||||||
|
|
||||||
cmd.Flags().BoolVar(&options.FailOnDrainError, "fail-on-drain-error", true, "Fail if draining a node fails")
|
cmd.Flags().BoolVar(&options.FailOnDrainError, "fail-on-drain-error", true, "Fail if draining a node fails")
|
||||||
cmd.Flags().BoolVar(&options.FailOnValidate, "fail-on-validate-error", true, "Fail if the cluster fails to validate")
|
cmd.Flags().BoolVar(&options.FailOnValidate, "fail-on-validate-error", true, "Fail if the cluster fails to validate")
|
||||||
cmd.Flags().BoolVar(&options.ExitOnFirstError, "exit-on-first-error", false, "Exit on the first node or apiserver instancegroup error")
|
|
||||||
|
|
||||||
cmd.Flags().SetNormalizeFunc(func(f *pflag.FlagSet, name string) pflag.NormalizedName {
|
cmd.Flags().SetNormalizeFunc(func(f *pflag.FlagSet, name string) pflag.NormalizedName {
|
||||||
switch name {
|
switch name {
|
||||||
|
|
@ -368,7 +362,6 @@ func RunRollingUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer
|
||||||
ValidationTimeout: options.ValidationTimeout,
|
ValidationTimeout: options.ValidationTimeout,
|
||||||
ValidateCount: int(options.ValidateCount),
|
ValidateCount: int(options.ValidateCount),
|
||||||
DrainTimeout: options.DrainTimeout,
|
DrainTimeout: options.DrainTimeout,
|
||||||
ExitOnFirstError: options.ExitOnFirstError,
|
|
||||||
// TODO should we expose this to the UI?
|
// TODO should we expose this to the UI?
|
||||||
ValidateTickDuration: 30 * time.Second,
|
ValidateTickDuration: 30 * time.Second,
|
||||||
ValidateSuccessDuration: 10 * time.Second,
|
ValidateSuccessDuration: 10 * time.Second,
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,6 @@ kops rolling-update cluster [CLUSTER] [flags]
|
||||||
```
|
```
|
||||||
--bastion-interval duration Time to wait between restarting bastions (default 15s)
|
--bastion-interval duration Time to wait between restarting bastions (default 15s)
|
||||||
--cloudonly Perform rolling update without confirming progress with Kubernetes
|
--cloudonly Perform rolling update without confirming progress with Kubernetes
|
||||||
--exit-on-first-error Exit on the first node or apiserver instancegroup error
|
|
||||||
--control-plane-interval duration Time to wait between restarting control plane nodes (default 15s)
|
--control-plane-interval duration Time to wait between restarting control plane nodes (default 15s)
|
||||||
--drain-timeout duration Maximum time to wait for a node to drain (default 15m0s)
|
--drain-timeout duration Maximum time to wait for a node to drain (default 15m0s)
|
||||||
--fail-on-drain-error Fail if draining a node fails (default true)
|
--fail-on-drain-error Fail if draining a node fails (default true)
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
|
@ -82,11 +83,6 @@ type RollingUpdateCluster struct {
|
||||||
// DrainTimeout is the maximum amount of time to wait while draining a node.
|
// DrainTimeout is the maximum amount of time to wait while draining a node.
|
||||||
DrainTimeout time.Duration
|
DrainTimeout time.Duration
|
||||||
|
|
||||||
// ExitOnFirstError ensures the rolling update stops on the first error returned by any
|
|
||||||
// node or apiserver instancegroup. The default is `false` which will try to roll every instance
|
|
||||||
// group in serial and then return any errors.
|
|
||||||
ExitOnFirstError bool
|
|
||||||
|
|
||||||
// Options holds user-specified options
|
// Options holds user-specified options
|
||||||
Options RollingUpdateOptions
|
Options RollingUpdateOptions
|
||||||
}
|
}
|
||||||
|
|
@ -192,7 +188,7 @@ func (c *RollingUpdateCluster) RollingUpdate(groups map[string]*cloudinstances.C
|
||||||
|
|
||||||
for _, k := range sortGroups(apiServerGroups) {
|
for _, k := range sortGroups(apiServerGroups) {
|
||||||
err := c.rollingUpdateInstanceGroup(apiServerGroups[k], c.NodeInterval)
|
err := c.rollingUpdateInstanceGroup(apiServerGroups[k], c.NodeInterval)
|
||||||
if err != nil && c.ExitOnFirstError {
|
if err != nil && exitableError(err) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -214,7 +210,7 @@ func (c *RollingUpdateCluster) RollingUpdate(groups map[string]*cloudinstances.C
|
||||||
|
|
||||||
for _, k := range sortGroups(nodeGroups) {
|
for _, k := range sortGroups(nodeGroups) {
|
||||||
err := c.rollingUpdateInstanceGroup(nodeGroups[k], c.NodeInterval)
|
err := c.rollingUpdateInstanceGroup(nodeGroups[k], c.NodeInterval)
|
||||||
if err != nil && c.ExitOnFirstError {
|
if err != nil && exitableError(err) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -241,3 +237,13 @@ func sortGroups(groupMap map[string]*cloudinstances.CloudInstanceGroup) []string
|
||||||
sort.Strings(groups)
|
sort.Strings(groups)
|
||||||
return groups
|
return groups
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// exitableError reports whether err is fatal enough that the rolling update
// cannot continue and should return immediately.
//
// For example, if a cluster is unable to be validated by the deadline, then it
// is unlikely that it will validate on the next instance roll, so an early exit
// as a warning to the user is more appropriate.
//
// The decision is made by matching the prefix of the error message, so an
// error whose message starts with different text (for instance after being
// wrapped with a new leading message) is not considered exitable. A nil error
// is never exitable.
func exitableError(err error) bool {
	// Guard against nil so the helper is safe to call without a prior
	// err != nil check; calling Error() on a nil error would panic.
	if err == nil {
		return false
	}
	return strings.HasPrefix(err.Error(), "error validating cluster")
}
|
||||||
|
|
|
||||||
|
|
@ -562,22 +562,20 @@ func TestRollingUpdateValidationErrorInstanceGroupNil(t *testing.T) {
|
||||||
assertGroupInstanceCount(t, cloud, "bastion-1", 1)
|
assertGroupInstanceCount(t, cloud, "bastion-1", 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRollingUpdateValidationErrorInstanceGroupExitFirstFailure(t *testing.T) {
|
func TestRollingUpdateValidationErrorInstanceGroupExitableError(t *testing.T) {
|
||||||
c, cloud := getTestSetup()
|
c, cloud := getTestSetup()
|
||||||
|
|
||||||
groups := make(map[string]*cloudinstances.CloudInstanceGroup)
|
groups := make(map[string]*cloudinstances.CloudInstanceGroup)
|
||||||
makeGroup(groups, c.K8sClient, cloud, "node-1", kopsapi.InstanceGroupRoleNode, 3, 3)
|
makeGroup(groups, c.K8sClient, cloud, "node-1", kopsapi.InstanceGroupRoleNode, 3, 3)
|
||||||
makeGroup(groups, c.K8sClient, cloud, "node-2", kopsapi.InstanceGroupRoleNode, 3, 3)
|
makeGroup(groups, c.K8sClient, cloud, "node-2", kopsapi.InstanceGroupRoleNode, 3, 3)
|
||||||
makeGroup(groups, c.K8sClient, cloud, "node-3", kopsapi.InstanceGroupRoleNode, 3, 3)
|
makeGroup(groups, c.K8sClient, cloud, "node-3", kopsapi.InstanceGroupRoleNode, 3, 3)
|
||||||
makeGroup(groups, c.K8sClient, cloud, "master-1", kopsapi.InstanceGroupRoleMaster, 2, 0)
|
makeGroup(groups, c.K8sClient, cloud, "master-1", kopsapi.InstanceGroupRoleControlPlane, 2, 0)
|
||||||
makeGroup(groups, c.K8sClient, cloud, "bastion-1", kopsapi.InstanceGroupRoleBastion, 1, 0)
|
makeGroup(groups, c.K8sClient, cloud, "bastion-1", kopsapi.InstanceGroupRoleBastion, 1, 0)
|
||||||
|
|
||||||
c.ClusterValidator = &instanceGroupNodeSpecificErrorClusterValidator{
|
c.ClusterValidator = &instanceGroupNodeSpecificErrorClusterValidator{
|
||||||
InstanceGroup: groups["node-2"].InstanceGroup,
|
InstanceGroup: groups["node-2"].InstanceGroup,
|
||||||
}
|
}
|
||||||
|
|
||||||
c.ExitOnFirstError = true
|
|
||||||
|
|
||||||
err := c.RollingUpdate(groups, &kopsapi.InstanceGroupList{})
|
err := c.RollingUpdate(groups, &kopsapi.InstanceGroupList{})
|
||||||
assert.Error(t, err, "rolling update")
|
assert.Error(t, err, "rolling update")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue