mirror of https://github.com/kubernetes/kops.git
validate cluster n times in rolling update
This commit is contained in:
parent
fde8ceaaa8
commit
e1e79790ef
|
|
@ -121,6 +121,9 @@ type RollingUpdateOptions struct {
|
|||
// ValidationTimeout is the timeout for validation to succeed after the drain and pause
|
||||
ValidationTimeout time.Duration
|
||||
|
||||
// ValidateTimes is the amount of time that a cluster needs to be validated after single node update
|
||||
ValidateTimes int32
|
||||
|
||||
// MasterInterval is the minimum time to wait after stopping a master node. This does not include drain and validate time.
|
||||
MasterInterval time.Duration
|
||||
|
||||
|
|
@ -158,6 +161,7 @@ func (o *RollingUpdateOptions) InitDefaults() {
|
|||
|
||||
o.PostDrainDelay = 5 * time.Second
|
||||
o.ValidationTimeout = 15 * time.Minute
|
||||
o.ValidateTimes = 2
|
||||
}
|
||||
|
||||
func NewCmdRollingUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
|
||||
|
|
@ -177,6 +181,7 @@ func NewCmdRollingUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
|
|||
cmd.Flags().BoolVar(&options.CloudOnly, "cloudonly", options.CloudOnly, "Perform rolling update without confirming progress with k8s")
|
||||
|
||||
cmd.Flags().DurationVar(&options.ValidationTimeout, "validation-timeout", options.ValidationTimeout, "Maximum time to wait for a cluster to validate")
|
||||
cmd.Flags().Int32Var(&options.ValidateTimes, "validate-times", options.ValidateTimes, "Amount of times that a cluster needs to be validated after single node update")
|
||||
cmd.Flags().DurationVar(&options.MasterInterval, "master-interval", options.MasterInterval, "Time to wait between restarting masters")
|
||||
cmd.Flags().DurationVar(&options.NodeInterval, "node-interval", options.NodeInterval, "Time to wait between restarting nodes")
|
||||
cmd.Flags().DurationVar(&options.BastionInterval, "bastion-interval", options.BastionInterval, "Time to wait between restarting bastions")
|
||||
|
|
@ -333,9 +338,10 @@ func RunRollingUpdateCluster(f *util.Factory, out io.Writer, options *RollingUpd
|
|||
ClusterName: options.ClusterName,
|
||||
PostDrainDelay: options.PostDrainDelay,
|
||||
ValidationTimeout: options.ValidationTimeout,
|
||||
ValidateTimes: options.ValidateTimes,
|
||||
ValidateSucceeded: 0,
|
||||
// TODO should we expose this to the UI?
|
||||
ValidateTickDuration: 30 * time.Second,
|
||||
ValidateSuccessDuration: 10 * time.Second,
|
||||
}
|
||||
|
||||
err = d.AdjustNeedUpdate(groups, cluster, list)
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ kops rolling-update cluster [flags]
|
|||
--master-interval duration Time to wait between restarting masters (default 15s)
|
||||
--node-interval duration Time to wait between restarting nodes (default 15s)
|
||||
--post-drain-delay duration Time to wait after draining each node (default 5s)
|
||||
--validate-times int32 Amount of times that a cluster needs to be validated after single node update (default 2)
|
||||
--validation-timeout duration Maximum time to wait for a cluster to validate (default 15m0s)
|
||||
-y, --yes Perform rolling update immediately, without --yes rolling-update executes a dry-run
|
||||
```
|
||||
|
|
|
|||
|
|
@ -430,10 +430,12 @@ func (c *RollingUpdateCluster) validateClusterWithDuration(duration time.Duratio
|
|||
func (c *RollingUpdateCluster) tryValidateCluster(duration time.Duration) bool {
|
||||
result, err := c.ClusterValidator.Validate()
|
||||
|
||||
if err == nil && len(result.Failures) == 0 && c.ValidateSuccessDuration > 0 {
|
||||
klog.Infof("Cluster validated; revalidating in %s to make sure it does not flap.", c.ValidateSuccessDuration)
|
||||
time.Sleep(c.ValidateSuccessDuration)
|
||||
result, err = c.ClusterValidator.Validate()
|
||||
if err == nil && len(result.Failures) == 0 && c.ValidateTimes > 0 {
|
||||
c.ValidateSucceeded++
|
||||
if c.ValidateSucceeded < c.ValidateTimes {
|
||||
klog.Infof("Cluster validated; revalidating %d time(s) to make sure it does not flap.", c.ValidateTimes-c.ValidateSucceeded)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -64,9 +64,11 @@ type RollingUpdateCluster struct {
|
|||
// ValidateTickDuration is the amount of time to wait between cluster validation attempts
|
||||
ValidateTickDuration time.Duration
|
||||
|
||||
// ValidateSuccessDuration is the amount of time a cluster must continue to validate successfully
|
||||
// before updating the next node
|
||||
ValidateSuccessDuration time.Duration
|
||||
// ValidateTimes is the amount of time that a cluster needs to be validated after single node update
|
||||
ValidateTimes int32
|
||||
|
||||
// ValidateSucceeded is the amount of times that a cluster validate is succeeded already
|
||||
ValidateSucceeded int32
|
||||
}
|
||||
|
||||
// AdjustNeedUpdate adjusts the set of instances that need updating, using factors outside those known by the cloud implementation
|
||||
|
|
|
|||
|
|
@ -68,7 +68,8 @@ func getTestSetup() (*RollingUpdateCluster, *awsup.MockAWSCloud, *kopsapi.Cluste
|
|||
ClusterValidator: &successfulClusterValidator{},
|
||||
FailOnValidate: true,
|
||||
ValidateTickDuration: 1 * time.Millisecond,
|
||||
ValidateSuccessDuration: 5 * time.Millisecond,
|
||||
ValidateTimes: 1,
|
||||
ValidateSucceeded: 0,
|
||||
}
|
||||
|
||||
return c, mockcloud, cluster
|
||||
|
|
@ -511,6 +512,7 @@ func (v *flappingClusterValidator) Validate() (*validation.ValidationCluster, er
|
|||
|
||||
func TestRollingUpdateFlappingValidation(t *testing.T) {
|
||||
c, cloud, cluster := getTestSetup()
|
||||
c.ValidateTimes = 3
|
||||
|
||||
// This should only take a few milliseconds,
|
||||
// but we have to pad to allow for random delays (e.g. GC)
|
||||
|
|
@ -977,7 +979,6 @@ func TestRollingUpdateMaxUnavailableAllNeedUpdate(t *testing.T) {
|
|||
c, cloud, cluster := getTestSetup()
|
||||
|
||||
concurrentTest := newConcurrentTest(t, cloud, 0, true)
|
||||
c.ValidateSuccessDuration = 0
|
||||
c.ClusterValidator = concurrentTest
|
||||
cloud.MockEC2 = concurrentTest
|
||||
|
||||
|
|
@ -1000,7 +1001,6 @@ func TestRollingUpdateMaxUnavailableAllButOneNeedUpdate(t *testing.T) {
|
|||
c, cloud, cluster := getTestSetup()
|
||||
|
||||
concurrentTest := newConcurrentTest(t, cloud, 0, false)
|
||||
c.ValidateSuccessDuration = 0
|
||||
c.ClusterValidator = concurrentTest
|
||||
cloud.MockEC2 = concurrentTest
|
||||
|
||||
|
|
@ -1022,7 +1022,6 @@ func TestRollingUpdateMaxUnavailableAllNeedUpdateMaster(t *testing.T) {
|
|||
c, cloud, cluster := getTestSetup()
|
||||
|
||||
concurrentTest := newConcurrentTest(t, cloud, 0, true)
|
||||
c.ValidateSuccessDuration = 0
|
||||
c.ClusterValidator = concurrentTest
|
||||
cloud.MockEC2 = concurrentTest
|
||||
|
||||
|
|
@ -1074,7 +1073,6 @@ func TestRollingUpdateMaxSurgeAllNeedUpdate(t *testing.T) {
|
|||
c, cloud, cluster := getTestSetup()
|
||||
|
||||
concurrentTest := newConcurrentTest(t, cloud, 2, true)
|
||||
c.ValidateSuccessDuration = 0
|
||||
c.ClusterValidator = concurrentTest
|
||||
cloud.MockAutoscaling = &concurrentTestAutoscaling{
|
||||
AutoScalingAPI: cloud.MockAutoscaling,
|
||||
|
|
@ -1101,7 +1099,6 @@ func TestRollingUpdateMaxSurgeAllButOneNeedUpdate(t *testing.T) {
|
|||
c, cloud, cluster := getTestSetup()
|
||||
|
||||
concurrentTest := newConcurrentTest(t, cloud, 2, false)
|
||||
c.ValidateSuccessDuration = 0
|
||||
c.ClusterValidator = concurrentTest
|
||||
cloud.MockAutoscaling = &concurrentTestAutoscaling{
|
||||
AutoScalingAPI: cloud.MockAutoscaling,
|
||||
|
|
@ -1248,7 +1245,6 @@ func TestRollingUpdateMaxSurgeAllNeedUpdateOneAlreadyDetached(t *testing.T) {
|
|||
detached: map[string]bool{},
|
||||
}
|
||||
|
||||
c.ValidateSuccessDuration = 0
|
||||
c.ClusterValidator = alreadyDetachedTest
|
||||
cloud.MockAutoscaling = &alreadyDetachedTestAutoscaling{
|
||||
AutoScalingAPI: cloud.MockAutoscaling,
|
||||
|
|
@ -1277,7 +1273,6 @@ func TestRollingUpdateMaxSurgeAllNeedUpdateMaxAlreadyDetached(t *testing.T) {
|
|||
|
||||
// Should behave the same as TestRollingUpdateMaxUnavailableAllNeedUpdate
|
||||
concurrentTest := newConcurrentTest(t, cloud, 0, true)
|
||||
c.ValidateSuccessDuration = 0
|
||||
c.ClusterValidator = concurrentTest
|
||||
cloud.MockEC2 = concurrentTest
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue