Address review comments

This commit is contained in:
John Gardiner Myers 2020-01-26 10:46:03 -08:00
parent 5f72d12132
commit d56ad41334
1 changed files with 26 additions and 25 deletions

View File

@ -151,7 +151,7 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
settings := resolveSettings(cluster, r.CloudGroup.InstanceGroup, numInstances) settings := resolveSettings(cluster, r.CloudGroup.InstanceGroup, numInstances)
concurrency := 0 runningDrains := 0
maxConcurrency := settings.MaxUnavailable.IntValue() maxConcurrency := settings.MaxUnavailable.IntValue()
if maxConcurrency == 0 { if maxConcurrency == 0 {
@ -166,24 +166,26 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
terminateChan := make(chan error, maxConcurrency) terminateChan := make(chan error, maxConcurrency)
for uIdx, u := range update { for uIdx, u := range update {
go r.drainTerminateAndWait(u, rollingUpdateData, terminateChan, isBastion, sleepAfterTerminate) go func(m *cloudinstances.CloudInstanceGroupMember) {
concurrency++ terminateChan <- r.drainTerminateAndWait(m, rollingUpdateData, isBastion, sleepAfterTerminate)
}(u)
runningDrains++
// Wait until after one node is deleted and its replacement validates before the concurrent draining // Wait until after one node is deleted and its replacement validates before the concurrent draining
// in case the current spec does not result in usable nodes. // in case the current spec does not result in usable nodes.
if concurrency < maxConcurrency && (!noneReady || uIdx > 0) { if runningDrains < maxConcurrency && (!noneReady || uIdx > 0) {
continue continue
} }
err = <-terminateChan err = <-terminateChan
concurrency-- runningDrains--
if err != nil { if err != nil {
return waitForPendingBeforeReturningError(concurrency, terminateChan, err) return waitForPendingBeforeReturningError(runningDrains, terminateChan, err)
} }
err = r.maybeValidate(rollingUpdateData, validationTimeout) err = r.maybeValidate(rollingUpdateData, validationTimeout)
if err != nil { if err != nil {
return waitForPendingBeforeReturningError(concurrency, terminateChan, err) return waitForPendingBeforeReturningError(runningDrains, terminateChan, err)
} }
if rollingUpdateData.Interactive { if rollingUpdateData.Interactive {
@ -202,13 +204,15 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
} }
} }
// Validation tends to return failures from the start of drain until the replacement is
// fully ready, so sweep up as many completions as we can before starting the next drain.
sweep: sweep:
for concurrency > 0 { for runningDrains > 0 {
select { select {
case err = <-terminateChan: case err = <-terminateChan:
concurrency-- runningDrains--
if err != nil { if err != nil {
return waitForPendingBeforeReturningError(concurrency, terminateChan, err) return waitForPendingBeforeReturningError(runningDrains, terminateChan, err)
} }
default: default:
break sweep break sweep
@ -216,12 +220,12 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
} }
} }
if concurrency > 0 { if runningDrains > 0 {
for concurrency > 0 { for runningDrains > 0 {
err = <-terminateChan err = <-terminateChan
concurrency-- runningDrains--
if err != nil { if err != nil {
return waitForPendingBeforeReturningError(concurrency, terminateChan, err) return waitForPendingBeforeReturningError(runningDrains, terminateChan, err)
} }
} }
@ -234,10 +238,10 @@ func (r *RollingUpdateInstanceGroup) RollingUpdate(rollingUpdateData *RollingUpd
return nil return nil
} }
func waitForPendingBeforeReturningError(concurrency int, terminateChan chan error, err error) error { func waitForPendingBeforeReturningError(runningDrains int, terminateChan chan error, err error) error {
for concurrency > 0 { for runningDrains > 0 {
<-terminateChan <-terminateChan
concurrency-- runningDrains--
} }
return err return err
} }
@ -300,7 +304,7 @@ func (r *RollingUpdateInstanceGroup) patchTaint(rollingUpdateData *RollingUpdate
return err return err
} }
func (r *RollingUpdateInstanceGroup) drainTerminateAndWait(u *cloudinstances.CloudInstanceGroupMember, rollingUpdateData *RollingUpdateCluster, terminateChan chan error, isBastion bool, sleepAfterTerminate time.Duration) { func (r *RollingUpdateInstanceGroup) drainTerminateAndWait(u *cloudinstances.CloudInstanceGroupMember, rollingUpdateData *RollingUpdateCluster, isBastion bool, sleepAfterTerminate time.Duration) error {
instanceId := u.ID instanceId := u.ID
nodeName := "" nodeName := ""
@ -321,8 +325,7 @@ func (r *RollingUpdateInstanceGroup) drainTerminateAndWait(u *cloudinstances.Clo
if err := r.DrainNode(u, rollingUpdateData); err != nil { if err := r.DrainNode(u, rollingUpdateData); err != nil {
if rollingUpdateData.FailOnDrainError { if rollingUpdateData.FailOnDrainError {
terminateChan <- fmt.Errorf("failed to drain node %q: %v", nodeName, err) return fmt.Errorf("failed to drain node %q: %v", nodeName, err)
return
} }
klog.Infof("Ignoring error draining node %q: %v", nodeName, err) klog.Infof("Ignoring error draining node %q: %v", nodeName, err)
} }
@ -339,23 +342,21 @@ func (r *RollingUpdateInstanceGroup) drainTerminateAndWait(u *cloudinstances.Clo
} else { } else {
klog.Infof("deleting node %q from kubernetes", nodeName) klog.Infof("deleting node %q from kubernetes", nodeName)
if err := r.deleteNode(u.Node, rollingUpdateData); err != nil { if err := r.deleteNode(u.Node, rollingUpdateData); err != nil {
terminateChan <- fmt.Errorf("error deleting node %q: %v", nodeName, err) return fmt.Errorf("error deleting node %q: %v", nodeName, err)
return
} }
} }
} }
if err := r.DeleteInstance(u); err != nil { if err := r.DeleteInstance(u); err != nil {
klog.Errorf("error deleting instance %q, node %q: %v", instanceId, nodeName, err) klog.Errorf("error deleting instance %q, node %q: %v", instanceId, nodeName, err)
terminateChan <- err return err
return
} }
// Wait for the minimum interval // Wait for the minimum interval
klog.Infof("waiting for %v after terminating instance", sleepAfterTerminate) klog.Infof("waiting for %v after terminating instance", sleepAfterTerminate)
time.Sleep(sleepAfterTerminate) time.Sleep(sleepAfterTerminate)
terminateChan <- nil return nil
} }
func (r *RollingUpdateInstanceGroup) maybeValidate(rollingUpdateData *RollingUpdateCluster, validationTimeout time.Duration) error { func (r *RollingUpdateInstanceGroup) maybeValidate(rollingUpdateData *RollingUpdateCluster, validationTimeout time.Duration) error {