Advanced deployment: scale down old unhealthy pods first (#150)
* advanced deployment: scale down old unhealthy pods first

Signed-off-by: mingzhou.swx <mingzhou.swx@alibaba-inc.com>

* add e2e for advanced deployment scaling down old unhealthy pods first

Signed-off-by: mingzhou.swx <mingzhou.swx@alibaba-inc.com>

---------

Signed-off-by: mingzhou.swx <mingzhou.swx@alibaba-inc.com>
Co-authored-by: mingzhou.swx <mingzhou.swx@alibaba-inc.com>
parent e99c529795
commit 72e1c0b936
@@ -302,6 +302,30 @@ func (dc *DeploymentController) scale(ctx context.Context, deployment *apps.Depl
 		// replica sets.
 		deploymentReplicasToAdd := allowedSize - allRSsReplicas
 
+		// Scale down the unhealthy replicas in old replica sets first to avoid some bad cases.
+		// For example:
+		//       _______________________________________________________________________________________
+		//       | ReplicaSet    |       oldRS-1      |   oldRS-2            |          newRS           |
+		//       | --------------| -------------------|----------------------|--------------------------|
+		//       | Replicas      |  4 healthy Pods    |  2 unhealthy Pods    |    4 unhealthy Pods      |
+		//       ---------------------------------------------------------------------------------------
+		// If we want to scale these replica sets down from 10 to 6, we expect to scale oldRS-2 down
+		// from 2 to 0 first, and then scale oldRS-1 down by 1 Pod and newRS by 1 Pod in proportion.
+		//
+		// We do not give the unhealthy Pods of newRS the same removal priority, because these
+		// new-revision Pods may have just been created, not crashed or hit other problems.
+		var err error
+		var cleanupCount int32
+		if deploymentReplicasToAdd < 0 {
+			oldRSs, cleanupCount, err = dc.cleanupUnhealthyReplicas(ctx, oldRSs, deployment, -deploymentReplicasToAdd)
+			if err != nil {
+				return err
+			}
+			klog.V(4).Infof("Cleaned up unhealthy replicas from old RSes by %d during scaling", cleanupCount)
+			deploymentReplicasToAdd += cleanupCount
+			allRSs = deploymentutil.FilterActiveReplicaSets(append(oldRSs, newRS))
+		}
+
 		// The additional replicas should be distributed proportionally amongst the active
 		// replica sets from the larger to the smaller in size replica set. Scaling direction
 		// drives what happens in case we are trying to scale replica sets of the same size.
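In plain terms, the block above changes the scale-down ordering: when the deployment as a whole has to shrink, unhealthy Pods in the old ReplicaSets are reclaimed first, and only the remainder is spread proportionally across the active ReplicaSets. Below is a minimal, self-contained Go sketch of that ordering applied to the example in the comment; the rs struct, the helper names, and the rounding rule are illustrative only and are not the controller's actual cleanupUnhealthyReplicas or proportional-scaling code.

package main

import "fmt"

// rs is an illustrative stand-in for a ReplicaSet's replica accounting; the
// real controller works on *apps.ReplicaSet objects and Pod status.
type rs struct {
	name      string
	replicas  int32
	unhealthy int32
	isNew     bool
}

// scaleDown sketches the ordering introduced above: drop unhealthy replicas
// from old ReplicaSets first, then spread whatever still has to go across the
// remaining ReplicaSets in proportion to their current size. The rounding rule
// here (leftover comes off the largest set) is a simplification.
func scaleDown(sets []rs, toRemove int32) []rs {
	// Step 1: unhealthy replicas in old ReplicaSets go first.
	for i := range sets {
		if toRemove <= 0 {
			break
		}
		if sets[i].isNew {
			continue
		}
		cleanup := min32(sets[i].unhealthy, toRemove)
		sets[i].replicas -= cleanup
		sets[i].unhealthy -= cleanup
		toRemove -= cleanup
	}
	// Step 2: distribute the remainder proportionally to the post-cleanup sizes.
	var total int32
	for _, s := range sets {
		total += s.replicas
	}
	if total == 0 || toRemove <= 0 {
		return sets
	}
	var removed int32
	for i := range sets {
		share := toRemove * sets[i].replicas / total // integer proportion
		sets[i].replicas -= share
		removed += share
	}
	// Any rounding leftover comes off the largest ReplicaSet in this sketch.
	if leftover := toRemove - removed; leftover > 0 {
		largest := 0
		for i := range sets {
			if sets[i].replicas > sets[largest].replicas {
				largest = i
			}
		}
		sets[largest].replicas -= leftover
	}
	return sets
}

func min32(a, b int32) int32 {
	if a < b {
		return a
	}
	return b
}

func main() {
	// The example from the code comment: oldRS-1 has 4 healthy Pods, oldRS-2
	// has 2 unhealthy Pods, newRS has 4 unhealthy Pods, and the total shrinks
	// from 10 to 6.
	sets := []rs{
		{name: "oldRS-1", replicas: 4},
		{name: "oldRS-2", replicas: 2, unhealthy: 2},
		{name: "newRS", replicas: 4, unhealthy: 4, isNew: true},
	}
	for _, s := range scaleDown(sets, 4) {
		fmt.Printf("%s -> %d replicas\n", s.name, s.replicas)
	}
	// Prints oldRS-1 -> 3, oldRS-2 -> 0, newRS -> 3: oldRS-2 drains first and
	// the remaining 2 replicas are removed proportionally.
}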
@@ -44,7 +44,7 @@ var _ = SIGDescribe("Advanced Deployment", func() {
 		return k8sClient.Get(context.TODO(), key, object)
 	}
 
-	UpdateDeployment := func(deployment *apps.Deployment, version string) *apps.Deployment {
+	UpdateDeployment := func(deployment *apps.Deployment, version string, images ...string) *apps.Deployment {
 		By(fmt.Sprintf("update deployment %v to version: %v", client.ObjectKeyFromObject(deployment), version))
 		var clone *apps.Deployment
 		Expect(retry.RetryOnConflict(defaultRetry, func() error {
@@ -53,7 +53,11 @@ var _ = SIGDescribe("Advanced Deployment", func() {
 			if err != nil {
 				return err
 			}
-			clone.Spec.Template.Spec.Containers[0].Image = deployment.Spec.Template.Spec.Containers[0].Image
+			if len(images) == 0 {
+				clone.Spec.Template.Spec.Containers[0].Image = deployment.Spec.Template.Spec.Containers[0].Image
+			} else {
+				clone.Spec.Template.Spec.Containers[0].Image = images[0]
+			}
 			clone.Spec.Template.Spec.Containers[0].Env[0].Value = version
 			strategy := unmarshal(clone.Annotations[rolloutsv1alpha1.DeploymentStrategyAnnotation])
 			strategy.Paused = true
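The UpdateDeployment helper now takes an optional variadic images argument, so existing two-argument callers are untouched while the new test can force a specific (intentionally broken) image. A tiny sketch of that optional-override pattern follows; the function name is illustrative and not part of the test suite.

package main

import "fmt"

// resolveImage mirrors the if len(images) == 0 branch added above: with no
// override the current image is kept, otherwise the first override wins.
func resolveImage(current string, images ...string) string {
	if len(images) == 0 {
		return current
	}
	return images[0]
}

func main() {
	fmt.Println(resolveImage("busybox:1.32"))                       // existing callers: keep busybox:1.32
	fmt.Println(resolveImage("busybox:1.32", "busybox:not-exists")) // new test: force busybox:not-exists
}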
@@ -324,6 +328,19 @@ var _ = SIGDescribe("Advanced Deployment", func() {
 			UpdatePartitionWithCheck(deployment, intstr.FromInt(3))
 			UpdatePartitionWithCheck(deployment, intstr.FromInt(5))
 		})
+
+		It("scale down old unhealthy first", func() {
+			deployment := &apps.Deployment{}
+			deployment.Namespace = namespace
+			Expect(ReadYamlToObject("./test_data/deployment/deployment.yaml", deployment)).ToNot(HaveOccurred())
+			deployment.Annotations["rollouts.kruise.io/deployment-strategy"] = `{"rollingUpdate":{"maxUnavailable":0,"maxSurge":1}}`
+			CreateObject(deployment)
+			UpdateDeployment(deployment, "version2", "busybox:not-exists")
+			UpdatePartitionWithoutCheck(deployment, intstr.FromInt(1))
+			CheckReplicas(deployment, 6, 5, 1)
+			UpdateDeployment(deployment, "version3", "busybox:1.32")
+			CheckReplicas(deployment, 5, 5, 0)
+		})
 	})
 })
 
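The new case pins the rollouts.kruise.io/deployment-strategy annotation to {"rollingUpdate":{"maxUnavailable":0,"maxSurge":1}}. For reference, here is a minimal sketch of what that JSON decodes to, using hypothetical local structs rather than the real rolloutsv1alpha1.DeploymentStrategy type.

package main

import (
	"encoding/json"
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
)

// rollingUpdate and strategy are hypothetical mirrors of the annotation
// payload used by the test; the real rolloutsv1alpha1 types may differ.
type rollingUpdate struct {
	MaxUnavailable intstr.IntOrString `json:"maxUnavailable"`
	MaxSurge       intstr.IntOrString `json:"maxSurge"`
}

type strategy struct {
	RollingUpdate rollingUpdate `json:"rollingUpdate"`
	Paused        bool          `json:"paused,omitempty"`
}

func main() {
	raw := `{"rollingUpdate":{"maxUnavailable":0,"maxSurge":1}}`
	var s strategy
	if err := json.Unmarshal([]byte(raw), &s); err != nil {
		panic(err)
	}
	// With maxUnavailable=0 the rollout may never drop below the desired
	// replica count, and maxSurge=1 allows at most one extra Pod, so the
	// surplus created during the update is exactly the Pod that has to be
	// reclaimed when the deployment scales back down.
	fmt.Printf("maxUnavailable=%s maxSurge=%s paused=%v\n",
		s.RollingUpdate.MaxUnavailable.String(), s.RollingUpdate.MaxSurge.String(), s.Paused)
}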