fix deployment replicas syncer for the case where status.replicas has not been collected
Signed-off-by: chaosi-zju <chaosi@zju.edu.cn>
This commit is contained in:
parent
dbb82bebd2
commit
31d6855e63
|
@ -18,7 +18,8 @@ package deploymentreplicassyncer
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
|
@ -38,6 +39,8 @@ import (
|
|||
const (
|
||||
// ControllerName is the controller name that will be used when reporting events.
|
||||
ControllerName = "deployment-replicas-syncer"
|
||||
|
||||
waitDeploymentStatusInterval = 1 * time.Second
|
||||
)
|
||||
|
||||
// DeploymentReplicasSyncer is to sync deployment replicas from status field to spec field.
|
||||
|
@ -98,6 +101,7 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller
|
|||
binding := &workv1alpha2.ResourceBinding{}
|
||||
bindingName := names.GenerateBindingName(util.DeploymentKind, req.Name)
|
||||
|
||||
// 1. get latest binding
|
||||
if err := r.Client.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: bindingName}, binding); err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
klog.Infof("no need to update deployment replicas for binding not found")
|
||||
|
@ -106,11 +110,12 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller
|
|||
return controllerruntime.Result{}, err
|
||||
}
|
||||
|
||||
// if it is not divided schedule type, no need to update replicas
|
||||
// 2. if it is not divided schedule type, no need to update replicas
|
||||
if binding.Spec.Placement.ReplicaSchedulingType() != policyv1alpha1.ReplicaSchedulingTypeDivided {
|
||||
return controllerruntime.Result{}, nil
|
||||
}
|
||||
|
||||
// 3. get latest deployment
|
||||
if err := r.Client.Get(ctx, req.NamespacedName, deployment); err != nil {
|
||||
if apierrors.IsNotFound(err) {
|
||||
klog.Infof("no need to update deployment replicas for deployment not found")
|
||||
|
@ -119,39 +124,18 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller
|
|||
return controllerruntime.Result{}, err
|
||||
}
|
||||
|
||||
// if replicas in spec already the same as in status, no need to update replicas
|
||||
// 4. if replicas in spec already the same as in status, no need to update replicas
|
||||
if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas == deployment.Status.Replicas {
|
||||
klog.Infof("replicas in spec field (%d) already equal to in status field (%d)", *deployment.Spec.Replicas, deployment.Status.Replicas)
|
||||
return controllerruntime.Result{}, nil
|
||||
}
|
||||
|
||||
// make sure the replicas change in deployment.spec can sync to binding.spec, otherwise retry
|
||||
if deployment.Spec.Replicas == nil || *deployment.Spec.Replicas != binding.Spec.Replicas {
|
||||
klog.V(4).Infof("wait until replicas of binding (%d) equal to replicas of deployment (%d)",
|
||||
binding.Spec.Replicas, *deployment.Spec.Replicas)
|
||||
return controllerruntime.Result{}, fmt.Errorf("retry to wait replicas change sync to binding")
|
||||
// 5. judge the deployment modification in spec has taken effect and its status has been collected, otherwise retry.
|
||||
if !isDeploymentStatusCollected(deployment, binding) {
|
||||
return controllerruntime.Result{RequeueAfter: waitDeploymentStatusInterval}, nil
|
||||
}
|
||||
|
||||
// make sure the scheduler observed generation equal to generation in binding, otherwise retry
|
||||
if binding.Generation != binding.Status.SchedulerObservedGeneration {
|
||||
klog.V(4).Infof("wait until scheduler observed generation (%d) equal to generation in binding (%d)",
|
||||
binding.Status.SchedulerObservedGeneration, binding.Generation)
|
||||
return controllerruntime.Result{}, fmt.Errorf("retry to wait scheduler observed generation")
|
||||
}
|
||||
|
||||
if len(binding.Status.AggregatedStatus) != len(binding.Spec.Clusters) {
|
||||
klog.V(4).Infof("wait until all clusters status collected, got: %d, expected: %d",
|
||||
len(binding.Status.AggregatedStatus), len(binding.Spec.Clusters))
|
||||
return controllerruntime.Result{}, fmt.Errorf("retry to wait status in binding collected")
|
||||
}
|
||||
for _, status := range binding.Status.AggregatedStatus {
|
||||
if status.Status == nil {
|
||||
klog.V(4).Infof("wait until aggregated status of cluster %s collected", status.ClusterName)
|
||||
return controllerruntime.Result{}, fmt.Errorf("retry to wait status in binding collected")
|
||||
}
|
||||
}
|
||||
|
||||
// update replicas
|
||||
// 6. update spec replicas with status replicas
|
||||
oldReplicas := *deployment.Spec.Replicas
|
||||
deployment.Spec.Replicas = &deployment.Status.Replicas
|
||||
if err := r.Client.Update(ctx, deployment); err != nil {
|
||||
|
@ -159,8 +143,64 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller
|
|||
return controllerruntime.Result{}, err
|
||||
}
|
||||
|
||||
klog.Infof("successfully udpate deployment (%s/%s) replicas from %d to %d", deployment.Namespace,
|
||||
deployment.Namespace, oldReplicas, deployment.Status.Replicas)
|
||||
klog.Infof("successfully update deployment (%s/%s) replicas from %d to %d", deployment.Namespace,
|
||||
deployment.Name, oldReplicas, deployment.Status.Replicas)
|
||||
|
||||
return controllerruntime.Result{}, nil
|
||||
}
|
||||
|
||||
// isDeploymentStatusCollected judge whether deployment modification in spec has taken effect and its status has been collected.
|
||||
func isDeploymentStatusCollected(deployment *appsv1.Deployment, binding *workv1alpha2.ResourceBinding) bool {
|
||||
// make sure the replicas change in deployment.spec can sync to binding.spec, otherwise retry
|
||||
if deployment.Spec.Replicas == nil || *deployment.Spec.Replicas != binding.Spec.Replicas {
|
||||
klog.V(4).Infof("wait until replicas of binding (%d) equal to replicas of deployment (%d)",
|
||||
binding.Spec.Replicas, *deployment.Spec.Replicas)
|
||||
return false
|
||||
}
|
||||
|
||||
// make sure the scheduler observed generation equal to generation in binding, otherwise retry
|
||||
if binding.Generation != binding.Status.SchedulerObservedGeneration {
|
||||
klog.V(4).Infof("wait until scheduler observed generation (%d) equal to generation in binding (%d)",
|
||||
binding.Status.SchedulerObservedGeneration, binding.Generation)
|
||||
return false
|
||||
}
|
||||
|
||||
// make sure the number of aggregated status in binding is as expected.
|
||||
if len(binding.Status.AggregatedStatus) != len(binding.Spec.Clusters) {
|
||||
klog.V(4).Infof("wait until all clusters status collected, got: %d, expected: %d",
|
||||
len(binding.Status.AggregatedStatus), len(binding.Spec.Clusters))
|
||||
return false
|
||||
}
|
||||
|
||||
// make sure the status.replicas in binding has been collected from work to binding.
|
||||
bindingStatusSumReplicas := int32(0)
|
||||
for _, status := range binding.Status.AggregatedStatus {
|
||||
if status.Status == nil {
|
||||
klog.V(4).Infof("wait until aggregated status of cluster %s collected", status.ClusterName)
|
||||
return false
|
||||
}
|
||||
itemStatus := &appsv1.DeploymentStatus{}
|
||||
if err := json.Unmarshal(status.Status.Raw, itemStatus); err != nil {
|
||||
klog.Errorf("unmarshal status.raw of cluster %s in binding failed: %+v", status.ClusterName, err)
|
||||
return false
|
||||
}
|
||||
// if member cluster deployment is controlled by hpa, its status.replicas must > 0 (since hpa.minReplicas > 0),
|
||||
// so, if its status replicas is 0, means the status haven't been collected from member cluster, and needs waiting.
|
||||
if itemStatus.Replicas <= 0 {
|
||||
klog.V(4).Infof("wait until aggregated status replicas of cluster %s collected", status.ClusterName)
|
||||
return false
|
||||
}
|
||||
bindingStatusSumReplicas += itemStatus.Replicas
|
||||
}
|
||||
|
||||
// make sure the latest collected status.replicas is synced from binding to template.
|
||||
// e.g: user set deployment.spec.replicas = 2, then sum replicas in binding.status becomes 2, however, if now
|
||||
// deployment.status.replicas is still 1, we shouldn't update spec.replicas to 1 until status.replicas becomes 2.
|
||||
if deployment.Status.Replicas != bindingStatusSumReplicas {
|
||||
klog.V(4).Infof("wait until deployment status replicas (%d) equal to binding status replicas (%d)",
|
||||
deployment.Status.Replicas, bindingStatusSumReplicas)
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package native
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
|
@ -74,7 +75,18 @@ func reflectDeploymentStatus(object *unstructured.Unstructured) (*runtime.RawExt
|
|||
AvailableReplicas: deploymentStatus.AvailableReplicas,
|
||||
UnavailableReplicas: deploymentStatus.UnavailableReplicas,
|
||||
}
|
||||
return helper.BuildStatusRawExtension(grabStatus)
|
||||
|
||||
grabStatusRaw, err := helper.BuildStatusRawExtension(grabStatus)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// if status is empty struct, it actually means status haven't been collected from member cluster.
|
||||
if bytes.Equal(grabStatusRaw.Raw, []byte("{}")) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return grabStatusRaw, nil
|
||||
}
|
||||
|
||||
func reflectServiceStatus(object *unstructured.Unstructured) (*runtime.RawExtension, error) {
|
||||
|
|
Loading…
Reference in New Issue