diff --git a/pkg/controllers/deploymentreplicassyncer/deployment_replicas_syncer_controller.go b/pkg/controllers/deploymentreplicassyncer/deployment_replicas_syncer_controller.go index 04f09d804..0ba81d5e4 100644 --- a/pkg/controllers/deploymentreplicassyncer/deployment_replicas_syncer_controller.go +++ b/pkg/controllers/deploymentreplicassyncer/deployment_replicas_syncer_controller.go @@ -18,7 +18,8 @@ package deploymentreplicassyncer import ( "context" - "fmt" + "encoding/json" + "time" appsv1 "k8s.io/api/apps/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -38,6 +39,8 @@ import ( const ( // ControllerName is the controller name that will be used when reporting events. ControllerName = "deployment-replicas-syncer" + + waitDeploymentStatusInterval = 1 * time.Second ) // DeploymentReplicasSyncer is to sync deployment replicas from status field to spec field. @@ -98,6 +101,7 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller binding := &workv1alpha2.ResourceBinding{} bindingName := names.GenerateBindingName(util.DeploymentKind, req.Name) + // 1. get latest binding if err := r.Client.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: bindingName}, binding); err != nil { if apierrors.IsNotFound(err) { klog.Infof("no need to update deployment replicas for binding not found") @@ -106,11 +110,12 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller return controllerruntime.Result{}, err } - // if it is not divided schedule type, no need to update replicas + // 2. if it is not divided schedule type, no need to update replicas if binding.Spec.Placement.ReplicaSchedulingType() != policyv1alpha1.ReplicaSchedulingTypeDivided { return controllerruntime.Result{}, nil } + // 3. 
get latest deployment if err := r.Client.Get(ctx, req.NamespacedName, deployment); err != nil { if apierrors.IsNotFound(err) { klog.Infof("no need to update deployment replicas for deployment not found") @@ -119,39 +124,18 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller return controllerruntime.Result{}, err } - // if replicas in spec already the same as in status, no need to update replicas + // 4. if replicas in spec already the same as in status, no need to update replicas if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas == deployment.Status.Replicas { klog.Infof("replicas in spec field (%d) already equal to in status field (%d)", *deployment.Spec.Replicas, deployment.Status.Replicas) return controllerruntime.Result{}, nil } - // make sure the replicas change in deployment.spec can sync to binding.spec, otherwise retry - if deployment.Spec.Replicas == nil || *deployment.Spec.Replicas != binding.Spec.Replicas { - klog.V(4).Infof("wait until replicas of binding (%d) equal to replicas of deployment (%d)", - binding.Spec.Replicas, *deployment.Spec.Replicas) - return controllerruntime.Result{}, fmt.Errorf("retry to wait replicas change sync to binding") + // 5. judge the deployment modification in spec has taken effect and its status has been collected, otherwise retry. 
+ if !isDeploymentStatusCollected(deployment, binding) { + return controllerruntime.Result{RequeueAfter: waitDeploymentStatusInterval}, nil } - // make sure the scheduler observed generation equal to generation in binding, otherwise retry - if binding.Generation != binding.Status.SchedulerObservedGeneration { - klog.V(4).Infof("wait until scheduler observed generation (%d) equal to generation in binding (%d)", - binding.Status.SchedulerObservedGeneration, binding.Generation) - return controllerruntime.Result{}, fmt.Errorf("retry to wait scheduler observed generation") - } - - if len(binding.Status.AggregatedStatus) != len(binding.Spec.Clusters) { - klog.V(4).Infof("wait until all clusters status collected, got: %d, expected: %d", - len(binding.Status.AggregatedStatus), len(binding.Spec.Clusters)) - return controllerruntime.Result{}, fmt.Errorf("retry to wait status in binding collected") - } - for _, status := range binding.Status.AggregatedStatus { - if status.Status == nil { - klog.V(4).Infof("wait until aggregated status of cluster %s collected", status.ClusterName) - return controllerruntime.Result{}, fmt.Errorf("retry to wait status in binding collected") - } - } - - // update replicas + // 6. 
update spec replicas with status replicas oldReplicas := *deployment.Spec.Replicas deployment.Spec.Replicas = &deployment.Status.Replicas if err := r.Client.Update(ctx, deployment); err != nil { @@ -159,8 +143,64 @@ func (r *DeploymentReplicasSyncer) Reconcile(ctx context.Context, req controller return controllerruntime.Result{}, err } - klog.Infof("successfully udpate deployment (%s/%s) replicas from %d to %d", deployment.Namespace, - deployment.Namespace, oldReplicas, deployment.Status.Replicas) + klog.Infof("successfully update deployment (%s/%s) replicas from %d to %d", deployment.Namespace, + deployment.Name, oldReplicas, deployment.Status.Replicas) return controllerruntime.Result{}, nil } + +// isDeploymentStatusCollected judges whether the deployment modification in spec has taken effect and its status has been collected. +func isDeploymentStatusCollected(deployment *appsv1.Deployment, binding *workv1alpha2.ResourceBinding) bool { + // make sure the replicas change in deployment.spec can sync to binding.spec, otherwise retry + if deployment.Spec.Replicas == nil || *deployment.Spec.Replicas != binding.Spec.Replicas { + klog.V(4).Infof("wait until replicas of binding (%d) equal to replicas of deployment (%v)", + binding.Spec.Replicas, deployment.Spec.Replicas) + return false + } + + // make sure the scheduler observed generation equal to generation in binding, otherwise retry + if binding.Generation != binding.Status.SchedulerObservedGeneration { + klog.V(4).Infof("wait until scheduler observed generation (%d) equal to generation in binding (%d)", + binding.Status.SchedulerObservedGeneration, binding.Generation) + return false + } + + // make sure the number of aggregated status in binding is as expected. 
+ if len(binding.Status.AggregatedStatus) != len(binding.Spec.Clusters) { + klog.V(4).Infof("wait until all clusters status collected, got: %d, expected: %d", + len(binding.Status.AggregatedStatus), len(binding.Spec.Clusters)) + return false + } + + // make sure the status.replicas in binding has been collected from work to binding. + bindingStatusSumReplicas := int32(0) + for _, status := range binding.Status.AggregatedStatus { + if status.Status == nil { + klog.V(4).Infof("wait until aggregated status of cluster %s collected", status.ClusterName) + return false + } + itemStatus := &appsv1.DeploymentStatus{} + if err := json.Unmarshal(status.Status.Raw, itemStatus); err != nil { + klog.Errorf("unmarshal status.raw of cluster %s in binding failed: %+v", status.ClusterName, err) + return false + } + // if member cluster deployment is controlled by hpa, its status.replicas must be > 0 (since hpa.minReplicas > 0), + // so, if its status replicas is 0, it means the status hasn't been collected from member cluster, and needs waiting. + if itemStatus.Replicas <= 0 { + klog.V(4).Infof("wait until aggregated status replicas of cluster %s collected", status.ClusterName) + return false + } + bindingStatusSumReplicas += itemStatus.Replicas + } + + // make sure the latest collected status.replicas is synced from binding to template. + // e.g.: user set deployment.spec.replicas = 2, then sum replicas in binding.status becomes 2, however, if now + // deployment.status.replicas is still 1, we shouldn't update spec.replicas to 1 until status.replicas becomes 2. 
+ if deployment.Status.Replicas != bindingStatusSumReplicas { + klog.V(4).Infof("wait until deployment status replicas (%d) equal to binding status replicas (%d)", + deployment.Status.Replicas, bindingStatusSumReplicas) + return false + } + + return true +} diff --git a/pkg/resourceinterpreter/default/native/reflectstatus.go b/pkg/resourceinterpreter/default/native/reflectstatus.go index b92266ed6..4f1520519 100644 --- a/pkg/resourceinterpreter/default/native/reflectstatus.go +++ b/pkg/resourceinterpreter/default/native/reflectstatus.go @@ -17,6 +17,7 @@ limitations under the License. package native import ( + "bytes" "fmt" appsv1 "k8s.io/api/apps/v1" @@ -74,7 +75,18 @@ func reflectDeploymentStatus(object *unstructured.Unstructured) (*runtime.RawExt AvailableReplicas: deploymentStatus.AvailableReplicas, UnavailableReplicas: deploymentStatus.UnavailableReplicas, } - return helper.BuildStatusRawExtension(grabStatus) + + grabStatusRaw, err := helper.BuildStatusRawExtension(grabStatus) + if err != nil { + return nil, err + } + + // if status is empty struct, it actually means status haven't been collected from member cluster. + if bytes.Equal(grabStatusRaw.Raw, []byte("{}")) { + return nil, nil + } + + return grabStatusRaw, nil } func reflectServiceStatus(object *unstructured.Unstructured) (*runtime.RawExtension, error) {