/*
Copyright 2022 The Kruise Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package workloads

import (
	"context"
	"fmt"
	"sort"

	"github.com/openkruise/rollouts/api/v1alpha1"
	"github.com/openkruise/rollouts/pkg/util"
	utilclient "github.com/openkruise/rollouts/pkg/util/client"
	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// DeploymentsRolloutController is responsible for handling Deployment type of workloads
type DeploymentsRolloutController struct {
	deploymentController
	stable *apps.Deployment
	canary *apps.Deployment
}

// NewDeploymentRolloutController creates a new Deployment rollout controller
func NewDeploymentRolloutController(cli client.Client, recorder record.EventRecorder, release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus, stableNamespacedName types.NamespacedName) *DeploymentsRolloutController {
	return &DeploymentsRolloutController{
		deploymentController: deploymentController{
			workloadController: workloadController{
				client:    cli,
				recorder:  recorder,
				release:   release,
				newStatus: newStatus,
			},
			stableNamespacedName: stableNamespacedName,
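			// the canary Deployment lives in the same namespace as the stable one; it is not
			// looked up by this name but matched by pod template (see fetchCanaryDeployment)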
			canaryNamespacedName: stableNamespacedName,
			releaseKey:           client.ObjectKeyFromObject(release),
		},
	}
}

// VerifyWorkload verifies that the workload is ready to execute the release plan
func (c *DeploymentsRolloutController) VerifyWorkload() (bool, error) {
	// claim the deployment is under our control, and create the canary deployment if needed.
	// Do not move this call to the Preparing phase, otherwise multiple canary deployments
	// will be repeatedly created due to informer cache latency.
	if _, err := c.claimDeployment(c.stable, c.canary); err != nil {
		return false, err
	}

	c.recorder.Event(c.release, v1.EventTypeNormal, "Verified", "ReleasePlan and the Deployment resource are verified")
	return true, nil
}

// PrepareBeforeProgress makes sure that the Deployment is under our control
func (c *DeploymentsRolloutController) PrepareBeforeProgress() (bool, *int32, error) {
	// the workload is verified, and we should record revision and replicas info before progressing
	if err := c.recordDeploymentRevisionAndReplicas(); err != nil {
		klog.Errorf("Failed to record deployment(%v) revision and replicas info, error: %v", c.stableNamespacedName, err)
		return false, nil, err
	}

	c.recorder.Event(c.release, v1.EventTypeNormal, "Initialized", "Rollout resources are initialized")
	return true, nil, nil
}

// UpgradeOneBatch calculates the number of pods we can upgrade in the current batch
// according to the release plan and then sets the canary deployment replicas accordingly
func (c *DeploymentsRolloutController) UpgradeOneBatch() (bool, error) {
	if err := c.fetchStableDeployment(); err != nil {
		return false, err
	}
	if err := c.fetchCanaryDeployment(); err != nil {
		return false, err
	}

	// canary replicas we have at the current state
	currentCanaryReplicas := *c.canary.Spec.Replicas

	// canary goal we should achieve at the end of the current batch
	canaryGoal := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas)

klog.V(3).InfoS("upgraded one batch, but no need to update replicas of canary Deployment",
|
|
"Deployment", client.ObjectKeyFromObject(c.canary),
|
|
"BatchRelease", c.releaseKey,
|
|
"current-batch", c.newStatus.CanaryStatus.CurrentBatch,
|
|
"canary-goal", canaryGoal,
|
|
"current-canary-replicas", currentCanaryReplicas,
|
|
"current-canary-status-replicas", c.canary.Status.UpdatedReplicas)
|
|
|
|
	if err := c.patchDeploymentReplicas(c.canary, canaryGoal); err != nil {
		return false, err
	}

	// patch the current batch label to pods
	patchDone, err := c.patchPodBatchLabel(canaryGoal)
	if !patchDone || err != nil {
		return false, err
	}

	c.recorder.Eventf(c.release, v1.EventTypeNormal, "Batch Rollout", "Finished submitting all upgrade requests for batch %d", c.newStatus.CanaryStatus.CurrentBatch)
	return true, nil
}

// CheckOneBatchReady checks to see if the pods are all available according to the rollout plan
func (c *DeploymentsRolloutController) CheckOneBatchReady() (bool, error) {
	if err := c.fetchStableDeployment(); err != nil {
		return false, err
	}
	if err := c.fetchCanaryDeployment(); err != nil {
		return false, err
	}

	// in case the workload status is untrustworthy
	if c.canary.Status.ObservedGeneration != c.canary.Generation {
		return false, nil
	}

	// canary pods that have been created
	canaryPodCount := c.canary.Status.Replicas
	// canary pods that are available
	availableCanaryPodCount := c.canary.Status.AvailableReplicas
	// canary goal we should achieve in the current batch
	canaryGoal := c.calculateCurrentCanary(c.newStatus.ObservedWorkloadReplicas)
	// max unavailable of the canary deployment
	var maxUnavailable *intstr.IntOrString
	if c.canary.Spec.Strategy.RollingUpdate != nil {
		maxUnavailable = c.canary.Spec.Strategy.RollingUpdate.MaxUnavailable
	}

	var err error
	var pods []*v1.Pod
	// if rolloutID is not set, there is no need to list pods,
	// because we cannot patch the correct batch label to pods.
	if c.release.Spec.ReleasePlan.RolloutID != "" {
		pods, err = util.ListOwnedPods(c.client, c.canary)
		if err != nil {
			return false, err
		}
	}

	klog.InfoS("checking the batch releasing progress",
		"BatchRelease", c.releaseKey,
		"len(pods)", len(pods),
		"canary-goal", canaryGoal,
		"current-batch", c.newStatus.CanaryStatus.CurrentBatch,
		"canary-available-pod-count", availableCanaryPodCount,
		"stable-pod-status-replicas", c.stable.Status.Replicas)

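	// the batch is treated as ready when the canary Deployment has created the goal number of
	// pods for this batch and enough of them are available with respect to maxUnavailable;
	// when a rolloutID is set, the listed pods' batch labels are presumably also checked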
	if !isBatchReady(c.release, pods, maxUnavailable, canaryGoal, canaryGoal, canaryPodCount, availableCanaryPodCount) {
		klog.Infof("BatchRelease(%v) batch is not ready yet, current batch=%d", c.releaseKey, c.newStatus.CanaryStatus.CurrentBatch)
		return false, nil
	}

	klog.Infof("BatchRelease(%v) batch is ready, current batch=%d", c.releaseKey, c.newStatus.CanaryStatus.CurrentBatch)
	return true, nil
}

// FinalizeProgress restores the stable Deployment and cleans up the canary settings
func (c *DeploymentsRolloutController) FinalizeProgress(cleanup bool) (bool, error) {
	if err := c.fetchStableDeployment(); client.IgnoreNotFound(err) != nil {
		return false, err
	}

	// release the deployment from our control, and clean up the canary resources
	succeed, err := c.releaseDeployment(c.stable, cleanup)
	if !succeed || err != nil {
		klog.Errorf("Failed to finalize deployment(%v), error: %v", c.stableNamespacedName, err)
		return false, err
	}

	c.recorder.Eventf(c.release, v1.EventTypeNormal, "Finalized", "Finalized: cleanup=%v", cleanup)
	return true, nil
}

// SyncWorkloadInfo returns the workload info if it has changed during the rollout
// TODO: abstract a WorkloadEventTypeJudge interface for the following `if` clauses
func (c *DeploymentsRolloutController) SyncWorkloadInfo() (WorkloadEventType, *util.WorkloadInfo, error) {
	// ignore the sync if the release plan is being deleted
	if c.release.DeletionTimestamp != nil {
		return IgnoreWorkloadEvent, nil, nil
	}

	var err error
	err = c.fetchStableDeployment()
	if err != nil {
		if apierrors.IsNotFound(err) {
			return WorkloadHasGone, nil, err
		}
		return "", nil, err
	}

	err = c.fetchCanaryDeployment()
	if client.IgnoreNotFound(err) != nil {
		return "", nil, err
	}

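	// every pod of the canary Deployment runs the updated template, so its Replicas and
	// AvailableReplicas map directly to the updated and updated-ready counts reported here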
	workloadInfo := util.NewWorkloadInfo()
	if c.canary != nil {
		workloadInfo.Status = &util.WorkloadStatus{
			UpdatedReplicas:      c.canary.Status.Replicas,
			UpdatedReadyReplicas: c.canary.Status.AvailableReplicas,
		}
	}

	// in case the canary deployment is being deleted but still has the finalizer, which is unexpected
	if c.canary != nil && c.canary.DeletionTimestamp != nil && controllerutil.ContainsFinalizer(c.canary, util.CanaryDeploymentFinalizer) {
		return WorkloadUnHealthy, workloadInfo, nil
	}

	// in case the workload status is untrustworthy
	if c.stable.Status.ObservedGeneration != c.stable.Generation {
		klog.Warningf("Deployment(%v) is still reconciling, waiting for it to complete, generation: %v, observed: %v",
			c.stableNamespacedName, c.stable.Generation, c.stable.Status.ObservedGeneration)
		return WorkloadStillReconciling, workloadInfo, nil
	}

	// in case the workload has been promoted
	if !c.stable.Spec.Paused && c.stable.Status.UpdatedReplicas == c.stable.Status.Replicas {
		return IgnoreWorkloadEvent, workloadInfo, nil
	}

	// in case the workload is scaling up/down
	if *c.stable.Spec.Replicas != c.newStatus.ObservedWorkloadReplicas && c.newStatus.ObservedWorkloadReplicas != -1 {
		workloadInfo.Replicas = c.stable.Spec.Replicas
		klog.Warningf("Deployment(%v) replicas changed during releasing, should pause and wait for it to complete, replicas from: %v -> %v",
			c.stableNamespacedName, c.newStatus.ObservedWorkloadReplicas, *c.stable.Spec.Replicas)
		return WorkloadReplicasChanged, workloadInfo, nil
	}

	// in case the workload revision (pod template) was changed during releasing
	if hashRevision := util.ComputeHash(&c.stable.Spec.Template, nil); hashRevision != c.newStatus.UpdateRevision {
		workloadInfo.Status.UpdateRevision = hashRevision
		klog.Warningf("Deployment(%v) updateRevision changed during releasing", c.stableNamespacedName)
		return WorkloadPodTemplateChanged, workloadInfo, nil
	}

	return IgnoreWorkloadEvent, workloadInfo, nil
}

/* ----------------------------------
The functions below are helper functions
------------------------------------- */

// fetchStableDeployment fetches the stable deployment into c.stable
func (c *DeploymentsRolloutController) fetchStableDeployment() error {
	if c.stable != nil {
		return nil
	}

	stable := &apps.Deployment{}
	if err := c.client.Get(context.TODO(), c.stableNamespacedName, stable); err != nil {
		klog.Errorf("BatchRelease(%v) get stable deployment error: %v", c.releaseKey, err)
		return err
	}
	c.stable = stable
	return nil
}

// fetchCanaryDeployment fetches the canary deployment into c.canary
func (c *DeploymentsRolloutController) fetchCanaryDeployment() error {
	var err error
	defer func() {
		if err != nil {
			klog.Errorf("BatchRelease(%v) get canary deployment error: %v", c.releaseKey, err)
		}
	}()

	err = c.fetchStableDeployment()
	if err != nil {
		return err
	}

	ds, err := c.listCanaryDeployment(client.InNamespace(c.stable.Namespace), utilclient.DisableDeepCopy)
	if err != nil {
		return err
	}

	ds = util.FilterActiveDeployment(ds)
	sort.Slice(ds, func(i, j int) bool {
		return ds[i].CreationTimestamp.After(ds[j].CreationTimestamp.Time)
	})

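	// accept the newest active canary Deployment only if its pod template matches the stable
	// template (ignoring the pod-template-hash label); otherwise report the canary as not found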
	if len(ds) == 0 || !util.EqualIgnoreHash(&ds[0].Spec.Template, &c.stable.Spec.Template) {
		err = apierrors.NewNotFound(schema.GroupResource{
			Group:    apps.SchemeGroupVersion.Group,
			Resource: c.stable.Kind,
		}, fmt.Sprintf("%v-canary", c.canaryNamespacedName.Name))
		return err
	}

	c.canary = ds[0].DeepCopy()
	return nil
}

// recordDeploymentRevisionAndReplicas records stableRevision, updateRevision, and workloadReplicas to BatchRelease.Status
func (c *DeploymentsRolloutController) recordDeploymentRevisionAndReplicas() error {
	err := c.fetchStableDeployment()
	if err != nil {
		return err
	}

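	// updateRevision is the hash of the stable Deployment's current pod template; stableRevision
	// is resolved by GetStablePodTemplateHash, presumably from the pod-template-hash of the
	// stable ReplicaSet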
	updateRevision := util.ComputeHash(&c.stable.Spec.Template, nil)
	stableRevision, err := c.GetStablePodTemplateHash(c.stable)
	if err != nil {
		return err
	}
	c.newStatus.StableRevision = stableRevision
	c.newStatus.UpdateRevision = updateRevision
	c.newStatus.ObservedWorkloadReplicas = *c.stable.Spec.Replicas
	return nil
}

func (c *DeploymentsRolloutController) patchPodBatchLabel(canaryGoal int32) (bool, error) {
	rolloutID := c.release.Spec.ReleasePlan.RolloutID
	// if rolloutID is not set, there is no need to list pods,
	// because we cannot patch the correct batch label to pods.
	if rolloutID == "" || c.canary == nil {
		return true, nil
	}

	pods, err := util.ListOwnedPods(c.client, c.canary)
	if err != nil {
		klog.Errorf("Failed to list pods for Deployment %v", c.stableNamespacedName)
		return false, err
	}

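	// CanaryStatus.CurrentBatch is zero-based, while the batch ID patched onto pods is one-based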
	batchID := c.release.Status.CanaryStatus.CurrentBatch + 1
	updateRevision := c.release.Status.UpdateRevision
	return patchPodBatchLabel(c.client, pods, rolloutID, batchID, updateRevision, canaryGoal, c.releaseKey)
}