support bluegreen release: release logic

Signed-off-by: yunbo <yunbo10124scut@gmail.com>
This commit is contained in:
yunbo 2024-11-13 10:29:35 +08:00 committed by Zhen Zhang
parent 09e01cb95b
commit 3f66aae0ae
15 changed files with 1787 additions and 262 deletions

View File

@ -70,7 +70,7 @@ type FinalizingPolicyType string
const (
// WaitResumeFinalizingPolicyType will wait workload to be resumed, which means
// controller will be hold at Finalizing phase util all pods of workload is upgraded.
// controller will be hold at Finalizing phase until all pods of workload is upgraded.
// WaitResumeFinalizingPolicyType only works in canary-style BatchRelease controller.
WaitResumeFinalizingPolicyType FinalizingPolicyType = "WaitResume"
// ImmediateFinalizingPolicyType will not to wait workload to be resumed.

View File

@ -561,27 +561,22 @@ type FinalisingStepType string
//goland:noinspection GoUnusedConst
const (
// Route all traffic to stable or new version based on FinaliseReason (for bluegreen)
FinalisingStepTypeRouteAllTraffic FinalisingStepType = "RouteAllTraffic"
// Route all traffic to new version (for bluegreen)
FinalisingStepRouteTrafficToNew FinalisingStepType = "FinalisingStepRouteTrafficToNew"
// Restore the GatewayAPI/Ingress/Istio
FinalisingStepRouteTrafficToStable FinalisingStepType = "FinalisingStepRouteTrafficToStable"
// Restore the stable Service, i.e. remove corresponding selector
FinalisingStepTypeStableService FinalisingStepType = "RestoreStableService"
// Remove the canary Service
FinalisingStepTypeRemoveCanaryService FinalisingStepType = "RemoveCanaryService"
FinalisingStepRestoreStableService FinalisingStepType = "RestoreStableService"
// Remove the Canary Service
FinalisingStepRemoveCanaryService FinalisingStepType = "RemoveCanaryService"
// Patch Batch Release to scale down (exception: the canary Deployment will be
// scaled down in FinalisingStepTypeDeleteBR step)
// For Both BlueGreenStrategy and CanaryStrategy:
// set workload.pause=false, set workload.partition=0
FinalisingStepTypeBatchRelease FinalisingStepType = "PatchBatchRelease"
// Execute the FinalisingTrafficRouting function
FinalisingStepTypeTrafficRouting FinalisingStepType = "FinalisingTrafficRouting"
// Restore the GatewayAPI/Ingress/Istio
FinalisingStepTypeGateway FinalisingStepType = "RestoreGateway"
// Delete Canary Service
FinalisingStepTypeDeleteCanaryService FinalisingStepType = "DeleteCanaryService"
FinalisingStepResumeWorkload FinalisingStepType = "ResumeWorkload"
// Delete Batch Release
FinalisingStepTypeDeleteBR FinalisingStepType = "DeleteBatchRelease"
FinalisingStepReleaseWorkloadControl FinalisingStepType = "ReleaseWorkloadControl"
// All needed work done
FinalisingStepTypeEnd FinalisingStepType = "END"
)

View File

@ -0,0 +1,471 @@
/*
Copyright 2022 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package rollout
import (
"context"
"fmt"
"reflect"
"time"
"github.com/openkruise/rollouts/api/v1alpha1"
"github.com/openkruise/rollouts/api/v1beta1"
"github.com/openkruise/rollouts/pkg/trafficrouting"
"github.com/openkruise/rollouts/pkg/util"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
utilpointer "k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
)
type blueGreenReleaseManager struct {
client.Client
trafficRoutingManager *trafficrouting.Manager
recorder record.EventRecorder
}
func (m *blueGreenReleaseManager) runCanary(c *RolloutContext) error {
blueGreenStatus := c.NewStatus.BlueGreenStatus
if br, err := fetchBatchRelease(m.Client, c.Rollout.Namespace, c.Rollout.Name); err != nil && !errors.IsNotFound(err) {
klog.Errorf("rollout(%s/%s) fetch batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return err
} else if err == nil {
// This line will do something important:
// - sync status from br to Rollout: to better observability;
// - sync rollout-id from Rollout to br: to make BatchRelease
// relabels pods in the scene where only rollout-id is changed.
if err = m.syncBatchRelease(br, blueGreenStatus); err != nil {
klog.Errorf("rollout(%s/%s) sync batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return err
}
}
if blueGreenStatus.PodTemplateHash == "" {
blueGreenStatus.PodTemplateHash = c.Workload.PodTemplateHash
}
if m.doCanaryJump(c) {
klog.Infof("rollout(%s/%s) canary step jumped", c.Rollout.Namespace, c.Rollout.Name)
return nil
}
// When the first batch is trafficRouting rolling and the next steps are rolling release,
// We need to clean up the canary-related resources first and then rollout the rest of the batch.
currentStep := c.Rollout.Spec.Strategy.BlueGreen.Steps[blueGreenStatus.CurrentStepIndex-1]
if currentStep.Traffic == nil && len(currentStep.Matches) == 0 {
tr := newTrafficRoutingContext(c)
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr)
blueGreenStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil {
return err
} else if !done {
klog.Infof("rollout(%s/%s) cleaning up canary-related resources", c.Rollout.Namespace, c.Rollout.Name)
expectedTime := time.Now().Add(tr.RecheckDuration)
c.RecheckTime = &expectedTime
return nil
}
}
switch blueGreenStatus.CurrentStepState {
// before CanaryStepStateUpgrade, handle some special cases, to prevent traffic loss
case v1beta1.CanaryStepStateInit:
klog.Infof("rollout(%s/%s) run bluegreen strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateInit)
tr := newTrafficRoutingContext(c)
if currentStep.Traffic != nil || len(currentStep.Matches) > 0 {
//TODO - consider istio subsets
if blueGreenStatus.CurrentStepIndex == 1 {
klog.Infof("Before the first batch, rollout(%s/%s) patch stable Service", c.Rollout.Namespace, c.Rollout.Name)
retry, err := m.trafficRoutingManager.PatchStableService(tr)
if err != nil {
return err
} else if retry {
expectedTime := time.Now().Add(tr.RecheckDuration)
c.RecheckTime = &expectedTime
return nil
}
}
}
blueGreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
blueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
blueGreenStatus.CurrentStepIndex, v1beta1.CanaryStepStateInit, blueGreenStatus.CurrentStepState)
fallthrough
case v1beta1.CanaryStepStateUpgrade:
klog.Infof("rollout(%s/%s) run bluegreen strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateUpgrade)
done, err := m.doCanaryUpgrade(c)
if err != nil {
return err
} else if done {
blueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
blueGreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
blueGreenStatus.CurrentStepIndex, v1beta1.CanaryStepStateUpgrade, blueGreenStatus.CurrentStepState)
}
case v1beta1.CanaryStepStateTrafficRouting:
klog.Infof("rollout(%s/%s) run bluegreen strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateTrafficRouting)
tr := newTrafficRoutingContext(c)
done, err := m.trafficRoutingManager.DoTrafficRouting(tr)
blueGreenStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil {
return err
} else if done {
blueGreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
blueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateMetricsAnalysis
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
blueGreenStatus.CurrentStepIndex, v1beta1.CanaryStepStateTrafficRouting, blueGreenStatus.CurrentStepState)
}
expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second)
c.RecheckTime = &expectedTime
case v1beta1.CanaryStepStateMetricsAnalysis:
klog.Infof("rollout(%s/%s) run bluegreen strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateMetricsAnalysis)
done, err := m.doCanaryMetricsAnalysis(c)
if err != nil {
return err
} else if done {
blueGreenStatus.CurrentStepState = v1beta1.CanaryStepStatePaused
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
blueGreenStatus.CurrentStepIndex, v1beta1.CanaryStepStateMetricsAnalysis, blueGreenStatus.CurrentStepState)
}
case v1beta1.CanaryStepStatePaused:
klog.Infof("rollout(%s/%s) run bluegreen strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStatePaused)
done, err := m.doCanaryPaused(c)
if err != nil {
return err
} else if done {
blueGreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
blueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateReady
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
blueGreenStatus.CurrentStepIndex, v1beta1.CanaryStepStatePaused, blueGreenStatus.CurrentStepState)
}
case v1beta1.CanaryStepStateReady:
klog.Infof("rollout(%s/%s) run bluegreen strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateReady)
blueGreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
// run next step
if len(c.Rollout.Spec.Strategy.BlueGreen.Steps) > int(blueGreenStatus.CurrentStepIndex) {
blueGreenStatus.CurrentStepIndex++
blueGreenStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, blueGreenStatus.CurrentStepIndex)
blueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateInit
klog.Infof("rollout(%s/%s) bluegreen step from(%d) -> to(%d)", c.Rollout.Namespace, c.Rollout.Name, blueGreenStatus.CurrentStepIndex-1, blueGreenStatus.CurrentStepIndex)
return nil
}
// completed
blueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s), run all steps", c.Rollout.Namespace, c.Rollout.Name,
blueGreenStatus.CurrentStepIndex, v1beta1.CanaryStepStateReady, blueGreenStatus.CurrentStepState)
fallthrough
// canary completed
case v1beta1.CanaryStepStateCompleted:
klog.Infof("rollout(%s/%s) run bluegreen strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateCompleted)
}
return nil
}
func (m *blueGreenReleaseManager) doCanaryUpgrade(c *RolloutContext) (bool, error) {
// verify whether batchRelease configuration is the latest
steps := len(c.Rollout.Spec.Strategy.BlueGreen.Steps)
blueGreenStatus := c.NewStatus.BlueGreenStatus
cond := util.GetRolloutCondition(*c.NewStatus, v1beta1.RolloutConditionProgressing)
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and upgrade workload to new version", blueGreenStatus.CurrentStepIndex, steps)
c.NewStatus.Message = cond.Message
// run batch release to upgrade the workloads
done, br, err := runBatchRelease(m, c.Rollout, getRolloutID(c.Workload), blueGreenStatus.CurrentStepIndex, c.Workload.IsInRollback)
if err != nil {
return false, err
} else if !done {
return false, nil
}
if br.Status.ObservedReleasePlanHash != util.HashReleasePlanBatches(&br.Spec.ReleasePlan) ||
br.Generation != br.Status.ObservedGeneration {
klog.Infof("rollout(%s/%s) batchRelease status is inconsistent, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return false, nil
}
// check whether batchRelease is ready(whether new pods is ready.)
if br.Status.CanaryStatus.CurrentBatchState != v1beta1.ReadyBatchState ||
br.Status.CanaryStatus.CurrentBatch+1 < blueGreenStatus.CurrentStepIndex {
klog.Infof("rollout(%s/%s) batchRelease status(%s) is not ready, and wait a moment", c.Rollout.Namespace, c.Rollout.Name, util.DumpJSON(br.Status))
return false, nil
}
m.recorder.Eventf(c.Rollout, corev1.EventTypeNormal, "Progressing", fmt.Sprintf("upgrade step(%d) canary pods with new versions done", blueGreenStatus.CurrentStepIndex))
klog.Infof("rollout(%s/%s) batch(%s) state(%s), and success",
c.Rollout.Namespace, c.Rollout.Name, util.DumpJSON(br.Status), br.Status.CanaryStatus.CurrentBatchState)
// set the latest PodTemplateHash to selector the latest pods.
blueGreenStatus.PodTemplateHash = c.Workload.PodTemplateHash
return true, nil
}
func (m *blueGreenReleaseManager) doCanaryMetricsAnalysis(c *RolloutContext) (bool, error) {
// todo
return true, nil
}
func (m *blueGreenReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) {
blueGreenStatus := c.NewStatus.BlueGreenStatus
currentStep := c.Rollout.Spec.Strategy.BlueGreen.Steps[blueGreenStatus.CurrentStepIndex-1]
steps := len(c.Rollout.Spec.Strategy.BlueGreen.Steps)
cond := util.GetRolloutCondition(*c.NewStatus, v1beta1.RolloutConditionProgressing)
// need manual confirmation
if currentStep.Pause.Duration == nil {
klog.Infof("rollout(%s/%s) don't set pause duration, and need manual confirmation", c.Rollout.Namespace, c.Rollout.Name)
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and you need manually confirm to enter the next step", blueGreenStatus.CurrentStepIndex, steps)
c.NewStatus.Message = cond.Message
return false, nil
}
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and wait duration(%d seconds) to enter the next step", blueGreenStatus.CurrentStepIndex, steps, *currentStep.Pause.Duration)
c.NewStatus.Message = cond.Message
// wait duration time, then go to next step
duration := time.Second * time.Duration(*currentStep.Pause.Duration)
expectedTime := blueGreenStatus.LastUpdateTime.Add(duration)
if expectedTime.Before(time.Now()) {
klog.Infof("rollout(%s/%s) canary step(%d) paused duration(%d seconds), and go to the next step",
c.Rollout.Namespace, c.Rollout.Name, blueGreenStatus.CurrentStepIndex, *currentStep.Pause.Duration)
return true, nil
}
c.RecheckTime = &expectedTime
return false, nil
}
func (m *blueGreenReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) {
bluegreenStatus := c.NewStatus.BlueGreenStatus
// since we forbid adding or removing steps, currentStepIndex should always be valid
currentStep := c.Rollout.Spec.Strategy.BlueGreen.Steps[bluegreenStatus.CurrentStepIndex-1]
// nextIndex=-1 means the release is done, nextIndex=0 is not used
if nextIndex := bluegreenStatus.NextStepIndex; nextIndex != util.NextBatchIndex(c.Rollout, bluegreenStatus.CurrentStepIndex) && nextIndex > 0 {
currentIndexBackup := bluegreenStatus.CurrentStepIndex
currentStepStateBackup := bluegreenStatus.CurrentStepState
// update the current and next stepIndex
bluegreenStatus.CurrentStepIndex = nextIndex
bluegreenStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, nextIndex)
nextStep := c.Rollout.Spec.Strategy.BlueGreen.Steps[nextIndex-1]
// compare next step and current step to decide the state we should go
if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) {
bluegreenStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
} else {
bluegreenStatus.CurrentStepState = v1beta1.CanaryStepStateInit
}
bluegreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
klog.Infof("rollout(%s/%s) step(%d->%d) state from(%s -> %s)",
c.Rollout.Namespace, c.Rollout.Name,
currentIndexBackup, bluegreenStatus.CurrentStepIndex,
currentStepStateBackup, bluegreenStatus.CurrentStepState)
return true
}
return false
}
// cleanup after rollout is completed or finished
func (m *blueGreenReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, error) {
blueGreenStatus := c.NewStatus.BlueGreenStatus
if blueGreenStatus == nil {
return true, nil
}
// rollout progressing complete, remove rollout progressing annotation in workload
err := removeRolloutProgressingAnnotation(m.Client, c)
if err != nil {
return false, err
}
tr := newTrafficRoutingContext(c)
// execute steps based on the predefined order for each reason
nextStep := nextBlueGreenTask(c.FinalizeReason, blueGreenStatus.FinalisingStep)
// if current step is empty, set it with the first step
// if current step is end, we just return
if len(blueGreenStatus.FinalisingStep) == 0 {
blueGreenStatus.FinalisingStep = nextStep
blueGreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
} else if blueGreenStatus.FinalisingStep == v1beta1.FinalisingStepTypeEnd {
klog.Infof("rollout(%s/%s) finalising process is already completed", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
klog.Infof("rollout(%s/%s) Finalising Step is %s", c.Rollout.Namespace, c.Rollout.Name, blueGreenStatus.FinalisingStep)
var retry bool
// the order of steps is maitained by calculating thenextStep
switch blueGreenStatus.FinalisingStep {
// set workload.pause=false; set workload.partition=0
case v1beta1.FinalisingStepResumeWorkload:
retry, err = finalizingBatchRelease(m.Client, c)
// delete batchRelease
case v1beta1.FinalisingStepReleaseWorkloadControl:
retry, err = removeBatchRelease(m.Client, c)
// restore the gateway resources (ingress/gatewayAPI/Istio), that means
// only stable Service will accept the traffic
case v1beta1.FinalisingStepRouteTrafficToStable:
retry, err = m.trafficRoutingManager.RestoreGateway(tr)
// restore the stable service
case v1beta1.FinalisingStepRestoreStableService:
retry, err = m.trafficRoutingManager.RestoreStableService(tr)
// remove canary service
case v1beta1.FinalisingStepRemoveCanaryService:
retry, err = m.trafficRoutingManager.RemoveCanaryService(tr)
// route all traffic to new version
case v1beta1.FinalisingStepRouteTrafficToNew:
retry, err = m.trafficRoutingManager.RouteAllTrafficToNewVersion(tr)
default:
nextStep = nextBlueGreenTask(c.FinalizeReason, "")
klog.Warningf("unexpected finalising step, current step(%s), start from the first step(%s)", blueGreenStatus.FinalisingStep, nextStep)
blueGreenStatus.FinalisingStep = nextStep
return false, nil
}
if err != nil || retry {
return false, err
}
// current step is done, run the next step
blueGreenStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
blueGreenStatus.FinalisingStep = nextStep
if blueGreenStatus.FinalisingStep == v1beta1.FinalisingStepTypeEnd {
return true, nil
}
return false, nil
}
func (m *blueGreenReleaseManager) fetchClient() client.Client {
return m.Client
}
func (m *blueGreenReleaseManager) createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32, isRollback bool) *v1beta1.BatchRelease {
var batches []v1beta1.ReleaseBatch
for _, step := range rollout.Spec.Strategy.BlueGreen.Steps {
batches = append(batches, v1beta1.ReleaseBatch{CanaryReplicas: *step.Replicas})
}
br := &v1beta1.BatchRelease{
ObjectMeta: metav1.ObjectMeta{
Namespace: rollout.Namespace,
Name: rollout.Name,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(rollout, rolloutControllerKind)},
},
Spec: v1beta1.BatchReleaseSpec{
WorkloadRef: v1beta1.ObjectRef{
APIVersion: rollout.Spec.WorkloadRef.APIVersion,
Kind: rollout.Spec.WorkloadRef.Kind,
Name: rollout.Spec.WorkloadRef.Name,
},
ReleasePlan: v1beta1.ReleasePlan{
Batches: batches,
RolloutID: rolloutID,
BatchPartition: utilpointer.Int32Ptr(batch),
FailureThreshold: rollout.Spec.Strategy.BlueGreen.FailureThreshold,
RollingStyle: v1beta1.BlueGreenRollingStyle,
},
},
}
annotations := map[string]string{}
if isRollback {
annotations[v1alpha1.RollbackInBatchAnnotation] = rollout.Annotations[v1alpha1.RollbackInBatchAnnotation]
}
if len(annotations) > 0 {
br.Annotations = annotations
}
return br
}
// syncBatchRelease sync status of br to blueGreenStatus, and sync rollout-id of blueGreenStatus to br.
func (m *blueGreenReleaseManager) syncBatchRelease(br *v1beta1.BatchRelease, blueGreenStatus *v1beta1.BlueGreenStatus) error {
// sync from BatchRelease status to Rollout blueGreenStatus
blueGreenStatus.UpdatedReplicas = br.Status.CanaryStatus.UpdatedReplicas
blueGreenStatus.UpdatedReadyReplicas = br.Status.CanaryStatus.UpdatedReadyReplicas
// Do not remove this line currently, otherwise, users will be not able to judge whether the BatchRelease works
// in the scene where only rollout-id changed.
// TODO: optimize the logic to better understand
blueGreenStatus.Message = fmt.Sprintf("BatchRelease is at state %s, rollout-id %s, step %d",
br.Status.CanaryStatus.CurrentBatchState, br.Status.ObservedRolloutID, br.Status.CanaryStatus.CurrentBatch+1)
// sync rolloutId from blueGreenStatus to BatchRelease
if blueGreenStatus.ObservedRolloutID != br.Spec.ReleasePlan.RolloutID {
body := fmt.Sprintf(`{"spec":{"releasePlan":{"rolloutID":"%s"}}}`, blueGreenStatus.ObservedRolloutID)
return m.Patch(context.TODO(), br, client.RawPatch(types.MergePatchType, []byte(body)))
}
return nil
}
/*
- Rollback Scenario:
why the first step is to restore the gateway? (aka. route all traffic to stable version)
we cannot remove selector of the stable service firstly as canary does, because users are allowed to configure "0%" traffic
in bluegreen strategy. Consider the following example:
- replicas: 50% // step 1
traffic: 0%
if user is at step 1, and then attempts to rollback directly, Rollout should route all traffic to stable service
(keep unchanged actually). However, if we remove the selector of the stable service instead, we would inadvertently
route some traffic to the new version for a period, which is undesirable.
- Rollout Deletion and Disabling Scenario:
If Rollout is being deleted or disabled, it suggests users want to release the new version using workload built-in strategy,
such as rollingUpdate for Deployment, instead of blue-green or canary. And thus, we can simply remove
the label selector of the stable service, routing traffic to reach both stable and updated pods.
- Rollout success Scenario:
This indicates the rollout has completed its final batch and the user has confirmed to
transition fully to the new version. We can simply route all traffic to new version. Additionally, given that all
traffic is routed to the canary Service, it is safe to remove selector of stable Service, which additionally works
as a workaround for a bug caused by ingress-nginx controller (see https://github.com/kubernetes/ingress-nginx/issues/9635)
*/
func nextBlueGreenTask(reason string, currentTask v1beta1.FinalisingStepType) v1beta1.FinalisingStepType {
var taskSequence []v1beta1.FinalisingStepType
switch reason {
case v1beta1.FinaliseReasonSuccess: // success
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepRouteTrafficToNew,
v1beta1.FinalisingStepRestoreStableService,
v1beta1.FinalisingStepResumeWorkload,
v1beta1.FinalisingStepRouteTrafficToStable,
v1beta1.FinalisingStepRemoveCanaryService,
v1beta1.FinalisingStepReleaseWorkloadControl,
}
case v1beta1.FinaliseReasonRollback: // rollback
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepRouteTrafficToStable, // route all traffic to stable version
v1beta1.FinalisingStepResumeWorkload,
v1beta1.FinalisingStepRestoreStableService,
v1beta1.FinalisingStepRemoveCanaryService,
v1beta1.FinalisingStepReleaseWorkloadControl,
}
default: // others: disabled/deleting rollout
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepRestoreStableService,
v1beta1.FinalisingStepResumeWorkload, // scale up new, scale down old
v1beta1.FinalisingStepRouteTrafficToStable,
v1beta1.FinalisingStepRemoveCanaryService,
v1beta1.FinalisingStepReleaseWorkloadControl,
}
}
// if currentTask is empty, return first task
if len(currentTask) == 0 {
return taskSequence[0]
}
// find next task
for i := range taskSequence {
if currentTask == taskSequence[i] && i < len(taskSequence)-1 {
return taskSequence[i+1]
}
}
return v1beta1.FinalisingStepTypeEnd
}

View File

@ -0,0 +1,331 @@
/*
Copyright 2022 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package rollout
import (
"context"
"reflect"
"testing"
"github.com/openkruise/rollouts/api/v1alpha1"
"github.com/openkruise/rollouts/api/v1beta1"
"github.com/openkruise/rollouts/pkg/trafficrouting"
"github.com/openkruise/rollouts/pkg/util"
apps "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
netv1 "k8s.io/api/networking/v1"
// metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
utilpointer "k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)
func TestBlueGreenRunCanary(t *testing.T) {
cases := []struct {
name string
getObj func() ([]*apps.Deployment, []*apps.ReplicaSet)
getNetwork func() ([]*corev1.Service, []*netv1.Ingress)
getRollout func() (*v1beta1.Rollout, *v1beta1.BatchRelease)
expectStatus func() *v1beta1.RolloutStatus
expectBr func() *v1beta1.BatchRelease
}{
{
name: "run bluegreen upgrade1",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
rs1 := rsDemo.DeepCopy()
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease) {
obj := rolloutDemoBlueGreen.DeepCopy()
obj.Status.BlueGreenStatus.ObservedWorkloadGeneration = 2
obj.Status.BlueGreenStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.BlueGreenStatus.StableRevision = "pod-template-hash-v1"
obj.Status.BlueGreenStatus.UpdatedRevision = "6f8cc56547"
obj.Status.BlueGreenStatus.CurrentStepIndex = 1
obj.Status.BlueGreenStatus.NextStepIndex = 2
obj.Status.BlueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
util.SetRolloutCondition(&obj.Status, *cond)
return obj, nil
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemoBlueGreen.Status.DeepCopy()
s.BlueGreenStatus.ObservedWorkloadGeneration = 2
s.BlueGreenStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.BlueGreenStatus.StableRevision = "pod-template-hash-v1"
s.BlueGreenStatus.UpdatedRevision = "6f8cc56547"
s.BlueGreenStatus.CurrentStepIndex = 1
s.BlueGreenStatus.NextStepIndex = 2
s.BlueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
util.SetRolloutCondition(s, *cond)
return s
},
expectBr: func() *v1beta1.BatchRelease {
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromString("50%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
}
br.Spec.ReleasePlan.BatchPartition = utilpointer.Int32(0)
br.Spec.ReleasePlan.RollingStyle = v1beta1.BlueGreenRollingStyle
return br
},
},
{
name: "run bluegreen traffic routing",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
rs1 := rsDemo.DeepCopy()
rs2 := rsDemo.DeepCopy()
rs2.Name = "echoserver-canary"
rs2.Labels["pod-template-hash"] = "pod-template-hash-v2"
rs2.Spec.Template.Spec.Containers[0].Image = "echoserver:v2"
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1, rs2}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease) {
obj := rolloutDemoBlueGreen.DeepCopy()
obj.Status.BlueGreenStatus.ObservedWorkloadGeneration = 2
obj.Status.BlueGreenStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.BlueGreenStatus.StableRevision = "pod-template-hash-v1"
obj.Status.BlueGreenStatus.UpdatedRevision = "6f8cc56547"
obj.Status.BlueGreenStatus.CurrentStepIndex = 1
obj.Status.BlueGreenStatus.NextStepIndex = 2
obj.Status.BlueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
util.SetRolloutCondition(&obj.Status, *cond)
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromString("50%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
}
br.Spec.ReleasePlan.BatchPartition = utilpointer.Int32(0)
br.Spec.ReleasePlan.RollingStyle = v1beta1.BlueGreenRollingStyle
br.Status = v1beta1.BatchReleaseStatus{
ObservedGeneration: 1,
ObservedReleasePlanHash: util.HashReleasePlanBatches(&br.Spec.ReleasePlan),
CanaryStatus: v1beta1.BatchReleaseCanaryStatus{
CurrentBatchState: v1beta1.ReadyBatchState,
CurrentBatch: 0,
UpdatedReplicas: 1,
UpdatedReadyReplicas: 1,
},
}
return obj, br
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemoBlueGreen.Status.DeepCopy()
s.BlueGreenStatus.ObservedWorkloadGeneration = 2
s.BlueGreenStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.BlueGreenStatus.StableRevision = "pod-template-hash-v1"
s.BlueGreenStatus.UpdatedRevision = "6f8cc56547"
s.BlueGreenStatus.PodTemplateHash = "pod-template-hash-v2"
s.BlueGreenStatus.UpdatedReplicas = 1
s.BlueGreenStatus.UpdatedReadyReplicas = 1
s.BlueGreenStatus.CurrentStepIndex = 1
s.BlueGreenStatus.NextStepIndex = 2
s.BlueGreenStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
util.SetRolloutCondition(s, *cond)
return s
},
expectBr: func() *v1beta1.BatchRelease {
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromString("50%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
{
CanaryReplicas: intstr.FromString("100%"),
},
}
br.Spec.ReleasePlan.BatchPartition = utilpointer.Int32(0)
br.Spec.ReleasePlan.RollingStyle = v1beta1.BlueGreenRollingStyle
return br
},
},
}
for _, cs := range cases {
t.Run(cs.name, func(t *testing.T) {
deps, rss := cs.getObj()
rollout, br := cs.getRollout()
fc := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rollout).Build()
for _, rs := range rss {
_ = fc.Create(context.TODO(), rs)
}
for _, dep := range deps {
_ = fc.Create(context.TODO(), dep)
}
if br != nil {
_ = fc.Create(context.TODO(), br)
}
ss, in := cs.getNetwork()
_ = fc.Create(context.TODO(), ss[0])
_ = fc.Create(context.TODO(), in[0])
r := &RolloutReconciler{
Client: fc,
Scheme: scheme,
Recorder: record.NewFakeRecorder(10),
finder: util.NewControllerFinder(fc),
trafficRoutingManager: trafficrouting.NewTrafficRoutingManager(fc),
}
r.blueGreenManager = &blueGreenReleaseManager{
Client: fc,
trafficRoutingManager: r.trafficRoutingManager,
recorder: r.Recorder,
}
workload, err := r.finder.GetWorkloadForRef(rollout)
if err != nil {
t.Fatalf("GetWorkloadForRef failed: %s", err.Error())
}
c := &RolloutContext{
Rollout: rollout,
NewStatus: rollout.Status.DeepCopy(),
Workload: workload,
}
err = r.blueGreenManager.runCanary(c)
if err != nil {
t.Fatalf("reconcileRolloutProgressing failed: %s", err.Error())
}
checkBatchReleaseEqual(fc, t, client.ObjectKey{Name: rollout.Name}, cs.expectBr())
cStatus := c.NewStatus.DeepCopy()
cStatus.Message = ""
if cStatus.BlueGreenStatus != nil {
cStatus.BlueGreenStatus.LastUpdateTime = nil
cStatus.BlueGreenStatus.Message = ""
}
cond := util.GetRolloutCondition(*cStatus, v1beta1.RolloutConditionProgressing)
cond.Message = ""
util.SetRolloutCondition(cStatus, *cond)
expectStatus := cs.expectStatus()
if !reflect.DeepEqual(expectStatus, cStatus) {
t.Fatalf("expect(%s), but get(%s)", util.DumpJSON(cs.expectStatus()), util.DumpJSON(cStatus))
}
})
}
}
func TestBlueGreenRunCanaryPaused(t *testing.T) {
cases := []struct {
name string
getRollout func() *v1beta1.Rollout
expectStatus func() *v1beta1.RolloutStatus
}{
{
name: "paused, last step, 60% weight",
getRollout: func() *v1beta1.Rollout {
obj := rolloutDemoBlueGreen.DeepCopy()
obj.Status.BlueGreenStatus.ObservedWorkloadGeneration = 2
obj.Status.BlueGreenStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.BlueGreenStatus.StableRevision = "pod-template-hash-v1"
obj.Status.BlueGreenStatus.UpdatedRevision = "6f8cc56547"
obj.Status.BlueGreenStatus.CurrentStepIndex = 3
obj.Status.BlueGreenStatus.NextStepIndex = 4
obj.Status.BlueGreenStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.BlueGreenStatus.CurrentStepState = v1beta1.CanaryStepStatePaused
return obj
},
expectStatus: func() *v1beta1.RolloutStatus {
obj := rolloutDemoBlueGreen.Status.DeepCopy()
obj.BlueGreenStatus.ObservedWorkloadGeneration = 2
obj.BlueGreenStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.BlueGreenStatus.StableRevision = "pod-template-hash-v1"
obj.BlueGreenStatus.UpdatedRevision = "6f8cc56547"
obj.BlueGreenStatus.CurrentStepIndex = 3
obj.BlueGreenStatus.NextStepIndex = 4
obj.BlueGreenStatus.PodTemplateHash = "pod-template-hash-v2"
obj.BlueGreenStatus.CurrentStepState = v1beta1.CanaryStepStatePaused
return obj
},
},
}
for _, cs := range cases {
t.Run(cs.name, func(t *testing.T) {
rollout := cs.getRollout()
fc := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rollout).Build()
r := &RolloutReconciler{
Client: fc,
Scheme: scheme,
Recorder: record.NewFakeRecorder(10),
finder: util.NewControllerFinder(fc),
trafficRoutingManager: trafficrouting.NewTrafficRoutingManager(fc),
}
r.blueGreenManager = &blueGreenReleaseManager{
Client: fc,
trafficRoutingManager: r.trafficRoutingManager,
recorder: r.Recorder,
}
c := &RolloutContext{
Rollout: rollout,
NewStatus: rollout.Status.DeepCopy(),
}
err := r.blueGreenManager.runCanary(c)
if err != nil {
t.Fatalf("reconcileRolloutProgressing failed: %s", err.Error())
}
cStatus := c.NewStatus.DeepCopy()
cStatus.BlueGreenStatus.LastUpdateTime = nil
cStatus.BlueGreenStatus.Message = ""
cStatus.Message = ""
if !reflect.DeepEqual(cs.expectStatus(), cStatus) {
t.Fatalf("expect(%s), but get(%s)", util.DumpJSON(cs.expectStatus()), util.DumpJSON(cStatus))
}
})
}
}

View File

@ -29,11 +29,9 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
utilpointer "k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
@ -47,7 +45,7 @@ type canaryReleaseManager struct {
func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
canaryStatus := c.NewStatus.CanaryStatus
if br, err := m.fetchBatchRelease(c.Rollout.Namespace, c.Rollout.Name); err != nil && !errors.IsNotFound(err) {
if br, err := fetchBatchRelease(m.Client, c.Rollout.Namespace, c.Rollout.Name); err != nil && !errors.IsNotFound(err) {
klog.Errorf("rollout(%s/%s) fetch batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return err
} else if err == nil {
@ -114,7 +112,7 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
*/
expectedReplicas, _ := intstr.GetScaledValueFromIntOrPercent(currentStep.Replicas, int(c.Workload.Replicas), true)
if expectedReplicas >= int(c.Workload.Replicas) && v1beta1.IsRealPartition(c.Rollout) {
klog.Infof("special case detected: rollout(%s/%s) restore stable Service", c.Rollout.Namespace, c.Rollout.Name)
klog.Infof("Bypass the ingress-nginx bug for partition-style, rollout(%s/%s) restore stable Service", c.Rollout.Namespace, c.Rollout.Name)
retry, err := m.trafficRoutingManager.RestoreStableService(tr)
if err != nil {
return err
@ -144,7 +142,7 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
*/
if canaryStatus.CurrentStepIndex == 1 {
if !tr.DisableGenerateCanaryService {
klog.Infof("special case detected: rollout(%s/%s) patch stable Service", c.Rollout.Namespace, c.Rollout.Name)
klog.Infof("Before the first batch, rollout(%s/%s) patch stable Service", c.Rollout.Namespace, c.Rollout.Name)
retry, err := m.trafficRoutingManager.PatchStableService(tr)
if err != nil {
return err
@ -194,13 +192,7 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStateTrafficRouting, canaryStatus.CurrentStepState)
}
// in two cases, we should wait the default grace period
// - a period after CanaryStepStateUpgrade is just done (https://github.com/openkruise/rollouts/pull/185)
// - a period after CanaryStepStateTrafficRouting is just done
if tr.RecheckDuration <= 0 {
tr.RecheckDuration = time.Duration(trafficrouting.GetGraceSeconds(c.Rollout.Spec.Strategy.GetTrafficRouting(), defaultGracePeriodSeconds)) * time.Second
}
expectedTime := time.Now().Add(tr.RecheckDuration)
expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second)
c.RecheckTime = &expectedTime
case v1beta1.CanaryStepStateMetricsAnalysis:
@ -259,7 +251,7 @@ func (m *canaryReleaseManager) doCanaryUpgrade(c *RolloutContext) (bool, error)
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and upgrade workload to new version", canaryStatus.CurrentStepIndex, steps)
c.NewStatus.Message = cond.Message
// run batch release to upgrade the workloads
done, br, err := m.runBatchRelease(c.Rollout, getRolloutID(c.Workload), canaryStatus.CurrentStepIndex, c.Workload.IsInRollback)
done, br, err := runBatchRelease(m, c.Rollout, getRolloutID(c.Workload), canaryStatus.CurrentStepIndex, c.Workload.IsInRollback)
if err != nil {
return false, err
} else if !done {
@ -323,39 +315,27 @@ func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) {
func (m *canaryReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) {
canaryStatus := c.NewStatus.CanaryStatus
nextIndex := canaryStatus.NextStepIndex
/*
we set the CurrentStepIndex same as NextStepIndex to prevent currentStepIndex from out of range
for example, if we had a rollout with 4 steps and CurrentStepIndex was 2
then, the user removed 3 steps from the plan, we can calculate NextStepIndex is 1 correctly,
but CurrentStepIndex remains 2, which could cause out of range.
*/
resetCurrentIndex := false
if int(canaryStatus.CurrentStepIndex) > len(c.Rollout.Spec.Strategy.Canary.Steps) {
canaryStatus.CurrentStepIndex = nextIndex
resetCurrentIndex = true
}
// since we forbid adding or removing steps, currentStepIndex should always be valid
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
if resetCurrentIndex || nextIndex != util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex) && nextIndex > 0 {
// nextIndex=-1 means the release is done, nextIndex=0 is not used
if nextIndex := canaryStatus.NextStepIndex; nextIndex != util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex) && nextIndex > 0 {
currentIndexBackup := canaryStatus.CurrentStepIndex
currentStepStateBackup := canaryStatus.CurrentStepState
// update the current and next stepIndex
canaryStatus.CurrentStepIndex = nextIndex
canaryStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, nextIndex)
nextStep := c.Rollout.Spec.Strategy.Canary.Steps[nextIndex-1]
// if the Replicas between currentStep and nextStep is same, we can jump to
// the TrafficRouting step; otherwise, we should start from the Init step
if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) && !resetCurrentIndex {
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
// compare next step and current step to decide the state we should go
if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) {
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, currentStepStateBackup, canaryStatus.CurrentStepState)
} else {
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateInit
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, currentStepStateBackup, v1beta1.CanaryStepStateInit)
}
klog.Infof("rollout(%s/%s) canary step from(%d) -> to(%d)", c.Rollout.Namespace, c.Rollout.Name, currentIndexBackup, canaryStatus.CurrentStepIndex)
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
klog.Infof("rollout(%s/%s) step(%d->%d) state from(%s -> %s)",
c.Rollout.Namespace, c.Rollout.Name,
currentIndexBackup, canaryStatus.CurrentStepIndex,
currentStepStateBackup, canaryStatus.CurrentStepState)
return true
}
return false
@ -369,13 +349,13 @@ func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, erro
return true, nil
}
// rollout progressing complete, remove rollout progressing annotation in workload
err := m.removeRolloutProgressingAnnotation(c)
err := removeRolloutProgressingAnnotation(m.Client, c)
if err != nil {
return false, err
}
tr := newTrafficRoutingContext(c)
// execute steps based on the predefined order for each reason
nextStep := nextTask(c.FinalizeReason, canaryStatus.FinalisingStep)
nextStep := nextCanaryTask(c.FinalizeReason, canaryStatus.FinalisingStep)
// if current step is empty, set it with the first step
// if current step is end, we just return
if len(canaryStatus.FinalisingStep) == 0 {
@ -388,27 +368,27 @@ func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, erro
klog.Infof("rollout(%s/%s) Finalising Step is %s", c.Rollout.Namespace, c.Rollout.Name, canaryStatus.FinalisingStep)
var retry bool
// the order of steps is maitained by calculating thenextStep
// the order of steps is maitained by calculating the nextStep
switch canaryStatus.FinalisingStep {
// set workload.pause=false; set workload.partition=0
case v1beta1.FinalisingStepTypeBatchRelease:
retry, err = m.finalizingBatchRelease(c)
case v1beta1.FinalisingStepResumeWorkload:
retry, err = finalizingBatchRelease(m.Client, c)
// delete batchRelease
case v1beta1.FinalisingStepTypeDeleteBR:
retry, err = m.removeBatchRelease(c)
case v1beta1.FinalisingStepReleaseWorkloadControl:
retry, err = removeBatchRelease(m.Client, c)
// restore the gateway resources (ingress/gatewayAPI/Istio), that means
// only stable Service will accept the traffic
case v1beta1.FinalisingStepTypeGateway:
case v1beta1.FinalisingStepRouteTrafficToStable:
retry, err = m.trafficRoutingManager.RestoreGateway(tr)
// restore the stable service
case v1beta1.FinalisingStepTypeStableService:
case v1beta1.FinalisingStepRestoreStableService:
retry, err = m.trafficRoutingManager.RestoreStableService(tr)
// remove canary service
case v1beta1.FinalisingStepTypeRemoveCanaryService:
case v1beta1.FinalisingStepRemoveCanaryService:
retry, err = m.trafficRoutingManager.RemoveCanaryService(tr)
default:
nextStep = nextTask(c.FinalizeReason, "")
nextStep = nextCanaryTask(c.FinalizeReason, "")
klog.Warningf("unexpected finalising step, current step(%s), start from the first step(%s)", canaryStatus.FinalisingStep, nextStep)
canaryStatus.FinalisingStep = nextStep
return false, nil
@ -425,75 +405,11 @@ func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, erro
return false, nil
}
func (m *canaryReleaseManager) removeRolloutProgressingAnnotation(c *RolloutContext) error {
if c.Workload == nil {
return nil
}
if _, ok := c.Workload.Annotations[util.InRolloutProgressingAnnotation]; !ok {
return nil
}
workloadRef := c.Rollout.Spec.WorkloadRef
workloadGVK := schema.FromAPIVersionAndKind(workloadRef.APIVersion, workloadRef.Kind)
obj := util.GetEmptyWorkloadObject(workloadGVK)
obj.SetNamespace(c.Workload.Namespace)
obj.SetName(c.Workload.Name)
body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}}`, util.InRolloutProgressingAnnotation)
if err := m.Patch(context.TODO(), obj, client.RawPatch(types.MergePatchType, []byte(body))); err != nil {
klog.Errorf("rollout(%s/%s) patch workload(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, c.Workload.Name, err.Error())
return err
}
klog.Infof("remove rollout(%s/%s) workload(%s) annotation[%s] success", c.Rollout.Namespace, c.Rollout.Name, c.Workload.Name, util.InRolloutProgressingAnnotation)
return nil
func (m *canaryReleaseManager) fetchClient() client.Client {
return m.Client
}
func (m *canaryReleaseManager) runBatchRelease(rollout *v1beta1.Rollout, rolloutId string, batch int32, isRollback bool) (bool, *v1beta1.BatchRelease, error) {
batch = batch - 1
br, err := m.fetchBatchRelease(rollout.Namespace, rollout.Name)
if errors.IsNotFound(err) {
// create new BatchRelease Crd
br = createBatchRelease(rollout, rolloutId, batch, isRollback)
if err = m.Create(context.TODO(), br); err != nil && !errors.IsAlreadyExists(err) {
klog.Errorf("rollout(%s/%s) create BatchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
klog.Infof("rollout(%s/%s) create BatchRelease(%s) success", rollout.Namespace, rollout.Name, util.DumpJSON(br))
return false, br, nil
} else if err != nil {
klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
// check whether batchRelease configuration is the latest
newBr := createBatchRelease(rollout, rolloutId, batch, isRollback)
if reflect.DeepEqual(br.Spec, newBr.Spec) && reflect.DeepEqual(br.Annotations, newBr.Annotations) {
klog.Infof("rollout(%s/%s) do batchRelease batch(%d) success", rollout.Namespace, rollout.Name, batch+1)
return true, br, nil
}
// update batchRelease to the latest version
if err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
if err = m.Get(context.TODO(), client.ObjectKey{Namespace: newBr.Namespace, Name: newBr.Name}, br); err != nil {
klog.Errorf("error getting BatchRelease(%s/%s) from client", newBr.Namespace, newBr.Name)
return err
}
br.Spec = newBr.Spec
br.Annotations = newBr.Annotations
return m.Client.Update(context.TODO(), br)
}); err != nil {
klog.Errorf("rollout(%s/%s) update batchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
klog.Infof("rollout(%s/%s) update batchRelease(%s) configuration to latest", rollout.Namespace, rollout.Name, util.DumpJSON(br))
return false, br, nil
}
func (m *canaryReleaseManager) fetchBatchRelease(ns, name string) (*v1beta1.BatchRelease, error) {
br := &v1beta1.BatchRelease{}
// batchRelease.name is equal related rollout.name
err := m.Get(context.TODO(), client.ObjectKey{Namespace: ns, Name: name}, br)
return br, err
}
func createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32, isRollback bool) *v1beta1.BatchRelease {
func (m *canaryReleaseManager) createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32, isRollback bool) *v1beta1.BatchRelease {
var batches []v1beta1.ReleaseBatch
for _, step := range rollout.Spec.Strategy.Canary.Steps {
batches = append(batches, v1beta1.ReleaseBatch{CanaryReplicas: *step.Replicas})
@ -531,83 +447,6 @@ func createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32,
return br
}
// bool means if we need retry; if error is not nil, always retry
func (m *canaryReleaseManager) removeBatchRelease(c *RolloutContext) (bool, error) {
batch := &v1beta1.BatchRelease{}
err := m.Get(context.TODO(), client.ObjectKey{Namespace: c.Rollout.Namespace, Name: c.Rollout.Name}, batch)
if err != nil && errors.IsNotFound(err) {
return false, nil
} else if err != nil {
klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name)
return true, err
}
if !batch.DeletionTimestamp.IsZero() {
klog.Infof("rollout(%s/%s) BatchRelease is terminating, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
//delete batchRelease
err = m.Delete(context.TODO(), batch)
if err != nil {
klog.Errorf("rollout(%s/%s) delete BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return true, err
}
klog.Infof("rollout(%s/%s) deleting BatchRelease, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
// bool means if we need retry; if error is not nil, always retry
func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool, error) {
br, err := m.fetchBatchRelease(c.Rollout.Namespace, c.Rollout.Name)
if err != nil {
if errors.IsNotFound(err) {
return false, nil
}
return true, err
}
waitReady := c.WaitReady
// The Completed phase means batchRelease controller has processed all it
// should process. If BatchRelease phase is completed, we can do nothing.
if br.Spec.ReleasePlan.BatchPartition == nil &&
br.Status.Phase == v1beta1.RolloutPhaseCompleted {
klog.Infof("rollout(%s/%s) finalizing batchRelease(%s) done", c.Rollout.Namespace, c.Rollout.Name, util.DumpJSON(br.Status))
return false, nil
}
// If BatchPartition is nil, BatchRelease will directly resume workload via:
// - * set workload Paused = false if it needs;
// - * set workload Partition = null if it needs.
if br.Spec.ReleasePlan.BatchPartition == nil {
// - If checkReady is true, finalizing policy must be "WaitResume";
// - If checkReady is false, finalizing policy must be NOT "WaitResume";
// Otherwise, we should correct it.
switch br.Spec.ReleasePlan.FinalizingPolicy {
case v1beta1.WaitResumeFinalizingPolicyType:
if waitReady { // no need to patch again
return true, nil
}
default:
if !waitReady { // no need to patch again
return true, nil
}
}
}
// Correct finalizing policy.
policy := v1beta1.ImmediateFinalizingPolicyType
if waitReady {
policy = v1beta1.WaitResumeFinalizingPolicyType
}
// Patch BatchPartition and FinalizingPolicy, BatchPartition always patch null here.
body := fmt.Sprintf(`{"spec":{"releasePlan":{"batchPartition":null,"finalizingPolicy":"%s"}}}`, policy)
if err = m.Patch(context.TODO(), br, client.RawPatch(types.MergePatchType, []byte(body))); err != nil {
return true, err
}
klog.Infof("rollout(%s/%s) patch batchRelease(%s) success", c.Rollout.Namespace, c.Rollout.Name, body)
return true, nil
}
// syncBatchRelease sync status of br to canaryStatus, and sync rollout-id of canaryStatus to br.
func (m *canaryReleaseManager) syncBatchRelease(br *v1beta1.BatchRelease, canaryStatus *v1beta1.CanaryStatus) error {
// sync from BatchRelease status to Rollout canaryStatus
@ -628,7 +467,7 @@ func (m *canaryReleaseManager) syncBatchRelease(br *v1beta1.BatchRelease, canary
}
// calculate next task
func nextTask(reason string, currentTask v1beta1.FinalisingStepType) v1beta1.FinalisingStepType {
func nextCanaryTask(reason string, currentTask v1beta1.FinalisingStepType) v1beta1.FinalisingStepType {
var taskSequence []v1beta1.FinalisingStepType
//REVIEW - should we consider more complex scenarios?
// like, user rollbacks the workload and disables the Rollout at the same time?
@ -638,19 +477,19 @@ func nextTask(reason string, currentTask v1beta1.FinalisingStepType) v1beta1.Fin
// in the first step
case v1beta1.FinaliseReasonRollback: // rollback
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepTypeGateway, // route all traffic to stable version
v1beta1.FinalisingStepTypeBatchRelease, // scale up old, scale down new
v1beta1.FinalisingStepTypeDeleteBR,
v1beta1.FinalisingStepTypeStableService,
v1beta1.FinalisingStepTypeRemoveCanaryService,
v1beta1.FinalisingStepRouteTrafficToStable, // route all traffic to stable version
v1beta1.FinalisingStepResumeWorkload, // scale up old, scale down new
v1beta1.FinalisingStepReleaseWorkloadControl,
v1beta1.FinalisingStepRestoreStableService,
v1beta1.FinalisingStepRemoveCanaryService,
}
default: // others: success/disabled/deleting rollout
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepTypeStableService,
v1beta1.FinalisingStepTypeGateway,
v1beta1.FinalisingStepTypeRemoveCanaryService,
v1beta1.FinalisingStepTypeBatchRelease, // scale up new, scale down old
v1beta1.FinalisingStepTypeDeleteBR,
v1beta1.FinalisingStepRestoreStableService,
v1beta1.FinalisingStepRouteTrafficToStable,
v1beta1.FinalisingStepRemoveCanaryService,
v1beta1.FinalisingStepResumeWorkload, // scale up new, scale down old
v1beta1.FinalisingStepReleaseWorkloadControl,
}
}
// if currentTask is empty, return first task

View File

@ -66,6 +66,7 @@ type RolloutReconciler struct {
finder *util.ControllerFinder
trafficRoutingManager *trafficrouting.Manager
canaryManager *canaryReleaseManager
blueGreenManager *blueGreenReleaseManager
}
//+kubebuilder:rbac:groups=rollouts.kruise.io,resources=rollouts,verbs=get;list;watch;create;update;patch;delete
@ -198,5 +199,10 @@ func (r *RolloutReconciler) SetupWithManager(mgr ctrl.Manager) error {
trafficRoutingManager: r.trafficRoutingManager,
recorder: r.Recorder,
}
r.blueGreenManager = &blueGreenReleaseManager{
Client: mgr.GetClient(),
trafficRoutingManager: r.trafficRoutingManager,
recorder: r.Recorder,
}
return nil
}

View File

@ -106,6 +106,73 @@ var (
},
},
}
rolloutDemoBlueGreen = &v1beta1.Rollout{
ObjectMeta: metav1.ObjectMeta{
Name: "rollout-demo",
Labels: map[string]string{},
Annotations: map[string]string{
util.RolloutHashAnnotation: "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd",
},
},
Spec: v1beta1.RolloutSpec{
WorkloadRef: v1beta1.ObjectRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "echoserver",
},
Strategy: v1beta1.RolloutStrategy{
BlueGreen: &v1beta1.BlueGreenStrategy{
Steps: []v1beta1.CanaryStep{
{
TrafficRoutingStrategy: v1beta1.TrafficRoutingStrategy{
Traffic: utilpointer.String("0%"),
},
Replicas: &intstr.IntOrString{StrVal: "50%", Type: intstr.String},
},
{
TrafficRoutingStrategy: v1beta1.TrafficRoutingStrategy{
Traffic: utilpointer.String("0%"),
},
Replicas: &intstr.IntOrString{StrVal: "100%", Type: intstr.String},
},
{
TrafficRoutingStrategy: v1beta1.TrafficRoutingStrategy{
Traffic: utilpointer.String("50%"),
},
Replicas: &intstr.IntOrString{StrVal: "100%", Type: intstr.String},
},
{
TrafficRoutingStrategy: v1beta1.TrafficRoutingStrategy{
Traffic: utilpointer.String("100%"),
},
Replicas: &intstr.IntOrString{StrVal: "100%", Type: intstr.String},
},
},
TrafficRoutings: []v1beta1.TrafficRoutingRef{
{
Service: "echoserver",
Ingress: &v1beta1.IngressTrafficRouting{
Name: "echoserver",
},
GracePeriodSeconds: 0, // To facilitate testing, don't wait after traffic routing operation
},
},
},
},
},
Status: v1beta1.RolloutStatus{
Phase: v1beta1.RolloutPhaseProgressing,
BlueGreenStatus: &v1beta1.BlueGreenStatus{},
Conditions: []v1beta1.RolloutCondition{
{
Type: v1beta1.RolloutConditionProgressing,
Reason: v1alpha1.ProgressingReasonInitializing,
Status: corev1.ConditionTrue,
},
},
},
}
maxUnavailable = intstr.FromString("20%")
deploymentDemo = &apps.Deployment{
TypeMeta: metav1.TypeMeta{

View File

@ -378,8 +378,7 @@ func (r *RolloutReconciler) getReleaseManager(rollout *v1beta1.Rollout) (Release
if rollout.Spec.Strategy.IsCanaryStragegy() {
return r.canaryManager, nil
} else if rollout.Spec.Strategy.IsBlueGreenRelease() {
// placeholder for upcoming PR
// return r.blueGreenManager, nil
return r.blueGreenManager, nil
}
return nil, fmt.Errorf("unknown rolling style: %s, and thus cannot call corresponding release manager", rollout.Spec.Strategy.GetRollingStyle())
}
@ -420,7 +419,7 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
}
// if no trafficRouting exists, simply remove batchRelease
if !c.Rollout.Spec.Strategy.HasTrafficRoutings() {
retry, err := releaseManager.removeBatchRelease(c)
retry, err := removeBatchRelease(releaseManager.fetchClient(), c)
if err != nil {
klog.Errorf("rollout(%s/%s) DoFinalising batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
@ -440,11 +439,11 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
switch subStatus.FinalisingStep {
default:
// start from FinalisingStepTypeGateway
subStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
subStatus.FinalisingStep = v1beta1.FinalisingStepRouteTrafficToStable
fallthrough
// firstly, restore the gateway resources (ingress/gatewayAPI/Istio), that means
// only stable Service will accept the traffic
case v1beta1.FinalisingStepTypeGateway:
case v1beta1.FinalisingStepRouteTrafficToStable:
retry, err := r.trafficRoutingManager.RestoreGateway(tr)
if err != nil || retry {
subStatus.LastUpdateTime = tr.LastUpdateTime
@ -452,13 +451,13 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
}
klog.Infof("rollout(%s/%s) in step (%s), and success", c.Rollout.Namespace, c.Rollout.Name, subStatus.FinalisingStep)
subStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
subStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
subStatus.FinalisingStep = v1beta1.FinalisingStepReleaseWorkloadControl
fallthrough
// secondly, remove the batchRelease. For canary release, it means the immediate deletion of
// canary deployment, for other release, the v2 pods won't be deleted immediately
// in both cases, only the stable pods (v1) accept the traffic
case v1beta1.FinalisingStepTypeDeleteBR:
retry, err := releaseManager.removeBatchRelease(c)
case v1beta1.FinalisingStepReleaseWorkloadControl:
retry, err := removeBatchRelease(releaseManager.fetchClient(), c)
if err != nil {
klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
@ -467,7 +466,7 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
}
klog.Infof("rollout(%s/%s) in step (%s), and success", c.Rollout.Namespace, c.Rollout.Name, subStatus.FinalisingStep)
subStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
subStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteCanaryService
subStatus.FinalisingStep = v1beta1.FinalisingStepRemoveCanaryService
fallthrough
// finally, remove the canary service. This step can swap with the last step.
/*
@ -478,7 +477,7 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
stable service selector, then the traffic will route to both v1 and v2 before executing the
first step of v3 release.
*/
case v1beta1.FinalisingStepTypeDeleteCanaryService:
case v1beta1.FinalisingStepRemoveCanaryService:
// ignore the grace period because it is the last step
_, err := r.trafficRoutingManager.RemoveCanaryService(tr)
if err != nil {
@ -495,7 +494,7 @@ func (r *RolloutReconciler) recalculateCanaryStep(c *RolloutContext) (int32, err
if err != nil {
return 0, err
}
batch, err := releaseManager.fetchBatchRelease(c.Rollout.Namespace, c.Rollout.Name)
batch, err := fetchBatchRelease(releaseManager.fetchClient(), c.Rollout.Namespace, c.Rollout.Name)
if errors.IsNotFound(err) {
return 1, nil
} else if err != nil {
@ -506,7 +505,12 @@ func (r *RolloutReconciler) recalculateCanaryStep(c *RolloutContext) (int32, err
if c.NewStatus != nil {
currentIndex = c.NewStatus.GetSubStatus().CurrentStepIndex - 1
}
steps := append([]int{}, int(currentIndex))
steps := make([]int, 0)
// currentIndex may greater than len(c.Rollout.Spec.Strategy.GetSteps()) if user changed the release plan
// currentIndex should never be less than 0 theoricaly unless user changed it intentionally
if ci := int(currentIndex); ci >= 0 && ci < len(c.Rollout.Spec.Strategy.GetSteps()) {
steps = append(steps, ci)
}
// we don't distinguish between the changes in Replicas and Traffic
// Whatever the change is, we recalculate the step.
// we put the current step index first for retrieval, so that if Traffic is the only change,

View File

@ -289,7 +289,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepRestoreStableService
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
@ -334,7 +334,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// given that the selector of stable Service is removed
// we will go on it the next step, i.e. patch restoreGateway
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepRestoreStableService
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -358,7 +358,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepRouteTrafficToStable
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
@ -403,7 +403,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// because the batchRelease hasn't completed (ie. br.Status.Phase is not completed),
// it will take more than one reconciles to go on to the next step
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepResumeWorkload
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -427,7 +427,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepResumeWorkload
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
@ -471,7 +471,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// the batchRelease has completed (ie. br.Status.Phase is completed),
// we expect the finalizing step to be next step, i.e. deleteBatchRelease
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepResumeWorkload
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -494,7 +494,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepReleaseWorkloadControl
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond2 := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond2.Reason = v1alpha1.ProgressingReasonFinalising
@ -534,7 +534,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// deleteBatchRelease is the last step, and it won't wait a grace time
// after this step, this release should be succeeded
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepReleaseWorkloadControl
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -740,7 +740,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.CurrentStepIndex = 3
s.CanaryStatus.CanaryReplicas = 5
s.CanaryStatus.CanaryReadyReplicas = 3
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepRouteTrafficToStable
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
@ -790,7 +790,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.CanaryReadyReplicas = 3
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepReleaseWorkloadControl
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
util.SetRolloutCondition(&obj.Status, *cond)

View File

@ -17,7 +17,17 @@ limitations under the License.
package rollout
import (
"context"
"fmt"
"reflect"
"github.com/openkruise/rollouts/api/v1beta1"
"github.com/openkruise/rollouts/pkg/util"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
)
type ReleaseManager interface {
@ -27,8 +37,147 @@ type ReleaseManager interface {
doCanaryJump(c *RolloutContext) bool
// called when user accomplishes a release / does a rollback, or disables/removes the Rollout Resource
doCanaryFinalising(c *RolloutContext) (bool, error)
// fetch the BatchRelease object
fetchBatchRelease(ns, name string) (*v1beta1.BatchRelease, error)
// remove the BatchRelease object
removeBatchRelease(c *RolloutContext) (bool, error)
// create btach release
createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32, isRollback bool) *v1beta1.BatchRelease
// retrun client
fetchClient() client.Client
}
func fetchBatchRelease(cli client.Client, ns, name string) (*v1beta1.BatchRelease, error) {
br := &v1beta1.BatchRelease{}
// batchRelease.name is equal related rollout.name
err := cli.Get(context.TODO(), client.ObjectKey{Namespace: ns, Name: name}, br)
return br, err
}
func removeRolloutProgressingAnnotation(cli client.Client, c *RolloutContext) error {
if c.Workload == nil {
return nil
}
if _, ok := c.Workload.Annotations[util.InRolloutProgressingAnnotation]; !ok {
return nil
}
workloadRef := c.Rollout.Spec.WorkloadRef
workloadGVK := schema.FromAPIVersionAndKind(workloadRef.APIVersion, workloadRef.Kind)
obj := util.GetEmptyWorkloadObject(workloadGVK)
obj.SetNamespace(c.Workload.Namespace)
obj.SetName(c.Workload.Name)
body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}}`, util.InRolloutProgressingAnnotation)
if err := cli.Patch(context.TODO(), obj, client.RawPatch(types.MergePatchType, []byte(body))); err != nil {
klog.Errorf("rollout(%s/%s) patch workload(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, c.Workload.Name, err.Error())
return err
}
klog.Infof("remove rollout(%s/%s) workload(%s) annotation[%s] success", c.Rollout.Namespace, c.Rollout.Name, c.Workload.Name, util.InRolloutProgressingAnnotation)
return nil
}
// bool means if we need retry; if error is not nil, always retry
func removeBatchRelease(cli client.Client, c *RolloutContext) (bool, error) {
batch := &v1beta1.BatchRelease{}
err := cli.Get(context.TODO(), client.ObjectKey{Namespace: c.Rollout.Namespace, Name: c.Rollout.Name}, batch)
if err != nil && errors.IsNotFound(err) {
return false, nil
} else if err != nil {
klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name)
return true, err
}
if !batch.DeletionTimestamp.IsZero() {
klog.Infof("rollout(%s/%s) BatchRelease is terminating, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
//delete batchRelease
err = cli.Delete(context.TODO(), batch)
if err != nil {
klog.Errorf("rollout(%s/%s) delete BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return true, err
}
klog.Infof("rollout(%s/%s) deleting BatchRelease, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
// bool means if we need retry; if error is not nil, always retry
/*
what does c.waitReady do:
c.waitReady is true
-> finalizingPolicy of batchRelease should be waitResume.
-> br.Status.Phase will be RolloutPhaseCompleted until all pods of workload is upgraded.
-> and then, this function return true.
and vice versa.
Note, c.waitReady field is true only when the finalizing reason is FinaliseReasonSuccess.
And, finalizingPolicy field is respected only when it is canary-style.
*/
func finalizingBatchRelease(cli client.Client, c *RolloutContext) (bool, error) {
br, err := fetchBatchRelease(cli, c.Rollout.Namespace, c.Rollout.Name)
if err != nil {
if errors.IsNotFound(err) {
return false, nil
}
return true, err
}
// The Completed phase means batchRelease controller has processed all it
// should process. If BatchRelease phase is completed, we can do nothing.
if br.Spec.ReleasePlan.BatchPartition == nil &&
br.Status.Phase == v1beta1.RolloutPhaseCompleted {
klog.Infof("rollout(%s/%s) finalizing batchRelease(%s) done", c.Rollout.Namespace, c.Rollout.Name, util.DumpJSON(br.Status))
return false, nil
}
// If BatchPartition is already nil, and waitReady is also satisfied, do nothing
if br.Spec.ReleasePlan.BatchPartition == nil {
// if waitReady is true, and currentPolicy is already waitResume, do nothing
// if waitReady is false, and currentPolicy is already immediate, do nothing
if currentPolicy := br.Spec.ReleasePlan.FinalizingPolicy; (currentPolicy == v1beta1.WaitResumeFinalizingPolicyType) == c.WaitReady {
return true, nil
}
}
// Correct finalizing policy.
policy := v1beta1.ImmediateFinalizingPolicyType
if c.WaitReady {
policy = v1beta1.WaitResumeFinalizingPolicyType
}
// Patch BatchPartition and FinalizingPolicy, BatchPartition always patch null here.
body := fmt.Sprintf(`{"spec":{"releasePlan":{"batchPartition":null,"finalizingPolicy":"%s"}}}`, policy)
if err = cli.Patch(context.TODO(), br, client.RawPatch(types.MergePatchType, []byte(body))); err != nil {
return true, err
}
klog.Infof("rollout(%s/%s) patch batchRelease(%s) success", c.Rollout.Namespace, c.Rollout.Name, body)
return true, nil
}
func runBatchRelease(m ReleaseManager, rollout *v1beta1.Rollout, rolloutId string, batch int32, isRollback bool) (bool, *v1beta1.BatchRelease, error) {
cli := m.fetchClient()
batch = batch - 1
br, err := fetchBatchRelease(cli, rollout.Namespace, rollout.Name)
if errors.IsNotFound(err) {
// create new BatchRelease Crd
br = m.createBatchRelease(rollout, rolloutId, batch, isRollback)
if err = cli.Create(context.TODO(), br); err != nil && !errors.IsAlreadyExists(err) {
klog.Errorf("rollout(%s/%s) create BatchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
klog.Infof("rollout(%s/%s) create BatchRelease(%s) success", rollout.Namespace, rollout.Name, util.DumpJSON(br))
return false, br, nil
} else if err != nil {
klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
// check whether batchRelease configuration is the latest
newBr := m.createBatchRelease(rollout, rolloutId, batch, isRollback)
if reflect.DeepEqual(br.Spec, newBr.Spec) && reflect.DeepEqual(br.Annotations, newBr.Annotations) {
klog.Infof("rollout(%s/%s) do batchRelease batch(%d) success", rollout.Namespace, rollout.Name, batch+1)
return true, br, nil
}
// update batchRelease to the latest version
br.Spec = newBr.Spec
br.Annotations = newBr.Annotations
if err := cli.Update(context.TODO(), br); err != nil {
klog.Errorf("rollout(%s/%s) update batchRelease failed: %s, requeue to retry", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
klog.Infof("rollout(%s/%s) update batchRelease(%s) configuration to latest", rollout.Namespace, rollout.Name, util.DumpJSON(br))
return false, br, nil
}

View File

@ -34,6 +34,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
"k8s.io/utils/integer"
utilpointer "k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
)
@ -223,6 +224,41 @@ func (m *Manager) FinalisingTrafficRouting(c *TrafficRoutingContext) (bool, erro
return true, nil
}
// returns:
// - if error is not nil, usually we need to retry later. Only if error is nil, we consider the bool.
// - The bool value indicates whether retry is needed. If true, it usually means
// gateway resources have been updated and we need to wait for `graceSeconds`.
//
// only if error is nil AND retry is false, this calling can be considered as completed
func (m *Manager) RouteAllTrafficToNewVersion(c *TrafficRoutingContext) (bool, error) {
klog.InfoS("route all traffic to new version", "rollout", c.Key)
if len(c.ObjectRef) == 0 {
return false, nil
}
// build up the network provider
stableService := c.ObjectRef[0].Service
cServiceName := getCanaryServiceName(stableService, c.OnlyTrafficRouting, c.DisableGenerateCanaryService)
trController, err := newNetworkProvider(m.Client, c, stableService, cServiceName)
if err != nil {
klog.Errorf("%s newTrafficRoutingController failed: %s", c.Key, err.Error())
return false, err
}
graceSeconds := GetGraceSeconds(c.ObjectRef, defaultGracePeriodSeconds)
retry, remaining, err := grace.RunWithGraceSeconds(string(c.OwnerRef.UID), "updateRoute", graceSeconds, func() (bool, error) {
// route all traffic to new version
c.Strategy.Matches = nil
c.Strategy.Traffic = utilpointer.StringPtr("100%")
//NOTE - This return value "verified" has the opposite semantics with "modified"
verified, err := trController.EnsureRoutes(context.TODO(), &c.Strategy)
if !verified {
c.LastUpdateTime = &metav1.Time{Time: time.Now()}
}
return !verified, err
})
UpdateRecheckDuration(c, remaining)
return retry, err
}
// returns:
// - if error is not nil, usually we need to retry later. Only if error is nil, we consider the bool.
// - The bool value indicates whether retry is needed. If true, it usually means
@ -437,11 +473,11 @@ func (m *Manager) RestoreStableService(c *TrafficRoutingContext) (bool, error) {
serviceName := c.ObjectRef[0].Service
err := m.Get(context.TODO(), client.ObjectKey{Namespace: c.Namespace, Name: serviceName}, stableService)
if errors.IsNotFound(err) {
return true, nil
return false, nil
}
if err != nil {
klog.Errorf("%s get stable service(%s) failed: %s", c.Key, serviceName, err.Error())
return false, err
return true, err
}
// restore stable Service

View File

@ -1293,8 +1293,10 @@ func TestRestoreGateway(t *testing.T) {
// the second call, it should be no error and no retry
time.Sleep(1 * time.Second)
retry, err = manager.RestoreGateway(c)
if err != nil || retry {
t.Fatalf("RestoreGateway failed: %s", err)
if err != nil {
t.Fatalf("RestoreGateway failed: %s", err.Error())
} else if retry {
t.Fatalf("RestoreGateway failed: retry should be false")
}
})
}
@ -1403,8 +1405,175 @@ func TestRemoveCanaryService(t *testing.T) {
// the second call, it should be no error and no retry
time.Sleep(1 * time.Second)
retry, err = manager.RemoveCanaryService(c)
if err != nil || retry {
t.Fatalf("RemoveCanaryService failed: %s", err)
if err != nil {
t.Fatalf("RemoveCanaryService failed: %s", err.Error())
} else if retry {
t.Fatalf("RemoveCanaryService failed: retry should be false")
}
})
}
}
func TestRouteAllTrafficToNewVersion(t *testing.T) {
cases := []struct {
name string
getObj func() ([]*corev1.Service, []*netv1.Ingress)
getRollout func() (*v1beta1.Rollout, *util.Workload)
onlyTrafficRouting bool
expectObj func() ([]*corev1.Service, []*netv1.Ingress)
expectNotFound func() ([]*corev1.Service, []*netv1.Ingress)
retry bool
}{
{
name: "Route all traffic test1",
getObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "100"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
},
getRollout: func() (*v1beta1.Rollout, *util.Workload) {
obj := demoRollout.DeepCopy()
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-time.Hour)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
expectObj: func() ([]*corev1.Service, []*netv1.Ingress) {
// service and ingress remain unchanged
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "100"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
},
expectNotFound: func() ([]*corev1.Service, []*netv1.Ingress) {
return nil, nil
},
retry: false,
},
{
name: "Route all traffic test2",
getObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "50"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
},
getRollout: func() (*v1beta1.Rollout, *util.Workload) {
obj := demoRollout.DeepCopy()
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-time.Hour)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
expectObj: func() ([]*corev1.Service, []*netv1.Ingress) {
// service and ingress remain unchanged
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "100"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
},
expectNotFound: func() ([]*corev1.Service, []*netv1.Ingress) {
return nil, nil
},
retry: true,
},
}
for _, cs := range cases {
t.Run(cs.name, func(t *testing.T) {
ss, ig := cs.getObj()
cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ig[0], ss[0], demoConf.DeepCopy()).Build()
if len(ss) == 2 {
_ = cli.Create(context.TODO(), ss[1])
}
if len(ig) == 2 {
_ = cli.Create(context.TODO(), ig[1])
}
rollout, workload := cs.getRollout()
newStatus := rollout.Status.DeepCopy()
currentStep := rollout.Spec.Strategy.Canary.Steps[newStatus.CanaryStatus.CurrentStepIndex-1]
c := &TrafficRoutingContext{
Key: fmt.Sprintf("Rollout(%s/%s)", rollout.Namespace, rollout.Name),
Namespace: rollout.Namespace,
ObjectRef: rollout.Spec.Strategy.Canary.TrafficRoutings,
Strategy: currentStep.TrafficRoutingStrategy,
OwnerRef: *metav1.NewControllerRef(rollout, v1beta1.SchemeGroupVersion.WithKind("Rollout")),
RevisionLabelKey: workload.RevisionLabelKey,
StableRevision: newStatus.CanaryStatus.StableRevision,
CanaryRevision: newStatus.CanaryStatus.PodTemplateHash,
LastUpdateTime: newStatus.CanaryStatus.LastUpdateTime,
OnlyTrafficRouting: cs.onlyTrafficRouting,
}
manager := NewTrafficRoutingManager(cli)
retry, err := manager.RouteAllTrafficToNewVersion(c)
if err != nil {
t.Fatalf("RouteAllTrafficToNewVersion first failed: %s", err.Error())
}
if retry != cs.retry {
t.Fatalf("RouteAllTrafficToNewVersion expect(%v), but get(%v)", cs.retry, retry)
}
ss, ig = cs.expectObj()
for _, obj := range ss {
checkObjEqual(cli, t, obj)
}
for _, obj := range ig {
checkObjEqual(cli, t, obj)
}
ss, ig = cs.expectNotFound()
for _, obj := range ss {
checkNotFound(cli, t, obj)
}
for _, obj := range ig {
checkNotFound(cli, t, obj)
}
// if done, no need check again
if !cs.retry {
return
}
// the second call, it should be no error and no retry
time.Sleep(1 * time.Second)
retry, err = manager.RouteAllTrafficToNewVersion(c)
if err != nil {
t.Fatalf("RouteAllTrafficToNewVersion failed: %s", err.Error())
} else if retry {
t.Fatalf("RouteAllTrafficToNewVersion failed: retry should be false")
}
ss, ig = cs.expectObj()
for _, obj := range ss {
checkObjEqual(cli, t, obj)
}
for _, obj := range ig {
checkObjEqual(cli, t, obj)
}
})
}

View File

@ -87,19 +87,19 @@ func NewControllerFinder(c client.Client) *ControllerFinder {
func (r *ControllerFinder) GetWorkloadForRef(rollout *rolloutv1beta1.Rollout) (*Workload, error) {
workloadRef := rollout.Spec.WorkloadRef
if rollout.Spec.Strategy.GetRollingStyle() == rolloutv1beta1.CanaryRollingStyle {
for _, finder := range append(r.canaryStyleFinders(), r.partitionStyleFinders()...) {
workload, err := finder(rollout.Namespace, &workloadRef)
if workload != nil || err != nil {
return workload, err
}
}
} else {
for _, finder := range r.partitionStyleFinders() {
workload, err := finder(rollout.Namespace, &workloadRef)
if workload != nil || err != nil {
return workload, err
}
var finders []ControllerFinderFunc
switch rollout.Spec.Strategy.GetRollingStyle() {
case rolloutv1beta1.CanaryRollingStyle:
finders = append(r.canaryStyleFinders(), r.partitionStyleFinders()...)
case rolloutv1beta1.BlueGreenRollingStyle:
finders = r.bluegreenStyleFinders()
default:
finders = r.partitionStyleFinders()
}
for _, finder := range finders {
workload, err := finder(rollout.Namespace, &workloadRef)
if workload != nil || err != nil {
return workload, err
}
}
@ -115,6 +115,10 @@ func (r *ControllerFinder) partitionStyleFinders() []ControllerFinderFunc {
return []ControllerFinderFunc{r.getKruiseCloneSet, r.getAdvancedDeployment, r.getStatefulSetLikeWorkload, r.getKruiseDaemonSet}
}
func (r *ControllerFinder) bluegreenStyleFinders() []ControllerFinderFunc {
return []ControllerFinderFunc{r.getKruiseCloneSet, r.getAdvancedDeployment}
}
var (
ControllerKindRS = apps.SchemeGroupVersion.WithKind("ReplicaSet")
ControllerKindDep = apps.SchemeGroupVersion.WithKind("Deployment")

View File

@ -0,0 +1,450 @@
package util
import (
"context"
"fmt"
// "reflect"
"strconv"
"testing"
"math/rand"
appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1"
rolloutv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1"
rolloutv1beta1 "github.com/openkruise/rollouts/api/v1beta1"
apps "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)
var namespace string = "unit-test"
var demoRollout rolloutv1beta1.Rollout = rolloutv1beta1.Rollout{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: "rollout-demo",
Labels: map[string]string{},
Annotations: map[string]string{},
},
Spec: rolloutv1beta1.RolloutSpec{
WorkloadRef: rolloutv1beta1.ObjectRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "deployment-demo",
},
Strategy: rolloutv1beta1.RolloutStrategy{
BlueGreen: &rolloutv1beta1.BlueGreenStrategy{},
},
},
Status: rolloutv1beta1.RolloutStatus{},
}
func TestGetWorkloadForRef(t *testing.T) {
cases := []struct {
name string
getRollout func() *rolloutv1beta1.Rollout
getWorkload func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet)
expectWorkload func() *Workload
err error
}{
{
name: "cloneset, not in rollout progress",
getRollout: func() *rolloutv1beta1.Rollout {
rollout := demoRollout.DeepCopy()
rollout.Spec.WorkloadRef = rolloutv1beta1.ObjectRef{
APIVersion: "apps.kruise.io/v1alpha1",
Kind: "CloneSet",
Name: "cloneset-demo",
}
return rollout
},
getWorkload: func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet) {
return nil, nil, cloneset.DeepCopy()
},
expectWorkload: func() *Workload {
return &Workload{
TypeMeta: metav1.TypeMeta{
Kind: "CloneSet",
APIVersion: "apps.kruise.io/v1alpha1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "unit-test",
Name: "cloneset-demo",
Annotations: map[string]string{
"rollouts.kruise.io/unit-test-anno": "true",
},
Labels: map[string]string{
"rollouts.kruise.io/unit-test-label": "true",
"rollouts.kruise.io/stable-revision": "stable",
},
},
Replicas: 10,
StableRevision: "version1",
CanaryRevision: "version2",
PodTemplateHash: "version2",
RevisionLabelKey: "pod-template-hash",
IsStatusConsistent: true,
}
},
},
{
name: "cloneset,in rollout progress",
getRollout: func() *rolloutv1beta1.Rollout {
rollout := demoRollout.DeepCopy()
rollout.Spec.WorkloadRef = rolloutv1beta1.ObjectRef{
APIVersion: "apps.kruise.io/v1alpha1",
Kind: "CloneSet",
Name: "cloneset-demo",
}
return rollout
},
getWorkload: func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet) {
cs := cloneset.DeepCopy()
cs.Annotations[InRolloutProgressingAnnotation] = "true"
return nil, nil, cs
},
expectWorkload: func() *Workload {
return &Workload{
TypeMeta: metav1.TypeMeta{
Kind: "CloneSet",
APIVersion: "apps.kruise.io/v1alpha1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "unit-test",
Name: "cloneset-demo",
Annotations: map[string]string{
"rollouts.kruise.io/unit-test-anno": "true",
},
Labels: map[string]string{
"rollouts.kruise.io/unit-test-label": "true",
"rollouts.kruise.io/stable-revision": "stable",
},
},
Replicas: 10,
StableRevision: "version1",
CanaryRevision: "version2",
PodTemplateHash: "version2",
RevisionLabelKey: "pod-template-hash",
IsStatusConsistent: true,
InRolloutProgressing: true,
}
},
},
{
name: "in rollback progress",
getRollout: func() *rolloutv1beta1.Rollout {
rollout := demoRollout.DeepCopy()
rollout.Spec.WorkloadRef = rolloutv1beta1.ObjectRef{
APIVersion: "apps.kruise.io/v1alpha1",
Kind: "CloneSet",
Name: "cloneset-demo",
}
return rollout
},
getWorkload: func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet) {
cs := cloneset.DeepCopy()
cs.Annotations[InRolloutProgressingAnnotation] = "true"
cs.Status.CurrentRevision = "version2"
cs.Status.UpdateRevision = "version2"
cs.Status.UpdatedReplicas = 5
return nil, nil, cs
},
expectWorkload: func() *Workload {
return &Workload{
TypeMeta: metav1.TypeMeta{
Kind: "CloneSet",
APIVersion: "apps.kruise.io/v1alpha1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "unit-test",
Name: "cloneset-demo",
Annotations: map[string]string{
"rollouts.kruise.io/unit-test-anno": "true",
},
Labels: map[string]string{
"rollouts.kruise.io/unit-test-label": "true",
"rollouts.kruise.io/stable-revision": "stable",
},
},
Replicas: 10,
StableRevision: "version2",
CanaryRevision: "version2",
PodTemplateHash: "version2",
RevisionLabelKey: "pod-template-hash",
IsStatusConsistent: true,
InRolloutProgressing: true,
IsInRollback: true,
}
},
},
{
name: "deployment: not in rollout progress",
getRollout: func() *rolloutv1beta1.Rollout {
rollout := demoRollout.DeepCopy()
rollout.Spec.WorkloadRef = rolloutv1beta1.ObjectRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "deployment-demo",
}
return rollout
},
getWorkload: func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet) {
dep := deployment.DeepCopy()
dep.Labels[rolloutv1alpha1.DeploymentStableRevisionLabel] = "stable"
rs := generateRS(*dep)
rs.Namespace = namespace
rs.Spec.Replicas = dep.Spec.Replicas
rs.Labels[apps.DefaultDeploymentUniqueLabelKey] = "c9dcf87d5"
return dep, &rs, nil
},
expectWorkload: func() *Workload {
return &Workload{
TypeMeta: metav1.TypeMeta{
Kind: "Deployment",
APIVersion: "apps/v1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "unit-test",
Name: "deployment-demo",
Annotations: map[string]string{
"rollouts.kruise.io/unit-test-anno": "true",
},
Labels: map[string]string{
"rollouts.kruise.io/unit-test-label": "true",
"rollouts.kruise.io/stable-revision": "stable",
},
},
Replicas: 10,
StableRevision: "stable",
CanaryRevision: "c9dcf87d5",
PodTemplateHash: "",
RevisionLabelKey: "pod-template-hash",
IsStatusConsistent: true,
}
},
},
{
name: "in rollout progress",
getRollout: func() *rolloutv1beta1.Rollout {
rollout := demoRollout.DeepCopy()
rollout.Spec.WorkloadRef = rolloutv1beta1.ObjectRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "deployment-demo",
}
return rollout
},
getWorkload: func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet) {
dep := deployment.DeepCopy()
dep.Labels[rolloutv1alpha1.DeploymentStableRevisionLabel] = "stable"
dep.Annotations[InRolloutProgressingAnnotation] = "true"
rs := generateRS(*dep)
rs.Namespace = namespace
rs.Spec.Replicas = dep.Spec.Replicas
rs.Labels[apps.DefaultDeploymentUniqueLabelKey] = "c9dcf87d5"
return dep, &rs, nil
},
expectWorkload: func() *Workload {
return &Workload{
TypeMeta: metav1.TypeMeta{
Kind: "Deployment",
APIVersion: "apps/v1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "unit-test",
Name: "deployment-demo",
},
Replicas: 10,
StableRevision: "stable",
CanaryRevision: "c9dcf87d5",
PodTemplateHash: "c9dcf87d5",
RevisionLabelKey: "pod-template-hash",
IsStatusConsistent: true,
InRolloutProgressing: true,
}
},
},
{
name: "in rollback",
getRollout: func() *rolloutv1beta1.Rollout {
rollout := demoRollout.DeepCopy()
rollout.Spec.WorkloadRef = rolloutv1beta1.ObjectRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "deployment-demo",
}
return rollout
},
getWorkload: func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet) {
dep := deployment.DeepCopy()
dep.Labels[rolloutv1alpha1.DeploymentStableRevisionLabel] = "stable"
dep.Annotations[InRolloutProgressingAnnotation] = "true"
rs := generateRS(*dep)
rs.Namespace = namespace
rs.Spec.Replicas = dep.Spec.Replicas
// the newst revision is stable
rs.Labels[apps.DefaultDeploymentUniqueLabelKey] = "stable"
return dep, &rs, nil
},
expectWorkload: func() *Workload {
return &Workload{
TypeMeta: metav1.TypeMeta{
Kind: "Deployment",
APIVersion: "apps/v1",
},
ObjectMeta: metav1.ObjectMeta{
Namespace: "unit-test",
Name: "deployment-demo",
},
Replicas: 10,
StableRevision: "stable",
CanaryRevision: "c9dcf87d5",
PodTemplateHash: "stable",
RevisionLabelKey: "pod-template-hash",
IsStatusConsistent: true,
InRolloutProgressing: true,
IsInRollback: true,
}
},
},
{
name: "not consistent",
getRollout: func() *rolloutv1beta1.Rollout {
rollout := demoRollout.DeepCopy()
rollout.Spec.WorkloadRef = rolloutv1beta1.ObjectRef{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "deployment-demo",
}
return rollout
},
getWorkload: func() (*apps.Deployment, *apps.ReplicaSet, *appsv1alpha1.CloneSet) {
dep := deployment.DeepCopy()
// modify generation
dep.Generation = 12
dep.Labels[rolloutv1alpha1.DeploymentStableRevisionLabel] = "stable"
dep.Annotations[InRolloutProgressingAnnotation] = "true"
rs := generateRS(*dep)
rs.Namespace = namespace
rs.Spec.Replicas = dep.Spec.Replicas
rs.Labels[apps.DefaultDeploymentUniqueLabelKey] = "c9dcf87d5"
return dep, &rs, nil
},
expectWorkload: func() *Workload {
return &Workload{
IsStatusConsistent: false,
}
},
},
}
for _, cs := range cases {
t.Run(cs.name, func(t *testing.T) {
rollout := cs.getRollout()
dp, rs, cloneset := cs.getWorkload()
cli := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rollout).Build()
if dp != nil {
_ = cli.Create(context.TODO(), rs)
}
if rs != nil {
_ = cli.Create(context.TODO(), dp)
}
if cloneset != nil {
_ = cli.Create(context.TODO(), cloneset)
}
finder := NewControllerFinder(cli)
workload, err := finder.GetWorkloadForRef(rollout)
if !checkWorkloadEqual(workload, cs.expectWorkload()) {
t.Fatal("expected workload not equal got workload")
}
if res := checkErrorEqual(err, cs.err); res != "" {
t.Fatal(res)
}
})
}
}
func checkErrorEqual(g, e error) string {
gotError, expectedError := "none", "none"
if g != nil {
gotError = g.Error()
}
if e != nil {
gotError = e.Error()
}
if gotError != expectedError {
return fmt.Sprintf("expected error %s, but got error %s,", expectedError, gotError)
}
return ""
}
// checkWorkloadEqual compares two Workload pointers for equality.
func checkWorkloadEqual(a, b *Workload) bool {
if a == nil && b == nil {
return true
}
if a == nil || b == nil {
return false
}
// Compare TypeMeta
if a.Kind != b.Kind || a.APIVersion != b.APIVersion {
return false
}
// Compare ObjectMeta for specified fields (Namespace, Name, Annotations, Labels)
if !objectMetaEqual(a.ObjectMeta, b.ObjectMeta) {
return false
}
// Compare other fields
return a.Replicas == b.Replicas &&
a.StableRevision == b.StableRevision &&
a.CanaryRevision == b.CanaryRevision &&
a.PodTemplateHash == b.PodTemplateHash &&
a.RevisionLabelKey == b.RevisionLabelKey &&
a.IsInRollback == b.IsInRollback &&
a.InRolloutProgressing == b.InRolloutProgressing &&
a.IsStatusConsistent == b.IsStatusConsistent
}
// objectMetaEqual compares the specified fields of ObjectMeta.
func objectMetaEqual(a, b metav1.ObjectMeta) bool {
return a.Namespace == b.Namespace &&
a.Name == b.Name
}
func generateRS(deployment apps.Deployment) apps.ReplicaSet {
template := deployment.Spec.Template.DeepCopy()
return apps.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
UID: randomUID(),
Name: randomName("replicaset"),
Labels: template.Labels,
OwnerReferences: []metav1.OwnerReference{*newDControllerRef(&deployment)},
},
Spec: apps.ReplicaSetSpec{
Replicas: new(int32),
Template: *template,
Selector: &metav1.LabelSelector{MatchLabels: template.Labels},
},
}
}
func randomUID() types.UID {
return types.UID(strconv.FormatInt(rand.Int63(), 10))
}
func randomName(prefix string) string {
return fmt.Sprintf("%s-%s", prefix, strconv.FormatInt(5, 10))
}
func newDControllerRef(d *apps.Deployment) *metav1.OwnerReference {
isController := true
return &metav1.OwnerReference{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: d.GetName(),
UID: d.GetUID(),
Controller: &isController,
}
}

View File

@ -160,6 +160,10 @@ func (h *RolloutCreateUpdateHandler) validateRolloutUpdate(oldObj, newObj *appsv
if oldObj.Spec.Strategy.GetRollingStyle() != newObj.Spec.Strategy.GetRollingStyle() {
return field.ErrorList{field.Forbidden(field.NewPath("Spec.Strategy.Canary|BlueGreen"), "Rollout style and enableExtraWorkloadForCanary are immutable")}
}
// forbid adding or removing steps during rollout so that the code can be simpler
if len(oldObj.Spec.Strategy.GetSteps()) != len(newObj.Spec.Strategy.GetSteps()) {
return field.ErrorList{field.Forbidden(field.NewPath("Spec.Strategy.Canary|BlueGreen"), "Amount of Rollout steps are immutable")}
}
}
/*if newObj.Status.CanaryStatus != nil && newObj.Status.CanaryStatus.CurrentStepState == appsv1beta1.CanaryStepStateReady {