287 lines
13 KiB
Go
287 lines
13 KiB
Go
/*
Copyright 2022 The Kruise Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package rollout
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
rolloutv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1"
|
|
"github.com/openkruise/rollouts/pkg/util"
|
|
corev1 "k8s.io/api/core/v1"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/runtime/schema"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/client-go/util/retry"
|
|
"k8s.io/klog/v2"
|
|
)
|
|
|
|
func (r *rolloutContext) runCanary() error {
|
|
canaryStatus := r.newStatus.CanaryStatus
|
|
// init canary status
|
|
if canaryStatus.CanaryRevision == "" {
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStateUpgrade
|
|
canaryStatus.CanaryRevision = r.workload.CanaryRevision
|
|
canaryStatus.CurrentStepIndex = 1
|
|
canaryStatus.RolloutHash = r.rollout.Annotations[util.RolloutHashAnnotation]
|
|
}
|
|
|
|
// update canary status
|
|
canaryStatus.CanaryReplicas = r.workload.CanaryReplicas
|
|
canaryStatus.CanaryReadyReplicas = r.workload.CanaryReadyReplicas
|
|
switch canaryStatus.CurrentStepState {
|
|
case rolloutv1alpha1.CanaryStepStateUpgrade:
|
|
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", r.rollout.Namespace, r.rollout.Name, rolloutv1alpha1.CanaryStepStateUpgrade)
|
|
// If the last step is 100%, there is no need to execute the canary process at this time
|
|
if r.rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1].Weight == 100 {
|
|
klog.Infof("rollout(%s/%s) last step is 100%, there is no need to execute the canary process at this time, and set state=%s",
|
|
r.rollout.Namespace, r.rollout.Name, canaryStatus.CurrentStepIndex-1, canaryStatus.CurrentStepIndex, rolloutv1alpha1.CanaryStepStateCompleted)
|
|
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStateCompleted
|
|
} else {
|
|
done, err := r.doCanaryUpgrade()
|
|
if err != nil {
|
|
return err
|
|
} else if done {
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStateTrafficRouting
|
|
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
|
|
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", r.rollout.Namespace, r.rollout.Name,
|
|
canaryStatus.CurrentStepIndex, rolloutv1alpha1.CanaryStepStateUpgrade, canaryStatus.CurrentStepState)
|
|
}
|
|
}
|
|
|
|
case rolloutv1alpha1.CanaryStepStateTrafficRouting:
|
|
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", r.rollout.Namespace, r.rollout.Name, rolloutv1alpha1.CanaryStepStateTrafficRouting)
|
|
done, err := r.doCanaryTrafficRouting()
|
|
if err != nil {
|
|
return err
|
|
} else if done {
|
|
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStateMetricsAnalysis
|
|
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", r.rollout.Namespace, r.rollout.Name,
|
|
canaryStatus.CurrentStepIndex, rolloutv1alpha1.CanaryStepStateTrafficRouting, canaryStatus.CurrentStepState)
|
|
}
|
|
expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second)
|
|
r.recheckTime = &expectedTime
|
|
|
|
case rolloutv1alpha1.CanaryStepStateMetricsAnalysis:
|
|
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", r.rollout.Namespace, r.rollout.Name, rolloutv1alpha1.CanaryStepStateMetricsAnalysis)
|
|
done, err := r.doCanaryMetricsAnalysis()
|
|
if err != nil {
|
|
return err
|
|
} else if done {
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStatePaused
|
|
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", r.rollout.Namespace, r.rollout.Name,
|
|
canaryStatus.CurrentStepIndex, rolloutv1alpha1.CanaryStepStateMetricsAnalysis, canaryStatus.CurrentStepState)
|
|
}
|
|
|
|
case rolloutv1alpha1.CanaryStepStatePaused:
|
|
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", r.rollout.Namespace, r.rollout.Name, rolloutv1alpha1.CanaryStepStatePaused)
|
|
done, err := r.doCanaryPaused()
|
|
if err != nil {
|
|
return err
|
|
} else if done {
|
|
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStateReady
|
|
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", r.rollout.Namespace, r.rollout.Name,
|
|
canaryStatus.CurrentStepIndex, rolloutv1alpha1.CanaryStepStatePaused, canaryStatus.CurrentStepState)
|
|
}
|
|
|
|
case rolloutv1alpha1.CanaryStepStateReady:
|
|
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", r.rollout.Namespace, r.rollout.Name, rolloutv1alpha1.CanaryStepStateReady)
|
|
// run next step
|
|
if len(r.rollout.Spec.Strategy.Canary.Steps) > int(canaryStatus.CurrentStepIndex) {
|
|
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
|
|
canaryStatus.CurrentStepIndex++
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStateUpgrade
|
|
klog.Infof("rollout(%s/%s) canary step from(%d) -> to(%d)", r.rollout.Namespace, r.rollout.Name, canaryStatus.CurrentStepIndex-1, canaryStatus.CurrentStepIndex)
|
|
} else {
|
|
klog.Infof("rollout(%s/%s) canary run all steps, and completed", r.rollout.Namespace, r.rollout.Name)
|
|
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
|
|
canaryStatus.CurrentStepState = rolloutv1alpha1.CanaryStepStateCompleted
|
|
}
|
|
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", r.rollout.Namespace, r.rollout.Name,
|
|
canaryStatus.CurrentStepIndex, rolloutv1alpha1.CanaryStepStateReady, canaryStatus.CurrentStepState)
|
|
// canary completed
|
|
case rolloutv1alpha1.CanaryStepStateCompleted:
|
|
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", r.rollout.Namespace, r.rollout.Name, rolloutv1alpha1.CanaryStepStateCompleted)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (r *rolloutContext) doCanaryUpgrade() (bool, error) {
|
|
// only traffic routing
|
|
/*if len(r.rollout.Spec.Strategy.Canary.Steps) == 0 {
|
|
if r.workload.CanaryReadyReplicas > 0 {
|
|
klog.Infof("rollout(%s/%s) workload(%s) canaryAvailable(%d), and go to the next stage",
|
|
r.rollout.Namespace, r.rollout.Name, r.workload.Name, r.workload.CanaryReadyReplicas)
|
|
return true, nil
|
|
}
|
|
klog.Infof("rollout(%s/%s) workload(%s) canaryAvailable(%d), and wait a moment",
|
|
r.rollout.Namespace, r.rollout.Name, r.workload.Name, r.workload.CanaryReadyReplicas)
|
|
return false, nil
|
|
}*/
|
|
|
|
// verify whether batchRelease configuration is the latest
|
|
steps := len(r.rollout.Spec.Strategy.Canary.Steps)
|
|
canaryStatus := r.newStatus.CanaryStatus
|
|
isLatest, err := r.batchControl.Verify(canaryStatus.CurrentStepIndex)
|
|
if err != nil {
|
|
return false, err
|
|
} else if !isLatest {
|
|
return false, nil
|
|
}
|
|
|
|
// fetch batchRelease
|
|
batch, err := r.batchControl.FetchBatchRelease()
|
|
if err != nil {
|
|
return false, err
|
|
} else if batch.Status.ObservedReleasePlanHash != util.HashReleasePlanBatches(&batch.Spec.ReleasePlan) ||
|
|
batch.Generation != batch.Status.ObservedGeneration {
|
|
klog.Infof("rollout(%s/%s) batchReleasePlan is not consistent, and wait a moment", r.rollout.Namespace, r.rollout.Name)
|
|
return false, nil
|
|
}
|
|
batchData := util.DumpJSON(batch.Status)
|
|
cond := util.GetRolloutCondition(*r.newStatus, rolloutv1alpha1.RolloutConditionProgressing)
|
|
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and upgrade workload new versions", canaryStatus.CurrentStepIndex, steps)
|
|
r.newStatus.Message = cond.Message
|
|
// promote workload next batch release
|
|
if *batch.Spec.ReleasePlan.BatchPartition+1 < canaryStatus.CurrentStepIndex {
|
|
r.recorder.Eventf(r.rollout, corev1.EventTypeNormal, "Progressing", fmt.Sprintf("start upgrade step(%d) canary pods with new versions", canaryStatus.CurrentStepIndex))
|
|
klog.Infof("rollout(%s/%s) will promote batch from(%d) -> to(%d)", r.rollout.Namespace, r.rollout.Name, *batch.Spec.ReleasePlan.BatchPartition+1, canaryStatus.CurrentStepIndex)
|
|
return r.batchControl.Promote(canaryStatus.CurrentStepIndex, false)
|
|
}
|
|
|
|
// check whether batchRelease is ready
|
|
if batch.Status.CanaryStatus.CurrentBatchState != rolloutv1alpha1.ReadyBatchState ||
|
|
batch.Status.CanaryStatus.CurrentBatch+1 < canaryStatus.CurrentStepIndex {
|
|
klog.Infof("rollout(%s/%s) batch(%s) state(%s), and wait a moment",
|
|
r.rollout.Namespace, r.rollout.Name, batchData, batch.Status.CanaryStatus.CurrentBatchState)
|
|
return false, nil
|
|
}
|
|
r.recorder.Eventf(r.rollout, corev1.EventTypeNormal, "Progressing", fmt.Sprintf("upgrade step(%d) canary pods with new versions done", canaryStatus.CurrentStepIndex))
|
|
klog.Infof("rollout(%s/%s) batch(%s) state(%s), and success",
|
|
r.rollout.Namespace, r.rollout.Name, batchData, batch.Status.CanaryStatus.CurrentBatchState)
|
|
return true, nil
|
|
}
|
|
|
|
func (r *rolloutContext) doCanaryMetricsAnalysis() (bool, error) {
|
|
// todo
|
|
return true, nil
|
|
}
|
|
|
|
func (r *rolloutContext) doCanaryPaused() (bool, error) {
|
|
// No step set, need manual confirmation
|
|
if len(r.rollout.Spec.Strategy.Canary.Steps) == 0 {
|
|
klog.Infof("rollout(%s/%s) don't contains steps, and need manual confirmation", r.rollout.Namespace, r.rollout.Name)
|
|
return false, nil
|
|
}
|
|
canaryStatus := r.newStatus.CanaryStatus
|
|
currentStep := r.rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
|
|
steps := len(r.rollout.Spec.Strategy.Canary.Steps)
|
|
cond := util.GetRolloutCondition(*r.newStatus, rolloutv1alpha1.RolloutConditionProgressing)
|
|
// need manual confirmation
|
|
if currentStep.Pause.Duration == nil {
|
|
klog.Infof("rollout(%s/%s) don't set pause duration, and need manual confirmation", r.rollout.Namespace, r.rollout.Name)
|
|
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and you need manually confirm to enter the next step", canaryStatus.CurrentStepIndex, steps)
|
|
r.newStatus.Message = cond.Message
|
|
return false, nil
|
|
}
|
|
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and wait duration(%d seconds) to enter the next step", canaryStatus.CurrentStepIndex, steps, *currentStep.Pause.Duration)
|
|
r.newStatus.Message = cond.Message
|
|
// wait duration time, then go to next step
|
|
duration := time.Second * time.Duration(*currentStep.Pause.Duration)
|
|
expectedTime := canaryStatus.LastUpdateTime.Add(duration)
|
|
if expectedTime.Before(time.Now()) {
|
|
klog.Infof("rollout(%s/%s) canary step(%d) paused duration(%d seconds), and go to the next step",
|
|
r.rollout.Namespace, r.rollout.Name, canaryStatus.CurrentStepIndex, *currentStep.Pause.Duration)
|
|
return true, nil
|
|
}
|
|
r.recheckTime = &expectedTime
|
|
return false, nil
|
|
}
|
|
|
|
// cleanup after rollout is completed or finished
|
|
func (r *rolloutContext) doCanaryFinalising() (bool, error) {
|
|
// when CanaryStatus is nil, which means canary action hasn't started yet, don't need doing cleanup
|
|
if r.newStatus.CanaryStatus == nil {
|
|
return true, nil
|
|
}
|
|
// 1. rollout progressing complete, allow workload paused=false in webhook
|
|
err := r.removeRolloutStateInWorkload()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
// 2. restore stable service, remove podRevision selector
|
|
done, err := r.restoreStableService()
|
|
if err != nil || !done {
|
|
return done, err
|
|
}
|
|
// 3. upgrade stable deployment, set paused=false
|
|
// isComplete indicates whether rollout progressing complete, and wait for all pods are ready
|
|
// else indicates rollout is canceled
|
|
done, err = r.batchControl.Promote(-1, r.isComplete)
|
|
if err != nil || !done {
|
|
return done, err
|
|
}
|
|
// 4. route all traffic to stable service
|
|
done, err = r.doFinalisingTrafficRouting()
|
|
if err != nil || !done {
|
|
return done, err
|
|
}
|
|
// 5. delete batchRelease crd
|
|
done, err = r.batchControl.Finalize()
|
|
if err != nil {
|
|
klog.Errorf("rollout(%s/%s) DoFinalising batchRelease failed: %s", r.rollout.Namespace, r.rollout.Name, err.Error())
|
|
return false, err
|
|
} else if !done {
|
|
return false, nil
|
|
}
|
|
klog.Infof("rollout(%s/%s) batchRelease Finalize success", r.rollout.Namespace, r.rollout.Name)
|
|
return true, nil
|
|
}
|
|
|
|
func (r *rolloutContext) removeRolloutStateInWorkload() error {
|
|
if r.workload == nil || r.rollout.Spec.ObjectRef.WorkloadRef == nil {
|
|
return nil
|
|
}
|
|
if _, ok := r.workload.Annotations[util.InRolloutProgressingAnnotation]; !ok {
|
|
return nil
|
|
}
|
|
workloadRef := r.rollout.Spec.ObjectRef.WorkloadRef
|
|
workloadGVK := schema.FromAPIVersionAndKind(workloadRef.APIVersion, workloadRef.Kind)
|
|
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
|
|
obj := util.GetEmptyWorkloadObject(workloadGVK)
|
|
if err := r.Get(context.TODO(), types.NamespacedName{Name: r.workload.Name, Namespace: r.workload.Namespace}, obj); err != nil {
|
|
klog.Errorf("getting updated workload(%s.%s) failed: %s", r.workload.Namespace, r.workload.Name, err.Error())
|
|
return err
|
|
}
|
|
annotations := obj.GetAnnotations()
|
|
delete(annotations, util.InRolloutProgressingAnnotation)
|
|
obj.SetAnnotations(annotations)
|
|
return r.Update(context.TODO(), obj)
|
|
})
|
|
if err != nil {
|
|
klog.Errorf("update rollout(%s/%s) workload(%s) failed: %s", r.rollout.Namespace, r.rollout.Name, r.workload.Name, err.Error())
|
|
return err
|
|
}
|
|
klog.Infof("remove rollout(%s/%s) workload(%s) annotation[%s] success", r.rollout.Namespace, r.rollout.Name, r.workload.Name, util.InRolloutProgressingAnnotation)
|
|
return nil
|
|
}
|