rollouts/pkg/controller/rollout/trafficrouting.go

293 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Copyright 2022 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package rollout
import (
"context"
"fmt"
"time"
rolloutv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1"
"github.com/openkruise/rollouts/pkg/controller/rollout/trafficrouting"
"github.com/openkruise/rollouts/pkg/controller/rollout/trafficrouting/nginx"
"github.com/openkruise/rollouts/pkg/util"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
utilpointer "k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
)
// doCanaryTrafficRouting prepares the stable and canary Services for the
// current canary step and then applies the step's traffic weight through the
// traffic routing controller.
//
// It returns (true, nil) when no traffic routing is configured or when the
// controller verifies that the desired weight is already in effect. It returns
// (false, nil) whenever the caller has to retry later (missing revisions,
// missing stable service, a freshly patched stable service, an unexpired grace
// period, or routes that were just set), and (false, err) on failures.
func (r *rolloutContext) doCanaryTrafficRouting() (bool, error) {
	if len(r.rollout.Spec.Strategy.Canary.TrafficRoutings) == 0 {
		return true, nil
	}
	// Fall back to the default grace period when none (or a non-positive one) is set.
	if r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds <= 0 {
		r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds = defaultGracePeriodSeconds
	}
	canaryStatus := r.newStatus.CanaryStatus
	if r.newStatus.StableRevision == "" || canaryStatus.PodTemplateHash == "" {
		klog.Warningf("rollout(%s/%s) stableRevision or podTemplateHash can't be empty, and wait a moment", r.rollout.Namespace, r.rollout.Name)
		return false, nil
	}

	// Fetch the stable service.
	sName := r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].Service
	r.stableService = &corev1.Service{}
	err := r.Get(context.TODO(), client.ObjectKey{Namespace: r.rollout.Namespace, Name: sName}, r.stableService)
	if err != nil {
		klog.Errorf("rollout(%s/%s) get stable service(%s) failed: %s", r.rollout.Namespace, r.rollout.Name, sName, err.Error())
		// Not found: wait a moment and retry without surfacing an error.
		if errors.IsNotFound(err) {
			return false, nil
		}
		return false, err
	}

	// Fetch the canary service, creating it when it does not exist yet.
	// TODO: for the time being, the scenario where the user only changes the
	// stable service definition while the rollout is progressing is not handled.
	canaryStatus.CanaryService = fmt.Sprintf("%s-canary", sName)
	r.canaryService = &corev1.Service{}
	err = r.Get(context.TODO(), client.ObjectKey{Namespace: r.rollout.Namespace, Name: canaryStatus.CanaryService}, r.canaryService)
	if err != nil {
		if !errors.IsNotFound(err) {
			klog.Errorf("rollout(%s/%s) get canary service(%s) failed: %s", r.rollout.Namespace, r.rollout.Name, canaryStatus.CanaryService, err.Error())
			return false, err
		}
		klog.Infof("rollout(%s/%s) canary service(%s) Not Found, and create it", r.rollout.Namespace, r.rollout.Name, canaryStatus.CanaryService)
		if err = r.createCanaryService(); err != nil {
			return false, err
		}
		data := util.DumpJSON(r.canaryService)
		klog.Infof("create rollout(%s/%s) canary service(%s) success", r.rollout.Namespace, r.rollout.Name, data)
	}

	// Make the canary service select only pods of the canary revision.
	if r.canaryService.Spec.Selector[r.podRevisionLabelKey()] != canaryStatus.PodTemplateHash {
		body := fmt.Sprintf(`{"spec":{"selector":{"%s":"%s"}}}`, r.podRevisionLabelKey(), canaryStatus.PodTemplateHash)
		if err = r.Patch(context.TODO(), r.canaryService, client.RawPatch(types.StrategicMergePatchType, []byte(body))); err != nil {
			klog.Errorf("rollout(%s/%s) patch canary service(%s) failed: %s", r.rollout.Namespace, r.rollout.Name, r.canaryService.Name, err.Error())
			return false, err
		}
		// Record the update time so the grace period below starts from this patch.
		canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
		klog.Infof("add rollout(%s/%s) canary service(%s) selector(%s=%s) success",
			r.rollout.Namespace, r.rollout.Name, r.canaryService.Name, r.podRevisionLabelKey(), canaryStatus.PodTemplateHash)
	}
	// Make the stable service select only pods of the stable revision.
	if r.stableService.Spec.Selector[r.podRevisionLabelKey()] != r.newStatus.StableRevision {
		body := fmt.Sprintf(`{"spec":{"selector":{"%s":"%s"}}}`, r.podRevisionLabelKey(), r.newStatus.StableRevision)
		if err = r.Patch(context.TODO(), r.stableService, client.RawPatch(types.StrategicMergePatchType, []byte(body))); err != nil {
			klog.Errorf("rollout(%s/%s) patch stable service(%s) failed: %s", r.rollout.Namespace, r.rollout.Name, r.stableService.Name, err.Error())
			return false, err
		}
		// Record the update time and let the caller retry after the grace period.
		canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
		klog.Infof("add rollout(%s/%s) stable service(%s) selector(%s=%s) success",
			r.rollout.Namespace, r.rollout.Name, r.stableService.Name, r.podRevisionLabelKey(), r.newStatus.StableRevision)
		return false, nil
	}

	// After the services were updated, give the ingress provider the grace
	// period to take effect. LastUpdateTime is nil when no update has been
	// recorded, in which case there is nothing to wait for.
	if canaryStatus.LastUpdateTime != nil {
		gracePeriod := time.Second * time.Duration(r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds)
		if verifyTime := canaryStatus.LastUpdateTime.Add(gracePeriod); verifyTime.After(time.Now()) {
			klog.Infof("update rollout(%s/%s) stable service(%s) done, and wait 3 seconds", r.rollout.Namespace, r.rollout.Name, r.stableService.Name)
			return false, nil
		}
	}

	// Route traffic configuration.
	trController, err := r.newTrafficRoutingController(r)
	if err != nil {
		klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", r.rollout.Namespace, r.rollout.Name, err.Error())
		return false, err
	}

	// Resolve the weight of the current step; CurrentStepIndex is 1-based here.
	steps := len(r.rollout.Spec.Strategy.Canary.Steps)
	var desiredWeight int32
	if steps > 0 {
		idx := int(r.newStatus.CanaryStatus.CurrentStepIndex) - 1
		if idx < 0 || idx >= steps {
			// Report an error instead of panicking on an out-of-range index.
			return false, fmt.Errorf("rollout(%s/%s) current step index(%d) out of range, steps(%d)",
				r.rollout.Namespace, r.rollout.Name, r.newStatus.CanaryStatus.CurrentStepIndex, steps)
		}
		desiredWeight = r.rollout.Spec.Strategy.Canary.Steps[idx].Weight
	}
	cond := util.GetRolloutCondition(*r.newStatus, rolloutv1alpha1.RolloutConditionProgressing)
	cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and route traffic weight(%d)", canaryStatus.CurrentStepIndex, steps, desiredWeight)

	verify, err := trController.Verify(desiredWeight)
	if err != nil {
		return false, err
	}
	if !verify {
		// The desired weight is not in effect yet: set the routes and retry later.
		r.recorder.Eventf(r.rollout, corev1.EventTypeNormal, "Progressing", "traffic route weight(%d) done", desiredWeight)
		return false, trController.SetRoutes(desiredWeight)
	}
	klog.Infof("rollout(%s/%s) do step(%d) trafficRouting(%d) success", r.rollout.Namespace, r.rollout.Name, r.newStatus.CanaryStatus.CurrentStepIndex, desiredWeight)
	return true, nil
}
// restoreStableService removes the pod revision selector from the stable
// service and then waits for the configured grace period.
//
// It returns (true, nil) when no traffic routing is configured, when the
// stable service does not exist, or when the selector is already gone and the
// grace period (if any update time is recorded) has elapsed. It returns
// (false, nil) when the caller should retry later, and (false, err) on errors.
func (r *rolloutContext) restoreStableService() (bool, error) {
	if len(r.rollout.Spec.Strategy.Canary.TrafficRoutings) == 0 {
		return true, nil
	}
	// Apply the default grace period when a non-positive value is configured.
	if r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds <= 0 {
		r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds = defaultGracePeriodSeconds
	}

	ctx := context.TODO()
	ns, name := r.rollout.Namespace, r.rollout.Name

	// Look up the stable service; a missing service means there is nothing to restore.
	svcName := r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].Service
	r.stableService = &corev1.Service{}
	if err := r.Get(ctx, client.ObjectKey{Namespace: ns, Name: svcName}, r.stableService); err != nil {
		if errors.IsNotFound(err) {
			return true, nil
		}
		klog.Errorf("rollout(%s/%s) get stable service(%s) failed: %s", ns, name, svcName, err.Error())
		return false, err
	}

	if r.newStatus.CanaryStatus == nil {
		r.newStatus.CanaryStatus = &rolloutv1alpha1.CanaryStatus{}
	}

	// Restore the stable service configuration: drop the revision hash selector.
	selector := r.stableService.Spec.Selector
	if selector != nil && selector[r.podRevisionLabelKey()] != "" {
		patch := fmt.Sprintf(`{"spec":{"selector":{"%s":null}}}`, r.podRevisionLabelKey())
		err := r.Patch(ctx, r.stableService, client.RawPatch(types.StrategicMergePatchType, []byte(patch)))
		if err != nil {
			klog.Errorf("rollout(%s/%s) patch stable service(%s) failed: %s", ns, name, r.stableService.Name, err.Error())
			return false, err
		}
		klog.Infof("remove rollout(%s/%s) stable service(%s) pod revision selector success, and retry later", ns, name, r.stableService.Name)
		// Remember when the selector was removed so the grace period starts now.
		r.newStatus.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
		return false, nil
	}

	// Once the selector is restored, give the ingress provider the grace
	// period to take effect before reporting success.
	if last := r.newStatus.CanaryStatus.LastUpdateTime; last != nil {
		grace := time.Second * time.Duration(r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds)
		deadline := last.Add(grace)
		if time.Now().Before(deadline) {
			klog.Infof("restore rollout(%s/%s) stable service(%s) done, and wait a moment", ns, name, r.stableService.Name)
			return false, nil
		}
	}

	klog.Infof("rollout(%s/%s) doFinalising restore stable service(%s) success", ns, name, r.stableService.Name)
	return true, nil
}
// doFinalisingTrafficRouting routes traffic back to the stable service,
// finalises the traffic routing controller, and deletes the canary service.
//
// It returns (true, nil) when no traffic routing is configured, when no canary
// service name is recorded in the status, or after the canary service has been
// deleted (or was already gone). It returns (false, nil) while the caller
// should retry later, and (false, err) on errors.
func (r *rolloutContext) doFinalisingTrafficRouting() (bool, error) {
	if len(r.rollout.Spec.Strategy.Canary.TrafficRoutings) == 0 {
		return true, nil
	}
	// Apply the default grace period when a non-positive value is configured.
	if r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds <= 0 {
		r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds = defaultGracePeriodSeconds
	}
	if r.newStatus.CanaryStatus == nil {
		r.newStatus.CanaryStatus = &rolloutv1alpha1.CanaryStatus{}
	}
	ns, name := r.rollout.Namespace, r.rollout.Name

	// Step 1: restore the ingress and route traffic to the stable service.
	router, err := r.newTrafficRoutingController(r)
	if err != nil {
		klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", ns, name, err.Error())
		return false, err
	}
	restored, err := router.Verify(-1)
	if err != nil {
		return false, err
	}
	if !restored {
		// Stamp the update time before resetting the routes; the grace period
		// below is measured from this moment on the next invocation.
		r.newStatus.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
		err = router.SetRoutes(0)
		if errors.IsNotFound(err) {
			klog.Warningf("rollout(%s/%s) VerifyTrafficRouting(-1), and stable ingress not found", ns, name)
			return false, nil
		}
		return false, err
	}

	// After the traffic routing configuration was changed, give the ingress
	// provider the grace period to take effect.
	if last := r.newStatus.CanaryStatus.LastUpdateTime; last != nil {
		grace := time.Second * time.Duration(r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds)
		deadline := last.Add(grace)
		if time.Now().Before(deadline) {
			klog.Infof("rollout(%s/%s) doFinalisingTrafficRouting done, and wait a moment", ns, name)
			return false, nil
		}
	}

	// Let the controller finalise, such as deleting the nginx canary ingress.
	if err = router.Finalise(); err != nil {
		return false, err
	}

	// Step 2: remove the canary service, if one was recorded.
	canarySvcName := r.newStatus.CanaryStatus.CanaryService
	if canarySvcName == "" {
		return true, nil
	}
	stale := &corev1.Service{}
	stale.Namespace = ns
	stale.Name = canarySvcName
	// A canary service that is already gone counts as successfully removed.
	if err = r.Delete(context.TODO(), stale); err != nil && !errors.IsNotFound(err) {
		klog.Errorf("rollout(%s/%s) remove canary service(%s) failed: %s", ns, name, stale.Name, err.Error())
		return false, err
	}
	klog.Infof("rollout(%s/%s) remove canary service(%s) success", ns, name, stale.Name)
	return true, nil
}
// newTrafficRoutingController builds the traffic routing controller matching
// the type of the first configured traffic routing. Only "nginx" is handled;
// any other type yields an error.
//
// The routing type is read from roCtx, while the client, status, services and
// rollout metadata handed to the controller come from the receiver r.
func (r *rolloutContext) newTrafficRoutingController(roCtx *rolloutContext) (trafficrouting.Controller, error) {
	canary := roCtx.rollout.Spec.Strategy.Canary
	routingType := canary.TrafficRoutings[0].Type
	if routingType == "nginx" {
		// The owner reference handed to the nginx controller points at the Rollout kind.
		rolloutGVK := schema.GroupVersionKind{
			Group:   rolloutv1alpha1.GroupVersion.Group,
			Version: rolloutv1alpha1.GroupVersion.Version,
			Kind:    "Rollout",
		}
		conf := nginx.Config{
			RolloutName:   r.rollout.Name,
			RolloutNs:     r.rollout.Namespace,
			CanaryService: r.canaryService,
			StableService: r.stableService,
			TrafficConf:   r.rollout.Spec.Strategy.Canary.TrafficRoutings[0].Ingress,
			OwnerRef:      *metav1.NewControllerRef(r.rollout, rolloutGVK),
		}
		return nginx.NewNginxTrafficRouting(r.Client, r.newStatus, conf)
	}
	return nil, fmt.Errorf("TrafficRouting(%s) not support", routingType)
}
// createCanaryService builds the canary service as a copy of the stable
// service spec, owned by the rollout and selecting the canary pod revision,
// and creates it. The new object is stored in r.canaryService.
//
// An "already exists" error from the create call is treated as success; any
// other error is logged and returned.
func (r *rolloutContext) createCanaryService() error {
	// Build the owner reference from an explicit GroupVersionKind, the same way
	// newTrafficRoutingController does, rather than from the rollout's TypeMeta.
	// NOTE(review): presumably TypeMeta can be empty on objects read through the
	// typed client, which would yield an invalid owner reference — confirm.
	gvk := schema.GroupVersionKind{Group: rolloutv1alpha1.GroupVersion.Group, Version: rolloutv1alpha1.GroupVersion.Version, Kind: "Rollout"}
	r.canaryService = &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Namespace:       r.rollout.Namespace,
			Name:            r.newStatus.CanaryStatus.CanaryService,
			OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(r.rollout, gvk)},
		},
		Spec: *r.stableService.Spec.DeepCopy(),
	}
	// Clear the address-related fields copied from the stable service so the
	// canary service does not request the stable service's values.
	// NOTE(review): node ports copied from the stable service are kept as-is;
	// confirm whether they need to be reset for NodePort/LoadBalancer services.
	r.canaryService.Spec.ClusterIP = ""
	r.canaryService.Spec.ClusterIPs = nil
	r.canaryService.Spec.ExternalIPs = nil
	r.canaryService.Spec.IPFamilyPolicy = nil
	r.canaryService.Spec.IPFamilies = nil
	r.canaryService.Spec.LoadBalancerIP = ""
	// The stable service may have no selector at all; writing into a nil map
	// would panic, so allocate one first.
	if r.canaryService.Spec.Selector == nil {
		r.canaryService.Spec.Selector = make(map[string]string, 1)
	}
	r.canaryService.Spec.Selector[r.podRevisionLabelKey()] = r.newStatus.CanaryStatus.PodTemplateHash

	err := r.Create(context.TODO(), r.canaryService)
	if err != nil && !errors.IsAlreadyExists(err) {
		klog.Errorf("create rollout(%s/%s) canary service(%s) failed: %s", r.rollout.Namespace, r.rollout.Name, r.canaryService.Name, err.Error())
		return err
	}
	return nil
}