mirror of https://github.com/openkruise/kruise.git
319 lines
11 KiB
Go
319 lines
11 KiB
Go
/*
|
|
Copyright 2019 The Kruise Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package cloneset
|
|
|
|
import (
|
|
"context"
|
|
"reflect"
|
|
"strings"
|
|
"time"
|
|
|
|
v1 "k8s.io/api/core/v1"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/apimachinery/pkg/runtime/schema"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/client-go/util/workqueue"
|
|
"k8s.io/klog/v2"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/event"
|
|
"sigs.k8s.io/controller-runtime/pkg/handler"
|
|
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
|
|
|
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
|
|
clonesetcore "github.com/openkruise/kruise/pkg/controller/cloneset/core"
|
|
clonesetutils "github.com/openkruise/kruise/pkg/controller/cloneset/utils"
|
|
"github.com/openkruise/kruise/pkg/features"
|
|
"github.com/openkruise/kruise/pkg/util/expectations"
|
|
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
|
|
)
|
|
|
|
var (
|
|
// initialingRateLimiter calculates the delay duration for existing Pods
|
|
// triggered Create event when the Informer cache has just synced.
|
|
initialingRateLimiter = workqueue.NewItemExponentialFailureRateLimiter(3*time.Second, 30*time.Second)
|
|
)
|
|
|
|
type podEventHandler struct {
|
|
client.Reader
|
|
}
|
|
|
|
// Compile-time check that *podEventHandler implements handler.EventHandler.
var _ handler.EventHandler = (*podEventHandler)(nil)
|
|
|
|
func (e *podEventHandler) Create(ctx context.Context, evt event.CreateEvent, q workqueue.RateLimitingInterface) {
|
|
pod := evt.Object.(*v1.Pod)
|
|
if pod.DeletionTimestamp != nil {
|
|
// on a restart of the controller manager, it's possible a new pod shows up in a state that
|
|
// is already pending deletion. Prevent the pod from being a creation observation.
|
|
e.Delete(ctx, event.DeleteEvent{Object: evt.Object}, q)
|
|
return
|
|
}
|
|
|
|
// If it has a ControllerRef, that's all that matters.
|
|
if controllerRef := metav1.GetControllerOf(pod); controllerRef != nil {
|
|
req := resolveControllerRef(pod.Namespace, controllerRef)
|
|
if req == nil {
|
|
return
|
|
}
|
|
klog.V(4).InfoS("Pod created", "pod", klog.KObj(pod), "owner", req)
|
|
|
|
isSatisfied, _, _ := clonesetutils.ScaleExpectations.SatisfiedExpectations(req.String())
|
|
clonesetutils.ScaleExpectations.ObserveScale(req.String(), expectations.Create, pod.Name)
|
|
if isSatisfied {
|
|
// If the scale expectation is satisfied, it should be an existing Pod and the Informer
|
|
// cache should have just synced.
|
|
q.AddAfter(*req, initialingRateLimiter.When(req))
|
|
} else {
|
|
// Otherwise, add it immediately and reset the rate limiter
|
|
initialingRateLimiter.Forget(req)
|
|
q.Add(*req)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Otherwise, it's an orphan. Get a list of all matching CloneSets and sync
|
|
// them to see if anyone wants to adopt it.
|
|
// DO NOT observe creation because no controller should be waiting for an
|
|
// orphan.
|
|
csList := e.getPodCloneSets(pod)
|
|
if len(csList) == 0 {
|
|
return
|
|
}
|
|
klog.V(4).InfoS("Orphan Pod created", "pod", klog.KObj(pod), "owner", e.joinCloneSetNames(csList))
|
|
for _, cs := range csList {
|
|
q.Add(reconcile.Request{NamespacedName: types.NamespacedName{
|
|
Name: cs.GetName(),
|
|
Namespace: cs.GetNamespace(),
|
|
}})
|
|
}
|
|
}
|
|
|
|
func (e *podEventHandler) Update(ctx context.Context, evt event.UpdateEvent, q workqueue.RateLimitingInterface) {
|
|
oldPod := evt.ObjectOld.(*v1.Pod)
|
|
curPod := evt.ObjectNew.(*v1.Pod)
|
|
if curPod.ResourceVersion == oldPod.ResourceVersion {
|
|
// Periodic resync will send update events for all known pods.
|
|
// Two different versions of the same pod will always have different RVs.
|
|
return
|
|
}
|
|
|
|
labelChanged := !reflect.DeepEqual(curPod.Labels, oldPod.Labels)
|
|
if curPod.DeletionTimestamp != nil {
|
|
// when a pod is deleted gracefully it's deletion timestamp is first modified to reflect a grace period,
|
|
// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
|
|
// for modification of the deletion timestamp and expect an rs to create more replicas asap, not wait
|
|
// until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because
|
|
// an rs never initiates a phase change, and so is never asleep waiting for the same.
|
|
e.Delete(ctx, event.DeleteEvent{Object: evt.ObjectNew}, q)
|
|
if labelChanged {
|
|
// we don't need to check the oldPod.DeletionTimestamp because DeletionTimestamp cannot be unset.
|
|
e.Delete(ctx, event.DeleteEvent{Object: evt.ObjectOld}, q)
|
|
}
|
|
return
|
|
}
|
|
|
|
curControllerRef := metav1.GetControllerOf(curPod)
|
|
oldControllerRef := metav1.GetControllerOf(oldPod)
|
|
controllerRefChanged := !reflect.DeepEqual(curControllerRef, oldControllerRef)
|
|
if controllerRefChanged && oldControllerRef != nil {
|
|
// The ControllerRef was changed. Sync the old controller, if any.
|
|
if req := resolveControllerRef(oldPod.Namespace, oldControllerRef); req != nil {
|
|
q.Add(*req)
|
|
}
|
|
}
|
|
|
|
// If it has a ControllerRef, that's all that matters.
|
|
if curControllerRef != nil {
|
|
req := resolveControllerRef(curPod.Namespace, curControllerRef)
|
|
if req == nil {
|
|
return
|
|
}
|
|
|
|
if utilfeature.DefaultFeatureGate.Enabled(features.CloneSetEventHandlerOptimization) {
|
|
if !controllerRefChanged && !labelChanged && e.shouldIgnoreUpdate(req, oldPod, curPod) {
|
|
return
|
|
}
|
|
}
|
|
|
|
klog.V(4).InfoS("Pod updated", "pod", klog.KObj(curPod), "owner", req)
|
|
q.Add(*req)
|
|
return
|
|
}
|
|
|
|
// Otherwise, it's an orphan. If anything changed, sync matching controllers
|
|
// to see if anyone wants to adopt it now.
|
|
if labelChanged || controllerRefChanged {
|
|
csList := e.getPodCloneSets(curPod)
|
|
if len(csList) == 0 {
|
|
return
|
|
}
|
|
klog.V(4).InfoS("Orphan Pod updated", "pod", klog.KObj(curPod), "owner", e.joinCloneSetNames(csList))
|
|
for _, cs := range csList {
|
|
q.Add(reconcile.Request{NamespacedName: types.NamespacedName{
|
|
Name: cs.GetName(),
|
|
Namespace: cs.GetNamespace(),
|
|
}})
|
|
}
|
|
}
|
|
}
|
|
|
|
func (e *podEventHandler) shouldIgnoreUpdate(req *reconcile.Request, oldPod, curPod *v1.Pod) bool {
|
|
cs := &appsv1alpha1.CloneSet{}
|
|
if err := e.Get(context.TODO(), req.NamespacedName, cs); err != nil {
|
|
return false
|
|
}
|
|
|
|
return clonesetcore.New(cs).IgnorePodUpdateEvent(oldPod, curPod)
|
|
}
|
|
|
|
func (e *podEventHandler) Delete(ctx context.Context, evt event.DeleteEvent, q workqueue.RateLimitingInterface) {
|
|
pod, ok := evt.Object.(*v1.Pod)
|
|
if !ok {
|
|
klog.ErrorS(nil, "Skipped pod deletion event", "deleteStateUnknown", evt.DeleteStateUnknown, "obj", evt.Object)
|
|
return
|
|
}
|
|
clonesetutils.ResourceVersionExpectations.Delete(pod)
|
|
|
|
controllerRef := metav1.GetControllerOf(pod)
|
|
if controllerRef == nil {
|
|
// No controller should care about orphans being deleted.
|
|
return
|
|
}
|
|
req := resolveControllerRef(pod.Namespace, controllerRef)
|
|
if req == nil {
|
|
return
|
|
}
|
|
|
|
klog.V(4).InfoS("Pod deleted", "pod", klog.KObj(pod), "owner", req)
|
|
clonesetutils.ScaleExpectations.ObserveScale(req.String(), expectations.Delete, pod.Name)
|
|
q.Add(*req)
|
|
}
|
|
|
|
func (e *podEventHandler) Generic(ctx context.Context, evt event.GenericEvent, q workqueue.RateLimitingInterface) {
|
|
|
|
}
|
|
|
|
func resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *reconcile.Request {
|
|
// Parse the Group out of the OwnerReference to compare it to what was parsed out of the requested OwnerType
|
|
refGV, err := schema.ParseGroupVersion(controllerRef.APIVersion)
|
|
if err != nil {
|
|
klog.ErrorS(err, "Could not parse APIVersion in OwnerReference", "ownerRef", controllerRef)
|
|
return nil
|
|
}
|
|
|
|
// Compare the OwnerReference Group and Kind against the OwnerType Group and Kind specified by the user.
|
|
// If the two match, create a Request for the objected referred to by
|
|
// the OwnerReference. Use the Name from the OwnerReference and the Namespace from the
|
|
// object in the event.
|
|
if controllerRef.Kind == clonesetutils.ControllerKind.Kind && refGV.Group == clonesetutils.ControllerKind.Group {
|
|
// Match found - add a Request for the object referred to in the OwnerReference
|
|
req := reconcile.Request{NamespacedName: types.NamespacedName{
|
|
Namespace: namespace,
|
|
Name: controllerRef.Name,
|
|
}}
|
|
return &req
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (e *podEventHandler) getPodCloneSets(pod *v1.Pod) []appsv1alpha1.CloneSet {
|
|
csList := appsv1alpha1.CloneSetList{}
|
|
if err := e.List(context.TODO(), &csList, client.InNamespace(pod.Namespace)); err != nil {
|
|
return nil
|
|
}
|
|
|
|
var csMatched []appsv1alpha1.CloneSet
|
|
for _, cs := range csList.Items {
|
|
selector, err := metav1.LabelSelectorAsSelector(cs.Spec.Selector)
|
|
if err != nil || selector.Empty() || !selector.Matches(labels.Set(pod.Labels)) {
|
|
continue
|
|
}
|
|
|
|
csMatched = append(csMatched, cs)
|
|
}
|
|
|
|
if len(csMatched) > 1 {
|
|
// ControllerRef will ensure we don't do anything crazy, but more than one
|
|
// item in this list nevertheless constitutes user error.
|
|
klog.InfoS("Error! More than one CloneSet is selecting pod", "pod", klog.KObj(pod), "cloneSets", e.joinCloneSetNames(csMatched))
|
|
}
|
|
return csMatched
|
|
}
|
|
|
|
func (e *podEventHandler) joinCloneSetNames(csList []appsv1alpha1.CloneSet) string {
|
|
var names []string
|
|
for _, cs := range csList {
|
|
names = append(names, cs.Name)
|
|
}
|
|
return strings.Join(names, ",")
|
|
}
|
|
|
|
// pvcEventHandler turns PersistentVolumeClaim events into reconcile requests
// for the CloneSet that owns the claim. It carries no state.
type pvcEventHandler struct{}
|
|
|
|
// Compile-time check that *pvcEventHandler implements handler.EventHandler.
var _ handler.EventHandler = (*pvcEventHandler)(nil)
|
|
|
|
func (e *pvcEventHandler) Create(ctx context.Context, evt event.CreateEvent, q workqueue.RateLimitingInterface) {
|
|
pvc := evt.Object.(*v1.PersistentVolumeClaim)
|
|
if pvc.DeletionTimestamp != nil {
|
|
e.Delete(ctx, event.DeleteEvent{Object: evt.Object}, q)
|
|
return
|
|
}
|
|
|
|
if controllerRef := metav1.GetControllerOf(pvc); controllerRef != nil {
|
|
if req := resolveControllerRef(pvc.Namespace, controllerRef); req != nil {
|
|
isSatisfied, _, _ := clonesetutils.ScaleExpectations.SatisfiedExpectations(req.String())
|
|
clonesetutils.ScaleExpectations.ObserveScale(req.String(), expectations.Create, pvc.Name)
|
|
if isSatisfied {
|
|
// If the scale expectation is satisfied, it should be an existing Pod and the Informer
|
|
// cache should have just synced.
|
|
q.AddAfter(*req, initialingRateLimiter.When(req))
|
|
} else {
|
|
// Otherwise, add it immediately and reset the rate limiter
|
|
initialingRateLimiter.Forget(req)
|
|
q.Add(*req)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (e *pvcEventHandler) Update(ctx context.Context, evt event.UpdateEvent, q workqueue.RateLimitingInterface) {
|
|
pvc := evt.ObjectNew.(*v1.PersistentVolumeClaim)
|
|
if pvc.DeletionTimestamp != nil {
|
|
e.Delete(ctx, event.DeleteEvent{Object: evt.ObjectNew}, q)
|
|
}
|
|
}
|
|
|
|
func (e *pvcEventHandler) Delete(ctx context.Context, evt event.DeleteEvent, q workqueue.RateLimitingInterface) {
|
|
pvc, ok := evt.Object.(*v1.PersistentVolumeClaim)
|
|
if !ok {
|
|
klog.ErrorS(nil, "Skipped pvc deletion event", "deleteStateUnknown", evt.DeleteStateUnknown, "obj", evt.Object)
|
|
return
|
|
}
|
|
|
|
if controllerRef := metav1.GetControllerOf(pvc); controllerRef != nil {
|
|
if req := resolveControllerRef(pvc.Namespace, controllerRef); req != nil {
|
|
clonesetutils.ScaleExpectations.ObserveScale(req.String(), expectations.Delete, pvc.Name)
|
|
q.Add(*req)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (e *pvcEventHandler) Generic(ctx context.Context, evt event.GenericEvent, q workqueue.RateLimitingInterface) {
|
|
|
|
}
|