mirror of https://github.com/openkruise/kruise.git
454 lines
17 KiB
Go
454 lines
17 KiB
Go
/*
|
|
Copyright 2023 The Kruise Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package imagelistpulljob
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"hash/fnv"
|
|
"reflect"
|
|
"time"
|
|
|
|
appsv1 "k8s.io/api/apps/v1"
|
|
corev1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/fields"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
|
"k8s.io/apimachinery/pkg/util/rand"
|
|
"k8s.io/client-go/tools/record"
|
|
"k8s.io/client-go/util/retry"
|
|
"k8s.io/klog/v2"
|
|
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
|
"k8s.io/kubernetes/pkg/util/slice"
|
|
"k8s.io/utils/clock"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/controller"
|
|
"sigs.k8s.io/controller-runtime/pkg/handler"
|
|
"sigs.k8s.io/controller-runtime/pkg/manager"
|
|
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
|
"sigs.k8s.io/controller-runtime/pkg/source"
|
|
|
|
appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1"
|
|
"github.com/openkruise/kruise/pkg/features"
|
|
"github.com/openkruise/kruise/pkg/util"
|
|
utilclient "github.com/openkruise/kruise/pkg/util/client"
|
|
utildiscovery "github.com/openkruise/kruise/pkg/util/discovery"
|
|
"github.com/openkruise/kruise/pkg/util/expectations"
|
|
utilfeature "github.com/openkruise/kruise/pkg/util/feature"
|
|
"github.com/openkruise/kruise/pkg/util/fieldindex"
|
|
)
|
|
|
|
var (
|
|
concurrentReconciles = 3
|
|
controllerKind = appsv1alpha1.SchemeGroupVersion.WithKind("ImageListPullJob")
|
|
slowStartInitialBatchSize = 1
|
|
controllerName = "imagelistpulljob-controller"
|
|
resourceVersionExpectations = expectations.NewResourceVersionExpectation()
|
|
scaleExpectations = expectations.NewScaleExpectations()
|
|
)
|
|
|
|
// Add creates a new ImageListPullJob Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller
|
|
// and Start it when the Manager is Started.
|
|
func Add(mgr manager.Manager) error {
|
|
if !utildiscovery.DiscoverGVK(controllerKind) || !utilfeature.DefaultFeatureGate.Enabled(features.KruiseDaemon) ||
|
|
!utilfeature.DefaultFeatureGate.Enabled(features.ImagePullJobGate) {
|
|
return nil
|
|
}
|
|
return add(mgr, newReconciler(mgr))
|
|
}
|
|
|
|
// newReconciler returns a new reconcile.Reconciler
|
|
func newReconciler(mgr manager.Manager) *ReconcileImageListPullJob {
|
|
return &ReconcileImageListPullJob{
|
|
Client: utilclient.NewClientFromManager(mgr, controllerName),
|
|
scheme: mgr.GetScheme(),
|
|
clock: clock.RealClock{},
|
|
recorder: mgr.GetEventRecorderFor(controllerName),
|
|
}
|
|
}
|
|
|
|
// add a new Controller to mgr with r as the reconcile.Reconciler
|
|
func add(mgr manager.Manager, r *ReconcileImageListPullJob) error {
|
|
// Create a new controller
|
|
c, err := controller.New(controllerName, mgr, controller.Options{Reconciler: r,
|
|
MaxConcurrentReconciles: concurrentReconciles, CacheSyncTimeout: util.GetControllerCacheSyncTimeout()})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Watch for changes to ImageListPullJob
|
|
err = c.Watch(source.Kind(mgr.GetCache(), &appsv1alpha1.ImageListPullJob{}), &handler.EnqueueRequestForObject{})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Watch for changes to ImagePullJob
|
|
// todo the imagelistpulljob(status) will not change if the pull job status does not change significantly (ex. number of failed nodeimage changes from 1 to 2)
|
|
err = c.Watch(source.Kind(mgr.GetCache(), &appsv1alpha1.ImagePullJob{}), &imagePullJobEventHandler{
|
|
enqueueHandler: handler.EnqueueRequestForOwner(
|
|
mgr.GetScheme(), mgr.GetRESTMapper(), &appsv1alpha1.ImageListPullJob{}, handler.OnlyControllerOwner()),
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var _ reconcile.Reconciler = &ReconcileImageListPullJob{}
|
|
|
|
// ReconcileImageListPullJob reconciles a ImageListPullJob object
|
|
type ReconcileImageListPullJob struct {
|
|
client.Client
|
|
scheme *runtime.Scheme
|
|
clock clock.Clock
|
|
recorder record.EventRecorder
|
|
}
|
|
|
|
// +kubebuilder:rbac:groups=apps.kruise.io,resources=imagelistpulljobs,verbs=get;list;watch;create;update;patch;delete
|
|
// +kubebuilder:rbac:groups=apps.kruise.io,resources=imagelistpulljobs/status,verbs=get;update;patch
|
|
// +kubebuilder:rbac:groups=apps.kruise.io,resources=imagelistpulljobs/finalizers,verbs=update
|
|
|
|
// Reconcile reads that state of the cluster for a ImageListPullJob object and makes changes based on the state read
|
|
// and what is in the ImageListPullJob.Spec
|
|
// Automatically generate RBAC rules to allow the Controller to read and write ImageListPullJob
|
|
func (r *ReconcileImageListPullJob) Reconcile(_ context.Context, request reconcile.Request) (res reconcile.Result, err error) {
|
|
klog.V(5).InfoS("Starting to process ImageListPullJob", "imageListPullJob", request)
|
|
|
|
// 1.Fetch the ImageListPullJob instance
|
|
job := &appsv1alpha1.ImageListPullJob{}
|
|
err = r.Get(context.TODO(), request.NamespacedName, job)
|
|
if err != nil {
|
|
if errors.IsNotFound(err) {
|
|
// Object not found, return. Created objects are automatically garbage collected.
|
|
// For additional cleanup logic use finalizers.
|
|
return reconcile.Result{}, nil
|
|
}
|
|
return reconcile.Result{}, err
|
|
}
|
|
|
|
hash, err := r.refreshJobTemplateHash(job)
|
|
if err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("refresh job template hash error: %v", err)
|
|
}
|
|
|
|
// The Job has been finished
|
|
if job.Status.CompletionTime != nil {
|
|
var leftTime time.Duration
|
|
if job.Spec.CompletionPolicy.TTLSecondsAfterFinished != nil {
|
|
leftTime = time.Duration(*job.Spec.CompletionPolicy.TTLSecondsAfterFinished)*time.Second - time.Since(job.Status.CompletionTime.Time)
|
|
if leftTime <= 0 {
|
|
klog.InfoS("Deleting ImageListPullJob for ttlSecondsAfterFinished", "imageListPullJob", klog.KObj(job))
|
|
if err = r.Delete(context.TODO(), job); err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("delete ImageListPullJob error: %v", err)
|
|
}
|
|
return reconcile.Result{}, nil
|
|
}
|
|
}
|
|
return reconcile.Result{RequeueAfter: leftTime}, nil
|
|
}
|
|
|
|
if scaleSatisfied, unsatisfiedDuration, scaleDirtyImagePullJobs := scaleExpectations.SatisfiedExpectations(request.String()); !scaleSatisfied {
|
|
if unsatisfiedDuration >= expectations.ExpectationTimeout {
|
|
klog.InfoS("Expectation unsatisfied overtime for ImageListPullJob", "imageListPullJob", request, "scaleDirtyImagePullJobs", scaleDirtyImagePullJobs, "overtime", unsatisfiedDuration)
|
|
return reconcile.Result{}, nil
|
|
}
|
|
klog.V(4).InfoS("Not satisfied scale for ImageListPullJob", "imageListPullJob", request, "scaleDirtyImagePullJobs", scaleDirtyImagePullJobs)
|
|
return reconcile.Result{RequeueAfter: expectations.ExpectationTimeout - unsatisfiedDuration}, nil
|
|
}
|
|
|
|
// 2. Get ImagePullJob owned by this job
|
|
imagePullJobsMap, err := r.getOwnedImagePullJob(job)
|
|
if err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("failed to get imagePullJob: %v", err)
|
|
}
|
|
|
|
// If resourceVersion expectations have not satisfied yet, just skip this reconcile
|
|
for _, imagePullJob := range imagePullJobsMap {
|
|
resourceVersionExpectations.Observe(imagePullJob)
|
|
if isSatisfied, unsatisfiedDuration := resourceVersionExpectations.IsSatisfied(imagePullJob); !isSatisfied {
|
|
if unsatisfiedDuration >= expectations.ExpectationTimeout {
|
|
klog.InfoS("Expectation unsatisfied overtime for ImageListPullJob", "imageListPullJob", request, "timeout", unsatisfiedDuration)
|
|
return reconcile.Result{}, nil
|
|
}
|
|
klog.V(4).InfoS("Not satisfied resourceVersion for ImageListPullJob", "imageListPullJob", request)
|
|
return reconcile.Result{RequeueAfter: expectations.ExpectationTimeout - unsatisfiedDuration}, nil
|
|
}
|
|
}
|
|
|
|
// 3. Calculate the new status for this job
|
|
newStatus := r.calculateStatus(job, imagePullJobsMap)
|
|
|
|
// 4. Compute ImagePullJobActions
|
|
needToCreate, needToDelete := r.computeImagePullJobActions(job, imagePullJobsMap, hash)
|
|
|
|
// 5. Sync ImagePullJob
|
|
err = r.syncImagePullJob(job, needToCreate, needToDelete)
|
|
if err != nil {
|
|
return reconcile.Result{}, err
|
|
}
|
|
|
|
// 6. Update status
|
|
if !util.IsJSONObjectEqual(&job.Status, newStatus) {
|
|
if err = r.updateStatus(job, newStatus); err != nil {
|
|
return reconcile.Result{}, fmt.Errorf("update ImageListPullJob status error: %v", err)
|
|
}
|
|
}
|
|
|
|
return reconcile.Result{}, nil
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) refreshJobTemplateHash(job *appsv1alpha1.ImageListPullJob) (string, error) {
|
|
newHash := func(job *appsv1alpha1.ImageListPullJob) string {
|
|
jobTemplateHasher := fnv.New32a()
|
|
hashutil.DeepHashObject(jobTemplateHasher, job.Spec.ImagePullJobTemplate)
|
|
return rand.SafeEncodeString(fmt.Sprint(jobTemplateHasher.Sum32()))
|
|
}(job)
|
|
|
|
oldHash := job.Labels[appsv1.ControllerRevisionHashLabelKey]
|
|
if newHash == oldHash {
|
|
return newHash, nil
|
|
}
|
|
|
|
emptyJob := &appsv1alpha1.ImageListPullJob{}
|
|
emptyJob.SetName(job.Name)
|
|
emptyJob.SetNamespace(job.Namespace)
|
|
body := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, appsv1.ControllerRevisionHashLabelKey, newHash)
|
|
return newHash, r.Patch(context.TODO(), emptyJob, client.RawPatch(types.MergePatchType, []byte(body)))
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) updateStatus(job *appsv1alpha1.ImageListPullJob, newStatus *appsv1alpha1.ImageListPullJobStatus) error {
|
|
return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
|
|
imageListPullJob := &appsv1alpha1.ImageListPullJob{}
|
|
if err := r.Get(context.TODO(), types.NamespacedName{Namespace: job.Namespace, Name: job.Name}, imageListPullJob); err != nil {
|
|
return err
|
|
}
|
|
imageListPullJob.Status = *newStatus
|
|
return r.Status().Update(context.TODO(), imageListPullJob)
|
|
})
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) computeImagePullJobActions(job *appsv1alpha1.ImageListPullJob, imagePullJobs map[string]*appsv1alpha1.ImagePullJob, hash string) ([]*appsv1alpha1.ImagePullJob, []*appsv1alpha1.ImagePullJob) {
|
|
var needToDelete, needToCreate []*appsv1alpha1.ImagePullJob
|
|
//1. need to create
|
|
images, needToDelete := r.filterImagesAndImagePullJobs(job, imagePullJobs, hash)
|
|
needToCreate = r.newImagePullJobs(job, images, hash)
|
|
// some images delete from ImageListPullJob.Spec.Images
|
|
for image, imagePullJob := range imagePullJobs {
|
|
if !slice.ContainsString(job.Spec.Images, image, nil) {
|
|
needToDelete = append(needToDelete, imagePullJob)
|
|
}
|
|
}
|
|
return needToCreate, needToDelete
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) calculateStatus(job *appsv1alpha1.ImageListPullJob, imagePullJobs map[string]*appsv1alpha1.ImagePullJob) *appsv1alpha1.ImageListPullJobStatus {
|
|
var active, completed, succeeded int32
|
|
// record the failed image status
|
|
var failedImageStatuses []*appsv1alpha1.FailedImageStatus
|
|
|
|
for _, imagePullJob := range imagePullJobs {
|
|
if imagePullJob.Status.StartTime == nil {
|
|
continue
|
|
}
|
|
|
|
if imagePullJob.Status.Active > 0 {
|
|
active = active + 1
|
|
}
|
|
|
|
if imagePullJob.Status.Failed > 0 {
|
|
failedImagePullJobStatus := &appsv1alpha1.FailedImageStatus{
|
|
ImagePullJob: imagePullJob.Name,
|
|
Name: imagePullJob.Spec.Image,
|
|
Message: fmt.Sprintf("Please check for details which nodes failed by 'kubectl get ImagePullJob %s'.", imagePullJob.Name),
|
|
}
|
|
failedImageStatuses = append(failedImageStatuses, failedImagePullJobStatus)
|
|
}
|
|
|
|
if imagePullJob.Status.Desired == (imagePullJob.Status.Failed + imagePullJob.Status.Succeeded) {
|
|
completed = completed + 1
|
|
}
|
|
|
|
if imagePullJob.Status.Desired == imagePullJob.Status.Succeeded {
|
|
succeeded = succeeded + 1
|
|
}
|
|
}
|
|
|
|
// 4. status
|
|
newStatus := &appsv1alpha1.ImageListPullJobStatus{
|
|
Desired: int32(len(job.Spec.Images)),
|
|
Active: active,
|
|
Completed: completed,
|
|
Succeeded: succeeded,
|
|
StartTime: job.Status.StartTime,
|
|
FailedImageStatuses: failedImageStatuses,
|
|
}
|
|
|
|
now := metav1.NewTime(r.clock.Now())
|
|
if newStatus.StartTime == nil {
|
|
newStatus.StartTime = &now
|
|
}
|
|
|
|
if job.Spec.CompletionPolicy.Type != appsv1alpha1.Never && newStatus.Desired == newStatus.Completed {
|
|
newStatus.CompletionTime = &now
|
|
}
|
|
return newStatus
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) syncImagePullJob(job *appsv1alpha1.ImageListPullJob, needToCreate, needToDelete []*appsv1alpha1.ImagePullJob) error {
|
|
|
|
//1. manage creating
|
|
var errs []error
|
|
if len(needToCreate) > 0 {
|
|
var createdNum int
|
|
var createdErr error
|
|
|
|
createdNum, createdErr = util.SlowStartBatch(len(needToCreate), slowStartInitialBatchSize, func(idx int) error {
|
|
imagePullJob := needToCreate[idx]
|
|
key := types.NamespacedName{Namespace: job.Namespace, Name: job.Name}.String()
|
|
scaleExpectations.ExpectScale(key, expectations.Create, imagePullJob.Spec.Image)
|
|
err := r.Create(context.TODO(), imagePullJob)
|
|
if err != nil {
|
|
scaleExpectations.ObserveScale(key, expectations.Create, imagePullJob.Spec.Image)
|
|
}
|
|
return err
|
|
})
|
|
|
|
if createdErr == nil {
|
|
r.recorder.Eventf(job, corev1.EventTypeNormal, "Successful create ImagePullJob", "Create %d ImagePullJob", createdNum)
|
|
} else {
|
|
errs = append(errs, createdErr)
|
|
}
|
|
}
|
|
|
|
//2. manage deleting
|
|
if len(needToDelete) > 0 {
|
|
var deleteErrs []error
|
|
for _, imagePullJob := range needToDelete {
|
|
key := types.NamespacedName{Namespace: job.Namespace, Name: job.Name}.String()
|
|
scaleExpectations.ExpectScale(key, expectations.Delete, imagePullJob.Spec.Image)
|
|
if err := r.Delete(context.TODO(), imagePullJob); err != nil {
|
|
scaleExpectations.ObserveScale(key, expectations.Delete, imagePullJob.Spec.Image)
|
|
deleteErrs = append(deleteErrs, fmt.Errorf("fail to delete ImagePullJob (%s/%s) for : %s", imagePullJob.Namespace, imagePullJob.Name, err))
|
|
}
|
|
}
|
|
|
|
if len(deleteErrs) > 0 {
|
|
errs = append(errs, deleteErrs...)
|
|
} else {
|
|
r.recorder.Eventf(job, corev1.EventTypeNormal, "Successful delete ImagePullJob", "Delete %d ImagePullJob", len(needToDelete))
|
|
}
|
|
}
|
|
|
|
return utilerrors.NewAggregate(errs)
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) filterImagesAndImagePullJobs(job *appsv1alpha1.ImageListPullJob, imagePullJobs map[string]*appsv1alpha1.ImagePullJob, hash string) ([]string, []*appsv1alpha1.ImagePullJob) {
|
|
var images, imagesInCurrentImagePullJob []string
|
|
var needToDelete []*appsv1alpha1.ImagePullJob
|
|
|
|
for image := range imagePullJobs {
|
|
imagesInCurrentImagePullJob = append(imagesInCurrentImagePullJob, image)
|
|
}
|
|
|
|
for _, image := range job.Spec.Images {
|
|
|
|
// should create imagePullJob for new image
|
|
if len(imagePullJobs) <= 0 || !slice.ContainsString(imagesInCurrentImagePullJob, image, nil) {
|
|
images = append(images, image)
|
|
continue
|
|
}
|
|
imagePullJob, ok := imagePullJobs[image]
|
|
if !ok {
|
|
klog.InfoS("Could not found imagePullJob for image name", "imageName", image)
|
|
continue
|
|
}
|
|
// should create new imagePullJob if the template is changed.
|
|
if !isConsistentVersion(imagePullJob, &job.Spec.ImagePullJobTemplate, hash) {
|
|
images = append(images, image)
|
|
// should delete old imagepulljob
|
|
needToDelete = append(needToDelete, imagePullJob)
|
|
}
|
|
}
|
|
|
|
return images, needToDelete
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) newImagePullJobs(job *appsv1alpha1.ImageListPullJob, images []string, hash string) []*appsv1alpha1.ImagePullJob {
|
|
var needToCreate []*appsv1alpha1.ImagePullJob
|
|
if len(images) <= 0 {
|
|
return needToCreate
|
|
}
|
|
for _, image := range images {
|
|
imagePullJob := &appsv1alpha1.ImagePullJob{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Namespace: job.Namespace,
|
|
GenerateName: fmt.Sprintf("%s-", job.Name),
|
|
Labels: map[string]string{
|
|
appsv1.ControllerRevisionHashLabelKey: hash,
|
|
},
|
|
Annotations: make(map[string]string),
|
|
OwnerReferences: []metav1.OwnerReference{
|
|
*metav1.NewControllerRef(job, controllerKind),
|
|
},
|
|
},
|
|
Spec: appsv1alpha1.ImagePullJobSpec{
|
|
Image: image,
|
|
ImagePullJobTemplate: job.Spec.ImagePullJobTemplate,
|
|
},
|
|
}
|
|
needToCreate = append(needToCreate, imagePullJob)
|
|
}
|
|
|
|
return needToCreate
|
|
}
|
|
|
|
func (r *ReconcileImageListPullJob) getOwnedImagePullJob(job *appsv1alpha1.ImageListPullJob) (map[string]*appsv1alpha1.ImagePullJob, error) {
|
|
|
|
opts := &client.ListOptions{
|
|
Namespace: job.Namespace,
|
|
FieldSelector: fields.SelectorFromSet(fields.Set{fieldindex.IndexNameForOwnerRefUID: string(job.UID)}),
|
|
}
|
|
imagePullJobList := &appsv1alpha1.ImagePullJobList{}
|
|
err := r.List(context.TODO(), imagePullJobList, opts, utilclient.DisableDeepCopy)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
imagePullJobsMap := make(map[string]*appsv1alpha1.ImagePullJob)
|
|
for i := range imagePullJobList.Items {
|
|
imagePullJob := imagePullJobList.Items[i]
|
|
if imagePullJob.DeletionTimestamp.IsZero() {
|
|
imagePullJobsMap[imagePullJob.Spec.Image] = &imagePullJob
|
|
}
|
|
}
|
|
return imagePullJobsMap, nil
|
|
}
|
|
|
|
func isConsistentVersion(oldImagePullJob *appsv1alpha1.ImagePullJob, newTemplate *appsv1alpha1.ImagePullJobTemplate, hash string) bool {
|
|
oldHash, exists := oldImagePullJob.Labels[appsv1.ControllerRevisionHashLabelKey]
|
|
if oldHash == hash {
|
|
return true
|
|
}
|
|
if !exists && reflect.DeepEqual(oldImagePullJob.Spec.ImagePullJobTemplate, *newTemplate) {
|
|
return true
|
|
}
|
|
|
|
klog.V(4).InfoS("ImagePullJob specification changed", "imagePullJob", klog.KObj(oldImagePullJob))
|
|
return false
|
|
}
|