/* Copyright 2019 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package controllers import ( "context" "fmt" "time" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/client-go/tools/record" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" operatorv1 "k8s.io/kubeadm/operator/api/v1alpha1" commandimpl "k8s.io/kubeadm/operator/commands" operatorerrors "k8s.io/kubeadm/operator/errors" ) // RuntimeTaskReconciler reconciles a RuntimeTask object type RuntimeTaskReconciler struct { client.Client NodeName string Operation string recorder record.EventRecorder Log logr.Logger } // +kubebuilder:rbac:groups=operator.kubeadm.x-k8s.io,resources=runtimetasks,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=operator.kubeadm.x-k8s.io,resources=runtimetasks/status,verbs=get;update;patch // SetupWithManager configures the controller for calling the reconciler func (r *RuntimeTaskReconciler) SetupWithManager(mgr ctrl.Manager) error { var mapFunc handler.ToRequestsFunc = func(o handler.MapObject) []reconcile.Request { return taskGroupToTaskRequests(r.Client, o) } err := ctrl.NewControllerManagedBy(mgr). For(&operatorv1.RuntimeTask{}). Watches( // force reconcile Task every time the parent TaskGroup changes &source.Kind{Type: &operatorv1.RuntimeTaskGroup{}}, &handler.EnqueueRequestsFromMapFunc{ToRequests: mapFunc}, ). Complete(r) r.recorder = mgr.GetEventRecorderFor("runtime-task-controller") return err } // Reconcile a runtimetask func (r *RuntimeTaskReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, rerr error) { ctx := context.Background() log := r.Log.WithValues("task", req.NamespacedName) // Fetch the Task instance task := &operatorv1.RuntimeTask{} if err := r.Client.Get(ctx, req.NamespacedName, task); err != nil { if apierrors.IsNotFound(err) { return ctrl.Result{}, nil } return ctrl.Result{}, err } // Ignore the Task if it doesn't target the node the controller is supervising if task.Spec.NodeName != r.NodeName { return ctrl.Result{}, nil } // Ignore the Task if it is already completed or failed if task.Status.CompletionTime != nil { return ctrl.Result{}, nil } // Fetch the parent TaskGroup instance taskgroup, err := getOwnerTaskGroup(ctx, r.Client, task.ObjectMeta) if err != nil { return ctrl.Result{}, err } // Fetch the parent Operation instance operation, err := getOwnerOperation(ctx, r.Client, taskgroup.ObjectMeta) if err != nil { return ctrl.Result{}, err } // If the controller is set to manage Task for a specific operation, ignore everything else if r.Operation != operation.Name { return ctrl.Result{}, nil } // Initialize the patch helper patchHelper, err := patch.NewHelper(task, r) if err != nil { return ctrl.Result{}, err } // Always attempt to Patch the Task object and status after each reconciliation. defer func() { if err := patchHelper.Patch(ctx, task); err != nil { log.Error(err, "failed to patch Task") if rerr == nil { rerr = err } } }() // Reconcile the Task if err := r.reconcileTask(operation, taskgroup, task, log); err != nil { return ctrl.Result{}, err } return ctrl.Result{}, nil } func (r *RuntimeTaskReconciler) reconcileTask(operation *operatorv1.Operation, taskgroup *operatorv1.RuntimeTaskGroup, task *operatorv1.RuntimeTask, log logr.Logger) (err error) { // gets relevant settings from top level objects executionMode := operation.Spec.GetTypedOperationExecutionMode() operationPaused := operation.Status.Paused // Reconcile recovery from errors recovered := r.reconcileRecovery(executionMode, task, log) // Reconcile paused override from top level objects r.reconcilePauseOverride(operationPaused, task) // Handle deleted Task if !task.DeletionTimestamp.IsZero() { err = r.reconcileDelete(task) if err != nil { return } } // Handle non-deleted/non-recovered Task // NB. in case of a task recovered from error, we are forcing another reconcile before actually // executing the next command so the user get evidence of what is happening if !recovered { err = r.reconcileNormal(executionMode, task, log) if err != nil { return } } // Always reconcile Task Phase at the end r.reconcilePhase(task) return } func (r *RuntimeTaskReconciler) reconcileRecovery(executionMode operatorv1.OperationExecutionMode, task *operatorv1.RuntimeTask, log logr.Logger) bool { // if there is no error, return if task.Status.ErrorReason == nil && task.Status.ErrorMessage == nil { return false } // if there is no error recovery strategy, return if task.Spec.GetTypedTaskRecoveryStrategy() == operatorv1.RuntimeTaskRecoveryUnknownStrategy { return false } switch task.Spec.GetTypedTaskRecoveryStrategy() { case operatorv1.RuntimeTaskRecoveryRetryingFailedCommandStrategy: log.WithValues("command", task.Status.CurrentCommand).Info("Retrying command after failure") r.recorder.Event(task, corev1.EventTypeNormal, "TaskErrorRetry", fmt.Sprintf("Retrying command %d after failure", task.Status.CurrentCommand)) case operatorv1.RuntimeTaskRecoverySkippingFailedCommandStrategy: log.WithValues("command", task.Status.CurrentCommand).Info("Skipping command after failure") r.recorder.Event(task, corev1.EventTypeNormal, "TaskErrorSkip", fmt.Sprintf("Skipping command %d after failure", task.Status.CurrentCommand)) // if all the commands are done, set the Task completion time if int(task.Status.CurrentCommand) >= len(task.Spec.Commands) { task.Status.SetCompletionTime() } else { // Move to the next command task.Status.NextCurrentCommand(task.Spec.Commands) if executionMode == operatorv1.OperationExecutionModeControlled { task.Status.Paused = true } } default: //TODO: error (if possible do validation before getting here) } // Reset the error task.Status.ResetError() // Reset the recovery mode so the user can choose again how to proceed at the next error task.Spec.RecoveryMode = "" return true } func (r *RuntimeTaskReconciler) reconcilePauseOverride(operationPaused bool, task *operatorv1.RuntimeTask) { // record paused override state change, if any pausedOverride := operationPaused recordPausedChange(r.recorder, task, task.Status.Paused, pausedOverride, "by top level objects") // update status with paused override setting from top level objects task.Status.Paused = pausedOverride } func (r *RuntimeTaskReconciler) reconcileNormal(executionMode operatorv1.OperationExecutionMode, task *operatorv1.RuntimeTask, log logr.Logger) error { // If the Task doesn't have finalizer, add it. //if !util.Contains(task.Finalizers, operatorv1.RuntimeTaskFinalizer) { // task.Finalizers = append(task.Finalizers, operatorv1.RuntimeTaskFinalizer) //} // if higher level object are paused, return if task.Status.Paused { return nil } // if nil, set the Task start time, initialize CurrentCommand and return // NB. we are returning here so the object get updated reporting start condition // before actual execution starts if task.Status.StartTime == nil { task.Status.SetStartTime() task.Status.NextCurrentCommand(task.Spec.Commands) return nil } // Proceed with the current command execution if executionMode == operatorv1.OperationExecutionModeDryRun { // if dry running wait for an arbitrary delay so the user will get a better perception of the Task execution order time.Sleep(3 * time.Second) } else { // else we should execute the CurrentCommand log.WithValues("command", task.Status.CurrentCommand).Info("running command") // transpose CurrentCommand (1 based) to index (0 based) and check index out of range index := int(task.Status.CurrentCommand) - 1 if index < 0 || index >= len(task.Spec.Commands) { task.Status.SetError( operatorerrors.NewRuntimeTaskIndexOutOfRangeError("command with index %d does not exists for task %s", index, task.Name), ) } // execute the command err := commandimpl.RunCommand(&task.Spec.Commands[index], log) // if the command returned an error, return if err != nil { log.WithValues("command", task.Status.CurrentCommand).WithValues("error", fmt.Sprintf("%+v", err)).Info("command failed") r.recorder.Event(task, corev1.EventTypeWarning, "CommandError", fmt.Sprintf("Command %d execution failed: %s", task.Status.CurrentCommand, fmt.Sprintf("%+v", err))) task.Status.SetError( operatorerrors.NewRuntimeTaskExecutionError("error executing command number %d for task %s: %+v", task.Status.CurrentCommand, task.Name, err), ) return nil } log.WithValues("command", task.Status.CurrentCommand).Info("command completed") r.recorder.Event(task, corev1.EventTypeNormal, "CommandCompleted", fmt.Sprintf("Command %d execution completed", task.Status.CurrentCommand)) } // if all the commands are done, set the Task completion time and return if int(task.Status.CurrentCommand) >= len(task.Spec.Commands) { task.Status.SetCompletionTime() return nil } // Otherwise, move to the next command task.Status.NextCurrentCommand(task.Spec.Commands) if executionMode == operatorv1.OperationExecutionModeControlled { task.Status.Paused = true } return nil } func (r *RuntimeTaskReconciler) reconcileDelete(task *operatorv1.RuntimeTask) error { // Task is deleted so remove the finalizer. //task.Finalizers = util.Filter(task.Finalizers, operatorv1.RuntimeTaskFinalizer) return nil } func (r *RuntimeTaskReconciler) reconcilePhase(task *operatorv1.RuntimeTask) { // Set the phase to "deleting" if the deletion timestamp is set. if !task.DeletionTimestamp.IsZero() { task.Status.SetTypedPhase(operatorv1.RuntimeTaskPhaseDeleted) return } // Set the phase to "failed" if any of Status.ErrorReason or Status.ErrorMessage is not-nil. if task.Status.ErrorReason != nil || task.Status.ErrorMessage != nil { task.Status.SetTypedPhase(operatorv1.RuntimeTaskPhaseFailed) return } // Set the phase to "succeeded" if completion date is set. if task.Status.CompletionTime != nil { task.Status.SetTypedPhase(operatorv1.RuntimeTaskPhaseSucceeded) return } // Set the phase to "paused" if paused is set. if task.Status.Paused { task.Status.SetTypedPhase(operatorv1.RuntimeTaskPhasePaused) return } // Set the phase to "running" if start date is set. if task.Status.StartTime != nil { task.Status.SetTypedPhase(operatorv1.RuntimeTaskPhaseRunning) return } // Set the phase to "pending" if nil. task.Status.SetTypedPhase(operatorv1.RuntimeTaskPhasePending) }