diff --git a/pkg/apis/kubeflow/v1/default.go b/pkg/apis/kubeflow/v1/default.go index 885412d..50ad382 100644 --- a/pkg/apis/kubeflow/v1/default.go +++ b/pkg/apis/kubeflow/v1/default.go @@ -44,12 +44,6 @@ func setDefaultsTypeWorker(spec *common.ReplicaSpec) { } func SetDefaults_MPIJob(mpiJob *MPIJob) { - // set default BackoffLimit - if mpiJob.Spec.BackoffLimit == nil { - mpiJob.Spec.BackoffLimit = new(int32) - *mpiJob.Spec.BackoffLimit = 6 - } - // Set default cleanpod policy to None. if mpiJob.Spec.CleanPodPolicy == nil { none := common.CleanPodPolicyNone diff --git a/pkg/apis/kubeflow/v1/openapi_generated.go b/pkg/apis/kubeflow/v1/openapi_generated.go index 44734b4..fa96b84 100644 --- a/pkg/apis/kubeflow/v1/openapi_generated.go +++ b/pkg/apis/kubeflow/v1/openapi_generated.go @@ -348,20 +348,6 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA Format: "int32", }, }, - "backoffLimit": { - SchemaProps: spec.SchemaProps{ - Description: "Specifies the number of retries before marking this job failed. Defaults to 6.", - Type: []string{"integer"}, - Format: "int32", - }, - }, - "activeDeadlineSeconds": { - SchemaProps: spec.SchemaProps{ - Description: "Specifies the duration in seconds relative to the start time that the job may be active before the system tries to terminate it. Note that this takes precedence over `BackoffLimit` field.", - Type: []string{"integer"}, - Format: "int64", - }, - }, "cleanPodPolicy": { SchemaProps: spec.SchemaProps{ Description: "CleanPodPolicy defines the policy that whether to kill pods after the job completes. Defaults to None.", @@ -391,7 +377,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA }, "runPolicy": { SchemaProps: spec.SchemaProps{ - Description: "`RunPolicy` encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active. The policies specified in `RunPolicy` take precedence over the following fields: `BackoffLimit` and `ActiveDeadlineSeconds`.", + Description: "`RunPolicy` encapsulates various runtime policies of the distributed training job, for example how to clean up resources and how long the job can stay active.", Ref: ref("github.com/kubeflow/common/pkg/apis/common/v1.RunPolicy"), }, }, diff --git a/pkg/apis/kubeflow/v1/types.go b/pkg/apis/kubeflow/v1/types.go index 5d70a66..54565e9 100644 --- a/pkg/apis/kubeflow/v1/types.go +++ b/pkg/apis/kubeflow/v1/types.go @@ -44,17 +44,6 @@ type MPIJobSpec struct { // +optional SlotsPerWorker *int32 `json:"slotsPerWorker,omitempty"` - // Specifies the number of retries before marking this job failed. - // Defaults to 6. - // +optional - BackoffLimit *int32 `json:"backoffLimit,omitempty"` - - // Specifies the duration in seconds relative to the start time that - // the job may be active before the system tries to terminate it. - // Note that this takes precedence over `BackoffLimit` field. - // +optional - ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty"` - // CleanPodPolicy defines the policy that whether to kill pods after the job completes. // Defaults to None. CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"` @@ -69,8 +58,7 @@ type MPIJobSpec struct { // `RunPolicy` encapsulates various runtime policies of the distributed training // job, for example how to clean up resources and how long the job can stay - // active. The policies specified in `RunPolicy` take precedence over - // the following fields: `BackoffLimit` and `ActiveDeadlineSeconds`. + // active. RunPolicy *common.RunPolicy `json:"runPolicy,omitempty"` } diff --git a/pkg/apis/kubeflow/v1/zz_generated.deepcopy.go b/pkg/apis/kubeflow/v1/zz_generated.deepcopy.go index 5c754e0..b6da314 100644 --- a/pkg/apis/kubeflow/v1/zz_generated.deepcopy.go +++ b/pkg/apis/kubeflow/v1/zz_generated.deepcopy.go @@ -92,16 +92,6 @@ func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec) { *out = new(int32) **out = **in } - if in.BackoffLimit != nil { - in, out := &in.BackoffLimit, &out.BackoffLimit - *out = new(int32) - **out = **in - } - if in.ActiveDeadlineSeconds != nil { - in, out := &in.ActiveDeadlineSeconds, &out.ActiveDeadlineSeconds - *out = new(int64) - **out = **in - } if in.CleanPodPolicy != nil { in, out := &in.CleanPodPolicy, &out.CleanPodPolicy *out = new(commonv1.CleanPodPolicy) diff --git a/pkg/controllers/v1/mpi_job_controller.go b/pkg/controllers/v1/mpi_job_controller.go index 2b7cd1c..df51c8b 100644 --- a/pkg/controllers/v1/mpi_job_controller.go +++ b/pkg/controllers/v1/mpi_job_controller.go @@ -1324,19 +1324,6 @@ func (c *MPIJobController) newLauncher(mpiJob *kubeflow.MPIJob, kubectlDeliveryI }, }, }) - backOffLimit := mpiJob.Spec.BackoffLimit - activeDeadlineSeconds := mpiJob.Spec.ActiveDeadlineSeconds - if mpiJob.Spec.RunPolicy != nil { - warnMsg := fmt.Sprintf( - "runPolicy is specified in MPIJobSpec so backOffLimit/activeDeadlineSeconds in MPIJobSpec will be overwritten") - glog.Warning(warnMsg) - if mpiJob.Spec.RunPolicy.BackoffLimit != nil { - backOffLimit = mpiJob.Spec.RunPolicy.BackoffLimit - } - if mpiJob.Spec.RunPolicy.ActiveDeadlineSeconds != nil { - activeDeadlineSeconds = mpiJob.Spec.RunPolicy.ActiveDeadlineSeconds - } - } return &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: launcherName, @@ -1347,8 +1334,8 @@ func (c *MPIJobController) newLauncher(mpiJob *kubeflow.MPIJob, kubectlDeliveryI }, }, Spec: batchv1.JobSpec{ - BackoffLimit: backOffLimit, - ActiveDeadlineSeconds: activeDeadlineSeconds, + BackoffLimit: mpiJob.Spec.RunPolicy.BackoffLimit, + ActiveDeadlineSeconds: mpiJob.Spec.RunPolicy.ActiveDeadlineSeconds, Template: *podSpec, }, }