/*
Copyright 2017 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package sparkapplication

import (
	"fmt"
	"os"
	"os/exec"
	"strings"
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	prometheus_model "github.com/prometheus/client_model/go"
	"github.com/stretchr/testify/assert"
	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	kubeclientfake "k8s.io/client-go/kubernetes/fake"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"

	"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1alpha1"
	crdclientfake "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/clientset/versioned/fake"
	crdinformers "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/informers/externalversions"
	"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/config"
	"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/util"
)

// newFakeController builds a Controller backed by fake CRD and Kubernetes
// clientsets, seeds the informer caches with the given SparkApplication and
// pods, and pre-creates a node ("node1") carrying an external IP address.
func newFakeController(app *v1alpha1.SparkApplication, pods ...*apiv1.Pod) (*Controller, *record.FakeRecorder) {
	crdclientfake.AddToScheme(scheme.Scheme)
	crdClient := crdclientfake.NewSimpleClientset()
	kubeClient := kubeclientfake.NewSimpleClientset()
	informerFactory := crdinformers.NewSharedInformerFactory(crdClient, 0*time.Second)
	recorder := record.NewFakeRecorder(3)

	kubeClient.CoreV1().Nodes().Create(&apiv1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: "node1",
		},
		Status: apiv1.NodeStatus{
			Addresses: []apiv1.NodeAddress{
				{
					Type:    apiv1.NodeExternalIP,
					Address: "12.34.56.78",
				},
			},
		},
	})

	podInformerFactory := informers.NewSharedInformerFactory(kubeClient, 0*time.Second)
	controller := newSparkApplicationController(crdClient, kubeClient, informerFactory, podInformerFactory,
		recorder, &util.MetricConfig{}, "")

	informer := informerFactory.Sparkoperator().V1alpha1().SparkApplications().Informer()
	if app != nil {
		informer.GetIndexer().Add(app)
	}

	podInformer := podInformerFactory.Core().V1().Pods().Informer()
	for _, pod := range pods {
		if pod != nil {
			podInformer.GetIndexer().Add(pod)
		}
	}
	return controller, recorder
}

func TestOnAdd(t *testing.T) {
	ctrl, _ := newFakeController(nil)

	app := &v1alpha1.SparkApplication{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "foo",
			Namespace: "default",
		},
		Status: v1alpha1.SparkApplicationStatus{},
	}
	ctrl.onAdd(app)

	item, _ := ctrl.queue.Get()
	defer ctrl.queue.Done(item)
	key, ok := item.(string)
	assert.True(t, ok)
	expectedKey, _ := cache.MetaNamespaceKeyFunc(app)
	assert.Equal(t, expectedKey, key)
	ctrl.queue.Forget(item)
}
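// TestOnUpdate exercises the three update paths: a status-only change that is
// re-enqueued without firing spec-update events, a spec change whose update
// fails because the SparkApplication does not exist in the API server, and a
// spec change that succeeds and moves the application to InvalidatingState.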
func TestOnUpdate(t *testing.T) {
	ctrl, recorder := newFakeController(nil)

	appTemplate := &v1alpha1.SparkApplication{
		ObjectMeta: metav1.ObjectMeta{
			Name:            "foo",
			Namespace:       "default",
			ResourceVersion: "1",
		},
		Spec: v1alpha1.SparkApplicationSpec{
			Mode:  v1alpha1.ClusterMode,
			Image: stringptr("foo-image:v1"),
			Executor: v1alpha1.ExecutorSpec{
				Instances: int32ptr(1),
			},
		},
	}

	// Case 1: identical spec.
	copyWithSameSpec := appTemplate.DeepCopy()
	copyWithSameSpec.Status.ExecutionAttempts = 3
	copyWithSameSpec.ResourceVersion = "2"

	ctrl.onUpdate(appTemplate, copyWithSameSpec)

	// Verify that the SparkApplication was enqueued but no spec update events fired.
	item, _ := ctrl.queue.Get()
	key, ok := item.(string)
	assert.True(t, ok)
	expectedKey, _ := cache.MetaNamespaceKeyFunc(appTemplate)
	assert.Equal(t, expectedKey, key)
	ctrl.queue.Forget(item)
	ctrl.queue.Done(item)
	assert.Equal(t, 0, len(recorder.Events))

	// Case 2: spec update fails.
	copyWithSpecUpdate := appTemplate.DeepCopy()
	copyWithSpecUpdate.Spec.Image = stringptr("foo-image:v2")
	copyWithSpecUpdate.ResourceVersion = "2"

	ctrl.onUpdate(appTemplate, copyWithSpecUpdate)

	// Verify that the update failed due to the non-existence of the SparkApplication.
	assert.Equal(t, 1, len(recorder.Events))
	event := <-recorder.Events
	assert.True(t, strings.Contains(event, "SparkApplicationSpecUpdateFailed"))

	// Case 3: spec update succeeds.
	ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(appTemplate.Namespace).Create(appTemplate)
	ctrl.onUpdate(appTemplate, copyWithSpecUpdate)

	// Verify that the app was enqueued.
	item, _ = ctrl.queue.Get()
	key, ok = item.(string)
	assert.True(t, ok)
	expectedKey, _ = cache.MetaNamespaceKeyFunc(appTemplate)
	assert.Equal(t, expectedKey, key)
	ctrl.queue.Forget(item)
	ctrl.queue.Done(item)

	// Verify that the update succeeded.
	assert.Equal(t, 1, len(recorder.Events))
	event = <-recorder.Events
	assert.True(t, strings.Contains(event, "SparkApplicationSpecUpdateProcessed"))

	// Verify that the SparkApplication state was updated to InvalidatingState.
	app, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(appTemplate.Namespace).Get(appTemplate.Name, metav1.GetOptions{})
	assert.Nil(t, err)
	assert.Equal(t, v1alpha1.InvalidatingState, app.Status.AppState.State)
}

func TestOnDelete(t *testing.T) {
	ctrl, recorder := newFakeController(nil)

	app := &v1alpha1.SparkApplication{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "foo",
			Namespace: "default",
		},
		Status: v1alpha1.SparkApplicationStatus{},
	}
	ctrl.onAdd(app)
	ctrl.queue.Get()

	ctrl.onDelete(app)
	ctrl.queue.ShutDown()
	item, _ := ctrl.queue.Get()
	defer ctrl.queue.Done(item)
	assert.True(t, item == nil)
	event := <-recorder.Events
	assert.True(t, strings.Contains(event, "SparkApplicationDeleted"))
	ctrl.queue.Forget(item)
}
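// TestHelperProcessFailure and TestHelperProcessSuccess are not real tests:
// they are re-invocations of the test binary used to stub out spark-submit.
// Tests that exercise submission override execCommand so that the "command"
// runs `os.Args[0] -test.run=TestHelperProcess... --` with
// GO_WANT_HELPER_PROCESS=1 set, which simply exits with the desired status
// code. This is the standard helper-process pattern used by the os/exec
// package's own tests.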
func TestHelperProcessFailure(t *testing.T) {
	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
		return
	}
	os.Exit(2)
}

func TestHelperProcessSuccess(t *testing.T) {
	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
		return
	}
	os.Exit(0)
}

// fetchCounterValue reads the current value of a labeled counter by writing
// it out to a protobuf Metric, the same way Prometheus exposition does.
func fetchCounterValue(m *prometheus.CounterVec, labels map[string]string) float64 {
	pb := &prometheus_model.Metric{}
	m.With(labels).Write(pb)
	return pb.GetCounter().GetValue()
}

type metrics struct {
	submitMetricCount  float64
	runningMetricCount float64
	successMetricCount float64
	failedMetricCount  float64
}

type executorMetrics struct {
	runningMetricCount float64
	successMetricCount float64
	failedMetricCount  float64
}

func TestSyncSparkApplication_SubmissionFailed(t *testing.T) {
	os.Setenv(kubernetesServiceHostEnvVar, "localhost")
	os.Setenv(kubernetesServicePortEnvVar, "443")

	restartPolicyOnFailure := v1alpha1.RestartPolicy{
		Type:                             v1alpha1.OnFailure,
		OnFailureRetries:                 int32ptr(1),
		OnFailureRetryInterval:           int64ptr(100),
		OnSubmissionFailureRetryInterval: int64ptr(100),
		OnSubmissionFailureRetries:       int32ptr(1),
	}
	app := &v1alpha1.SparkApplication{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "foo",
			Namespace: "default",
		},
		Spec: v1alpha1.SparkApplicationSpec{
			RestartPolicy: restartPolicyOnFailure,
		},
		Status: v1alpha1.SparkApplicationStatus{
			AppState: v1alpha1.ApplicationState{
				State:        v1alpha1.NewState,
				ErrorMessage: "",
			},
		},
	}

	ctrl, recorder := newFakeController(app)
	_, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(app)
	if err != nil {
		t.Fatal(err)
	}

	execCommand = func(command string, args ...string) *exec.Cmd {
		cs := []string{"-test.run=TestHelperProcessFailure", "--", command}
		cs = append(cs, args...)
		cmd := exec.Command(os.Args[0], cs...)
		cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
		return cmd
	}

	// Attempt 1.
	err = ctrl.syncSparkApplication("default/foo")
	updatedApp, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})

	assert.Equal(t, v1alpha1.FailedSubmissionState, updatedApp.Status.AppState.State)
	assert.Equal(t, int32(1), updatedApp.Status.SubmissionAttempts)
	assert.Equal(t, float64(0), fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))
	assert.Equal(t, float64(1), fetchCounterValue(ctrl.metrics.sparkAppFailureCount, map[string]string{}))

	event := <-recorder.Events
	assert.True(t, strings.Contains(event, "SparkApplicationAdded"))
	event = <-recorder.Events
	assert.True(t, strings.Contains(event, "SparkApplicationSubmissionFailed"))

	// Attempt 2: retry again.
	updatedApp.Status.LastSubmissionAttemptTime = metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}
	ctrl, recorder = newFakeController(updatedApp)
	_, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(updatedApp)
	if err != nil {
		t.Fatal(err)
	}
	err = ctrl.syncSparkApplication("default/foo")

	// Verify that the application failed again.
	updatedApp, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})
	assert.Nil(t, err)
	assert.Equal(t, v1alpha1.FailedSubmissionState, updatedApp.Status.AppState.State)
	assert.Equal(t, int32(2), updatedApp.Status.SubmissionAttempts)
	assert.Equal(t, float64(0), fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))

	event = <-recorder.Events
	assert.True(t, strings.Contains(event, "SparkApplicationSubmissionFailed"))

	// Attempt 3: no more retries.
	updatedApp.Status.LastSubmissionAttemptTime = metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}
	ctrl, recorder = newFakeController(updatedApp)
	_, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(updatedApp)
	if err != nil {
		t.Fatal(err)
	}
	err = ctrl.syncSparkApplication("default/foo")

	// Verify that the application moved to the terminal FailedState.
	updatedApp, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})
	assert.Nil(t, err)
	assert.Equal(t, v1alpha1.FailedState, updatedApp.Status.AppState.State)
	// No more submission attempts were made.
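// TestShouldRetry walks shouldRetry through combinations of restart policy
// (Always, Never, OnFailure) and application state, checking that a retry is
// granted only where the policy allows it for that state.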
	assert.Equal(t, int32(2), updatedApp.Status.SubmissionAttempts)
}

func TestShouldRetry(t *testing.T) {
	type testcase struct {
		app         *v1alpha1.SparkApplication
		shouldRetry bool
	}

	testFn := func(test testcase, t *testing.T) {
		shouldRetry := shouldRetry(test.app)
		assert.Equal(t, test.shouldRetry, shouldRetry)
	}

	restartPolicyAlways := v1alpha1.RestartPolicy{
		Type:                             v1alpha1.Always,
		OnSubmissionFailureRetryInterval: int64ptr(100),
		OnFailureRetryInterval:           int64ptr(100),
	}

	restartPolicyNever := v1alpha1.RestartPolicy{
		Type: v1alpha1.Never,
	}

	restartPolicyOnFailure := v1alpha1.RestartPolicy{
		Type:                             v1alpha1.OnFailure,
		OnFailureRetries:                 int32ptr(1),
		OnFailureRetryInterval:           int64ptr(100),
		OnSubmissionFailureRetryInterval: int64ptr(100),
		OnSubmissionFailureRetries:       int32ptr(2),
	}

	testcases := []testcase{
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				}},
			shouldRetry: false,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.SucceedingState,
					},
				},
			},
			shouldRetry: true,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.SucceedingState,
					},
				},
			},
			shouldRetry: false,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailingState,
					},
				},
			},
			shouldRetry: true,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyNever,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailingState,
					},
				},
			},
			shouldRetry: false,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyNever,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailedSubmissionState,
					},
				},
			},
			shouldRetry: false,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailedSubmissionState,
					},
				},
			},
			shouldRetry: true,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.PendingRerunState,
					},
				},
			},
			shouldRetry: false,
		},
	}

	for _, test := range testcases {
		testFn(test, t)
	}
}
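// TestSyncSparkApp_SubmissionSuccess table-drives syncSparkApplication with a
// stubbed spark-submit that always succeeds, asserting the state transition
// expected for each combination of starting state and restart policy, plus
// the submit-count metric whenever a submission actually happens.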
func TestSyncSparkApp_SubmissionSuccess(t *testing.T) {
	type testcase struct {
		app           *v1alpha1.SparkApplication
		expectedState v1alpha1.ApplicationStateType
	}
	os.Setenv(kubernetesServiceHostEnvVar, "localhost")
	os.Setenv(kubernetesServicePortEnvVar, "443")

	testFn := func(test testcase, t *testing.T) {
		ctrl, _ := newFakeController(test.app)
		_, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(test.app.Namespace).Create(test.app)
		if err != nil {
			t.Fatal(err)
		}

		execCommand = func(command string, args ...string) *exec.Cmd {
			cs := []string{"-test.run=TestHelperProcessSuccess", "--", command}
			cs = append(cs, args...)
			cmd := exec.Command(os.Args[0], cs...)
			cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
			return cmd
		}

		err = ctrl.syncSparkApplication(fmt.Sprintf("%s/%s", test.app.Namespace, test.app.Name))
		assert.Nil(t, err)
		updatedApp, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(test.app.Namespace).Get(test.app.Name, metav1.GetOptions{})
		assert.Nil(t, err)
		assert.Equal(t, test.expectedState, updatedApp.Status.AppState.State)
		if test.expectedState == v1alpha1.SubmittedState {
			assert.Equal(t, float64(1), fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))
		}
	}

	restartPolicyAlways := v1alpha1.RestartPolicy{
		Type:                             v1alpha1.Always,
		OnSubmissionFailureRetryInterval: int64ptr(100),
		OnFailureRetryInterval:           int64ptr(100),
	}

	restartPolicyNever := v1alpha1.RestartPolicy{
		Type: v1alpha1.Never,
	}

	restartPolicyOnFailure := v1alpha1.RestartPolicy{
		Type:                             v1alpha1.OnFailure,
		OnFailureRetries:                 int32ptr(1),
		OnFailureRetryInterval:           int64ptr(100),
		OnSubmissionFailureRetryInterval: int64ptr(100),
		OnSubmissionFailureRetries:       int32ptr(2),
	}

	testcases := []testcase{
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				}},
			expectedState: v1alpha1.SubmittedState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.SucceedingState,
					},
				},
			},
			expectedState: v1alpha1.PendingRerunState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.PendingRerunState,
					},
				},
			},
			expectedState: v1alpha1.SubmittedState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailedSubmissionState,
					},
					LastSubmissionAttemptTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
				},
			},
			expectedState: v1alpha1.FailedSubmissionState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailedSubmissionState,
					},
					SubmissionAttempts:        1,
					LastSubmissionAttemptTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
				},
			},
			expectedState: v1alpha1.SubmittedState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailingState,
					},
					ExecutionAttempts: 1,
					CompletionTime:    metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
				},
			},
			expectedState: v1alpha1.PendingRerunState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyAlways,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailingState,
					},
					CompletionTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
				},
			},
			expectedState: v1alpha1.FailingState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyNever,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.InvalidatingState,
					},
					CompletionTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
				},
			},
			expectedState: v1alpha1.PendingRerunState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyNever,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.SucceedingState,
					},
				},
			},
			expectedState: v1alpha1.CompletedState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyNever,
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.NewState,
					},
				},
			},
			expectedState: v1alpha1.SubmittedState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailingState,
					},
					ExecutionAttempts: 2,
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
			},
			expectedState: v1alpha1.FailedState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailingState,
					},
					ExecutionAttempts: 1,
					CompletionTime:    metav1.Now(),
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
			},
			expectedState: v1alpha1.FailingState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailingState,
					},
					ExecutionAttempts: 1,
					CompletionTime:    metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
			},
			expectedState: v1alpha1.PendingRerunState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailedSubmissionState,
					},
					SubmissionAttempts: 3,
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
			},
			expectedState: v1alpha1.FailedState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailedSubmissionState,
					},
					SubmissionAttempts:        1,
					LastSubmissionAttemptTime: metav1.Now(),
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
			},
			expectedState: v1alpha1.FailedSubmissionState,
		},
		{
			app: &v1alpha1.SparkApplication{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo",
					Namespace: "default",
				},
				Status: v1alpha1.SparkApplicationStatus{
					AppState: v1alpha1.ApplicationState{
						State: v1alpha1.FailedSubmissionState,
					},
					SubmissionAttempts:        1,
					LastSubmissionAttemptTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
				},
				Spec: v1alpha1.SparkApplicationSpec{
					RestartPolicy: restartPolicyOnFailure,
				},
			},
			expectedState: v1alpha1.SubmittedState,
		},
	}

	for _, test := range testcases {
		testFn(test, t)
	}
}
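// TestSyncSparkApplication_ExecutingState feeds driver and executor pods in
// various phases through syncSparkApplication and checks the resulting
// application state, the per-executor states, and the application and
// executor metric counters.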
func TestSyncSparkApplication_ExecutingState(t *testing.T) {
	type testcase struct {
		appName                 string
		oldAppStatus            v1alpha1.ApplicationStateType
		oldExecutorStatus       map[string]v1alpha1.ExecutorState
		driverPod               *apiv1.Pod
		executorPod             *apiv1.Pod
		expectedAppState        v1alpha1.ApplicationStateType
		expectedExecutorState   map[string]v1alpha1.ExecutorState
		expectedAppMetrics      metrics
		expectedExecutorMetrics executorMetrics
	}
	os.Setenv(kubernetesServiceHostEnvVar, "localhost")
	os.Setenv(kubernetesServicePortEnvVar, "443")

	app := &v1alpha1.SparkApplication{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "foo",
			Namespace: "test",
		},
		Spec: v1alpha1.SparkApplicationSpec{
			RestartPolicy: v1alpha1.RestartPolicy{
				Type: v1alpha1.Never,
			},
		},
		Status: v1alpha1.SparkApplicationStatus{
			AppState: v1alpha1.ApplicationState{
				State:        v1alpha1.SubmittedState,
				ErrorMessage: "",
			},
			ExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorRunningState},
		},
	}

	testcases := []testcase{
		{
			appName:               "foo-1",
			oldAppStatus:          v1alpha1.SubmittedState,
			oldExecutorStatus:     map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorRunningState},
			expectedAppState:      v1alpha1.SubmittedState,
			expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorFailedState},
			expectedAppMetrics:    metrics{},
			expectedExecutorMetrics: executorMetrics{
				failedMetricCount: 1,
			},
		},
		{
			appName:           "foo-2",
			oldAppStatus:      v1alpha1.SubmittedState,
			oldExecutorStatus: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorRunningState},
			driverPod: &apiv1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo-driver",
					Namespace: "test",
					Labels: map[string]string{
						config.SparkRoleLabel:    sparkDriverRole,
						config.SparkAppNameLabel: "foo-2",
					},
					ResourceVersion: "1",
				},
				Status: apiv1.PodStatus{
					Phase: apiv1.PodRunning,
				},
			},
			executorPod: &apiv1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "exec-1",
					Namespace: "test",
					Labels: map[string]string{
						config.SparkRoleLabel:    sparkExecutorRole,
						config.SparkAppNameLabel: "foo-2",
					},
					ResourceVersion: "1",
				},
				Status: apiv1.PodStatus{
					Phase: apiv1.PodSucceeded,
				},
			},
			expectedAppState:      v1alpha1.RunningState,
			expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
			expectedAppMetrics: metrics{
				runningMetricCount: 1,
			},
			expectedExecutorMetrics: executorMetrics{
				successMetricCount: 1,
			},
		},
		{
			appName:           "foo-3",
			oldAppStatus:      v1alpha1.RunningState,
			oldExecutorStatus: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
			driverPod: &apiv1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo-driver",
					Namespace: "test",
					Labels: map[string]string{
						config.SparkRoleLabel:    sparkDriverRole,
						config.SparkAppNameLabel: "foo-3",
					},
					ResourceVersion: "1",
				},
				Status: apiv1.PodStatus{
					Phase: apiv1.PodFailed,
				},
			},
			executorPod: &apiv1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "exec-1",
					Namespace: "test",
					Labels: map[string]string{
						config.SparkRoleLabel:    sparkExecutorRole,
						config.SparkAppNameLabel: "foo-3",
					},
					ResourceVersion: "1",
				},
				Status: apiv1.PodStatus{
					Phase: apiv1.PodSucceeded,
				},
			},
			expectedAppState:      v1alpha1.FailingState,
			expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
			expectedAppMetrics: metrics{
				failedMetricCount: 1,
			},
			expectedExecutorMetrics: executorMetrics{},
		},
		{
			appName:           "foo-3",
			oldAppStatus:      v1alpha1.RunningState,
			oldExecutorStatus: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
			driverPod: &apiv1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "foo-driver",
					Namespace: "test",
					Labels: map[string]string{
						config.SparkRoleLabel:    sparkDriverRole,
						config.SparkAppNameLabel: "foo-3",
					},
					ResourceVersion: "1",
				},
				Status: apiv1.PodStatus{
					Phase: apiv1.PodSucceeded,
				},
			},
			executorPod: &apiv1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "exec-1",
					Namespace: "test",
					Labels: map[string]string{
						config.SparkRoleLabel:    sparkExecutorRole,
						config.SparkAppNameLabel: "foo-3",
					},
					ResourceVersion: "1",
				},
				Status: apiv1.PodStatus{
					Phase: apiv1.PodSucceeded,
				},
			},
			expectedAppState:      v1alpha1.SucceedingState,
			expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
			expectedAppMetrics: metrics{
				successMetricCount: 1,
			},
			expectedExecutorMetrics: executorMetrics{},
		},
	}

	testFn := func(test testcase, t *testing.T) {
		app.Status.AppState.State = test.oldAppStatus
		app.Status.ExecutorState = test.oldExecutorStatus
		app.Name = test.appName
		app.Status.ExecutionAttempts = 1
		ctrl, _ := newFakeController(app, test.driverPod, test.executorPod)
		_, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(app)
		if err != nil {
			t.Fatal(err)
		}
		if test.driverPod != nil {
			ctrl.kubeClient.CoreV1().Pods(app.GetNamespace()).Create(test.driverPod)
		}
		if test.executorPod != nil {
			ctrl.kubeClient.CoreV1().Pods(app.GetNamespace()).Create(test.executorPod)
		}

		err = ctrl.syncSparkApplication(fmt.Sprintf("%s/%s", app.GetNamespace(), app.GetName()))
		assert.Nil(t, err)
		updatedApp, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})

		// Verify the application and executor statuses.
		assert.Equal(t, test.expectedAppState, updatedApp.Status.AppState.State)
		assert.Equal(t, test.expectedExecutorState, updatedApp.Status.ExecutorState)

		// Verify the application metrics.
		assert.Equal(t, test.expectedAppMetrics.runningMetricCount, ctrl.metrics.sparkAppRunningCount.Value(map[string]string{}))
		assert.Equal(t, test.expectedAppMetrics.successMetricCount, fetchCounterValue(ctrl.metrics.sparkAppSuccessCount, map[string]string{}))
		assert.Equal(t, test.expectedAppMetrics.submitMetricCount, fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))
		assert.Equal(t, test.expectedAppMetrics.failedMetricCount, fetchCounterValue(ctrl.metrics.sparkAppFailureCount, map[string]string{}))

		// Verify the executor metrics.
		assert.Equal(t, test.expectedExecutorMetrics.runningMetricCount, ctrl.metrics.sparkAppExecutorRunningCount.Value(map[string]string{}))
		assert.Equal(t, test.expectedExecutorMetrics.successMetricCount, fetchCounterValue(ctrl.metrics.sparkAppExecutorSuccessCount, map[string]string{}))
		assert.Equal(t, test.expectedExecutorMetrics.failedMetricCount, fetchCounterValue(ctrl.metrics.sparkAppExecutorFailureCount, map[string]string{}))
	}

	for _, test := range testcases {
		testFn(test, t)
	}
}
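// TestHasRetryIntervalPassed checks the guard used before retrying: it must
// be false when the interval is nil, when no attempts have been made yet,
// when the last-event time is unset, or when fewer than interval*attempts
// seconds have elapsed since the last event.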
func TestHasRetryIntervalPassed(t *testing.T) {
	// Failure cases.
	assert.False(t, hasRetryIntervalPassed(nil, 3, metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}))
	assert.False(t, hasRetryIntervalPassed(int64ptr(5), 0, metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}))
	assert.False(t, hasRetryIntervalPassed(int64ptr(5), 3, metav1.Time{}))
	// Not enough time has passed: interval (50s) * attempts (3) exceeds 100s.
	assert.False(t, hasRetryIntervalPassed(int64ptr(50), 3, metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}))
	assert.True(t, hasRetryIntervalPassed(int64ptr(50), 3, metav1.Time{Time: metav1.Now().Add(-151 * time.Second)}))
}

func stringptr(s string) *string {
	return &s
}

func int32ptr(n int32) *int32 {
	return &n
}

func int64ptr(n int64) *int64 {
	return &n
}