spark-operator/pkg/controller/sparkapplication/controller_test.go

/*
Copyright 2017 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package sparkapplication

import (
"fmt"
"os"
"os/exec"
"strings"
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
prometheus_model "github.com/prometheus/client_model/go"
"github.com/stretchr/testify/assert"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
kubeclientfake "k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1alpha1"
crdclientfake "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/clientset/versioned/fake"
crdinformers "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/informers/externalversions"
"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/config"
"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/util"
)
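
// newFakeController builds a Controller wired to fake CRD and Kubernetes
// clientsets, pre-populates the shared informers with the given application
// and pods, registers a single fake node with an external IP, and returns the
// controller together with a FakeRecorder for asserting on emitted events.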
func newFakeController(app *v1alpha1.SparkApplication, pods ...*apiv1.Pod) (*Controller, *record.FakeRecorder) {
crdclientfake.AddToScheme(scheme.Scheme)
crdClient := crdclientfake.NewSimpleClientset()
kubeClient := kubeclientfake.NewSimpleClientset()
informerFactory := crdinformers.NewSharedInformerFactory(crdClient, 0*time.Second)
recorder := record.NewFakeRecorder(3)
kubeClient.CoreV1().Nodes().Create(&apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
},
Status: apiv1.NodeStatus{
Addresses: []apiv1.NodeAddress{
{
Type: apiv1.NodeExternalIP,
Address: "12.34.56.78",
},
},
},
})
podInformerFactory := informers.NewSharedInformerFactory(kubeClient, 0*time.Second)
controller := newSparkApplicationController(crdClient, kubeClient, informerFactory, podInformerFactory, recorder,
&util.MetricConfig{}, "")
informer := informerFactory.Sparkoperator().V1alpha1().SparkApplications().Informer()
if app != nil {
informer.GetIndexer().Add(app)
}
podInformer := podInformerFactory.Core().V1().Pods().Informer()
for _, pod := range pods {
if pod != nil {
podInformer.GetIndexer().Add(pod)
}
}
return controller, recorder
}
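
// TestOnAdd verifies that a newly added SparkApplication is enqueued under
// its namespace/name key.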
func TestOnAdd(t *testing.T) {
ctrl, _ := newFakeController(nil)
app := &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{},
}
ctrl.onAdd(app)
item, _ := ctrl.queue.Get()
defer ctrl.queue.Done(item)
key, ok := item.(string)
assert.True(t, ok)
expectedKey, _ := cache.MetaNamespaceKeyFunc(app)
assert.Equal(t, expectedKey, key)
ctrl.queue.Forget(item)
}
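
// TestOnUpdate verifies the three update paths: a status-only change is
// enqueued without events, a spec change on a missing object emits a
// SparkApplicationSpecUpdateFailed event, and a spec change on an existing
// object emits a SparkApplicationSpecUpdateProcessed event and moves the app
// to InvalidatingState.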
func TestOnUpdate(t *testing.T) {
ctrl, recorder := newFakeController(nil)
appTemplate := &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
ResourceVersion: "1",
},
Spec: v1alpha1.SparkApplicationSpec{
Mode: v1alpha1.ClusterMode,
Image: stringptr("foo-image:v1"),
Executor: v1alpha1.ExecutorSpec{
Instances: int32ptr(1),
},
},
}
// Case 1: Same spec; only the status and resource version change.
copyWithSameSpec := appTemplate.DeepCopy()
copyWithSameSpec.Status.ExecutionAttempts = 3
copyWithSameSpec.ResourceVersion = "2"
ctrl.onUpdate(appTemplate, copyWithSameSpec)
// Verify that the SparkApplication was enqueued but no spec update events fired.
item, _ := ctrl.queue.Get()
key, ok := item.(string)
assert.True(t, ok)
expectedKey, _ := cache.MetaNamespaceKeyFunc(appTemplate)
assert.Equal(t, expectedKey, key)
ctrl.queue.Forget(item)
ctrl.queue.Done(item)
assert.Equal(t, 0, len(recorder.Events))
// Case 2: Spec update fails.
copyWithSpecUpdate := appTemplate.DeepCopy()
copyWithSpecUpdate.Spec.Image = stringptr("foo-image:v2")
copyWithSpecUpdate.ResourceVersion = "2"
ctrl.onUpdate(appTemplate, copyWithSpecUpdate)
// Verify that the update failed because the SparkApplication does not exist in the API server.
assert.Equal(t, 1, len(recorder.Events))
event := <-recorder.Events
assert.True(t, strings.Contains(event, "SparkApplicationSpecUpdateFailed"))
// Case 3: Spec update succeeds.
ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(appTemplate.Namespace).Create(appTemplate)
ctrl.onUpdate(appTemplate, copyWithSpecUpdate)
// Verify App was enqueued.
item, _ = ctrl.queue.Get()
key, ok = item.(string)
assert.True(t, ok)
expectedKey, _ = cache.MetaNamespaceKeyFunc(appTemplate)
assert.Equal(t, expectedKey, key)
ctrl.queue.Forget(item)
ctrl.queue.Done(item)
// Verify that the update succeeded.
assert.Equal(t, 1, len(recorder.Events))
event = <-recorder.Events
assert.True(t, strings.Contains(event, "SparkApplicationSpecUpdateProcessed"))
// Verify the SparkApplication state was updated to InvalidatingState.
app, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(appTemplate.Namespace).Get(appTemplate.Name, metav1.GetOptions{})
assert.Nil(t, err)
assert.Equal(t, v1alpha1.InvalidatingState, app.Status.AppState.State)
}
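
// TestOnDelete verifies that deleting a SparkApplication emits a
// SparkApplicationDeleted event.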
func TestOnDelete(t *testing.T) {
ctrl, recorder := newFakeController(nil)
app := &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{},
}
ctrl.onAdd(app)
ctrl.queue.Get()
ctrl.onDelete(app)
ctrl.queue.ShutDown()
item, _ := ctrl.queue.Get()
defer ctrl.queue.Done(item)
assert.True(t, item == nil)
event := <-recorder.Events
assert.True(t, strings.Contains(event, "SparkApplicationDeleted"))
ctrl.queue.Forget(item)
}
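
// TestHelperProcessFailure is not a regular test: the fake execCommand used
// by the submission tests re-runs this test binary with
// GO_WANT_HELPER_PROCESS=1 (the standard os/exec helper-process pattern), and
// this function then exits non-zero to simulate a failed spark-submit.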
func TestHelperProcessFailure(t *testing.T) {
if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
return
}
os.Exit(2)
}
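
// TestHelperProcessSuccess is the counterpart of TestHelperProcessFailure:
// it exits 0 to simulate a successful spark-submit.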
func TestHelperProcessSuccess(t *testing.T) {
if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
return
}
os.Exit(0)
}
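
// fetchCounterValue returns the current value of the counter in the given
// CounterVec identified by the label set.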
func fetchCounterValue(m *prometheus.CounterVec, labels map[string]string) float64 {
pb := &prometheus_model.Metric{}
m.With(labels).Write(pb)
return pb.GetCounter().GetValue()
}
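
// metrics and executorMetrics hold the expected values of the controller's
// application-level and executor-level Prometheus counters for a test case.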
type metrics struct {
submitMetricCount float64
runningMetricCount float64
successMetricCount float64
failedMetricCount float64
}
type executorMetrics struct {
runningMetricCount float64
successMetricCount float64
failedMetricCount float64
}
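
// TestSyncSparkApplication_SubmissionFailed walks an app with an OnFailure
// restart policy through three sync cycles: the first submission fails, the
// second is retried after the retry interval and fails again, and the third
// finds the submission retries exhausted and marks the app FailedState.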
func TestSyncSparkApplication_SubmissionFailed(t *testing.T) {
os.Setenv(kubernetesServiceHostEnvVar, "localhost")
os.Setenv(kubernetesServicePortEnvVar, "443")
restartPolicyOnFailure := v1alpha1.RestartPolicy{
Type: v1alpha1.OnFailure,
OnFailureRetries: int32ptr(1),
OnFailureRetryInterval: int64ptr(100),
OnSubmissionFailureRetryInterval: int64ptr(100),
OnSubmissionFailureRetries: int32ptr(1),
}
app := &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.NewState,
ErrorMessage: "",
},
},
}
ctrl, recorder := newFakeController(app)
_, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(app)
if err != nil {
t.Fatal(err)
}
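// Stub out execCommand so the spark-submit invocation re-runs this test
// binary and exits non-zero; see TestHelperProcessFailure.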
execCommand = func(command string, args ...string) *exec.Cmd {
cs := []string{"-test.run=TestHelperProcessFailure", "--", command}
cs = append(cs, args...)
cmd := exec.Command(os.Args[0], cs...)
cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
return cmd
}
// Attempt 1
err = ctrl.syncSparkApplication("default/foo")
updatedApp, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})
assert.Equal(t, v1alpha1.FailedSubmissionState, updatedApp.Status.AppState.State)
assert.Equal(t, int32(1), updatedApp.Status.SubmissionAttempts)
assert.Equal(t, float64(0), fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))
assert.Equal(t, float64(1), fetchCounterValue(ctrl.metrics.sparkAppFailureCount, map[string]string{}))
event := <-recorder.Events
assert.True(t, strings.Contains(event, "SparkApplicationAdded"))
event = <-recorder.Events
assert.True(t, strings.Contains(event, "SparkApplicationSubmissionFailed"))
// Attempt 2: Retried after the submission retry interval has passed.
updatedApp.Status.LastSubmissionAttemptTime = metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}
ctrl, recorder = newFakeController(updatedApp)
_, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(updatedApp)
if err != nil {
t.Fatal(err)
}
err = ctrl.syncSparkApplication("default/foo")
// Verify the app failed submission again.
updatedApp, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})
assert.Nil(t, err)
assert.Equal(t, v1alpha1.FailedSubmissionState, updatedApp.Status.AppState.State)
assert.Equal(t, int32(2), updatedApp.Status.SubmissionAttempts)
assert.Equal(t, float64(0), fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))
event = <-recorder.Events
assert.True(t, strings.Contains(event, "SparkApplicationSubmissionFailed"))
// Attempt 3: No more retries
updatedApp.Status.LastSubmissionAttemptTime = metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}
ctrl, recorder = newFakeController(updatedApp)
_, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(updatedApp)
if err != nil {
t.Fatal(err)
}
err = ctrl.syncSparkApplication("default/foo")
// Verify the app moved to the terminal FailedState.
updatedApp, err = ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})
assert.Nil(t, err)
assert.Equal(t, v1alpha1.FailedState, updatedApp.Status.AppState.State)
// No more submission attempts made.
assert.Equal(t, int32(2), updatedApp.Status.SubmissionAttempts)
}
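
// TestShouldRetry exercises shouldRetry across combinations of restart
// policy (Always, Never, OnFailure) and application state.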
func TestShouldRetry(t *testing.T) {
type testcase struct {
app *v1alpha1.SparkApplication
shouldRetry bool
}
testFn := func(test testcase, t *testing.T) {
got := shouldRetry(test.app)
assert.Equal(t, test.shouldRetry, got)
}
restartPolicyAlways := v1alpha1.RestartPolicy{
Type: v1alpha1.Always,
OnSubmissionFailureRetryInterval: int64ptr(100),
OnFailureRetryInterval: int64ptr(100),
}
restartPolicyNever := v1alpha1.RestartPolicy{
Type: v1alpha1.Never,
}
restartPolicyOnFailure := v1alpha1.RestartPolicy{
Type: v1alpha1.OnFailure,
OnFailureRetries: int32ptr(1),
OnFailureRetryInterval: int64ptr(100),
OnSubmissionFailureRetryInterval: int64ptr(100),
OnSubmissionFailureRetries: int32ptr(2),
}
testcases := []testcase{
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
}},
shouldRetry: false,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.SucceedingState,
},
},
},
shouldRetry: true,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.SucceedingState,
},
},
},
shouldRetry: false,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailingState,
},
},
},
shouldRetry: true,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyNever,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailingState,
},
},
},
shouldRetry: false,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyNever,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailedSubmissionState,
},
},
},
shouldRetry: false,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailedSubmissionState,
},
},
},
shouldRetry: true,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.PendingRerunState,
},
},
},
shouldRetry: false,
},
}
for _, test := range testcases {
testFn(test, t)
}
}
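
// TestSyncSparkApp_SubmissionSuccess table-drives syncSparkApplication with a
// spark-submit stub that always succeeds, and checks the resulting state for
// each combination of starting state and restart policy.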
func TestSyncSparkApp_SubmissionSuccess(t *testing.T) {
type testcase struct {
app *v1alpha1.SparkApplication
expectedState v1alpha1.ApplicationStateType
}
os.Setenv(kubernetesServiceHostEnvVar, "localhost")
os.Setenv(kubernetesServicePortEnvVar, "443")
testFn := func(test testcase, t *testing.T) {
ctrl, _ := newFakeController(test.app)
_, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(test.app.Namespace).Create(test.app)
if err != nil {
t.Fatal(err)
}
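// Stub out execCommand so the spark-submit invocation re-runs this test
// binary and exits 0; see TestHelperProcessSuccess.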
execCommand = func(command string, args ...string) *exec.Cmd {
cs := []string{"-test.run=TestHelperProcessSuccess", "--", command}
cs = append(cs, args...)
cmd := exec.Command(os.Args[0], cs...)
cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"}
return cmd
}
err = ctrl.syncSparkApplication(fmt.Sprintf("%s/%s", test.app.Namespace, test.app.Name))
assert.Nil(t, err)
updatedApp, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(test.app.Namespace).Get(test.app.Name, metav1.GetOptions{})
assert.Nil(t, err)
assert.Equal(t, test.expectedState, updatedApp.Status.AppState.State)
if test.expectedState == v1alpha1.SubmittedState {
assert.Equal(t, float64(1), fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))
}
}
restartPolicyAlways := v1alpha1.RestartPolicy{
Type: v1alpha1.Always,
OnSubmissionFailureRetryInterval: int64ptr(100),
OnFailureRetryInterval: int64ptr(100),
}
restartPolicyNever := v1alpha1.RestartPolicy{
Type: v1alpha1.Never,
}
restartPolicyOnFailure := v1alpha1.RestartPolicy{
Type: v1alpha1.OnFailure,
OnFailureRetries: int32ptr(1),
OnFailureRetryInterval: int64ptr(100),
OnSubmissionFailureRetryInterval: int64ptr(100),
OnSubmissionFailureRetries: int32ptr(2),
}
testcases := []testcase{
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
}},
expectedState: v1alpha1.SubmittedState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.SucceedingState,
},
},
},
expectedState: v1alpha1.PendingRerunState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.PendingRerunState,
},
},
},
expectedState: v1alpha1.SubmittedState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailedSubmissionState,
},
LastSubmissionAttemptTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
},
},
expectedState: v1alpha1.FailedSubmissionState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailedSubmissionState,
},
SubmissionAttempts: 1,
LastSubmissionAttemptTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
},
},
expectedState: v1alpha1.SubmittedState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailingState,
},
ExecutionAttempts: 1,
CompletionTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
},
},
expectedState: v1alpha1.PendingRerunState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyAlways,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailingState,
},
CompletionTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
},
},
expectedState: v1alpha1.FailingState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyNever,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.InvalidatingState,
},
CompletionTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
},
},
expectedState: v1alpha1.PendingRerunState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyNever,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.SucceedingState,
},
},
},
expectedState: v1alpha1.CompletedState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyNever,
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.NewState,
},
},
},
expectedState: v1alpha1.SubmittedState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailingState,
},
ExecutionAttempts: 2,
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
},
expectedState: v1alpha1.FailedState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailingState,
},
ExecutionAttempts: 1,
CompletionTime: metav1.Now(),
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
},
expectedState: v1alpha1.FailingState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailingState,
},
ExecutionAttempts: 1,
CompletionTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
},
expectedState: v1alpha1.PendingRerunState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailedSubmissionState,
},
SubmissionAttempts: 3,
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
},
expectedState: v1alpha1.FailedState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailedSubmissionState,
},
SubmissionAttempts: 1,
LastSubmissionAttemptTime: metav1.Now(),
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
},
expectedState: v1alpha1.FailedSubmissionState,
},
{
app: &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.FailedSubmissionState,
},
SubmissionAttempts: 1,
LastSubmissionAttemptTime: metav1.Time{Time: metav1.Now().Add(-2000 * time.Second)},
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: restartPolicyOnFailure,
},
},
expectedState: v1alpha1.SubmittedState,
},
}
for _, test := range testcases {
testFn(test, t)
}
}
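
// TestSyncSparkApplication_ExecutingState verifies that driver and executor
// pod phases drive the application and executor state transitions, and that
// the corresponding Prometheus metrics are updated.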
func TestSyncSparkApplication_ExecutingState(t *testing.T) {
type testcase struct {
appName string
oldAppStatus v1alpha1.ApplicationStateType
oldExecutorStatus map[string]v1alpha1.ExecutorState
driverPod *apiv1.Pod
executorPod *apiv1.Pod
expectedAppState v1alpha1.ApplicationStateType
expectedExecutorState map[string]v1alpha1.ExecutorState
expectedAppMetrics metrics
expectedExecutorMetrics executorMetrics
}
os.Setenv(kubernetesServiceHostEnvVar, "localhost")
os.Setenv(kubernetesServicePortEnvVar, "443")
app := &v1alpha1.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "test",
},
Spec: v1alpha1.SparkApplicationSpec{
RestartPolicy: v1alpha1.RestartPolicy{
Type: v1alpha1.Never,
},
},
Status: v1alpha1.SparkApplicationStatus{
AppState: v1alpha1.ApplicationState{
State: v1alpha1.SubmittedState,
ErrorMessage: "",
},
ExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorRunningState},
},
}
testcases := []testcase{
{
appName: "foo-1",
oldAppStatus: v1alpha1.SubmittedState,
oldExecutorStatus: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorRunningState},
expectedAppState: v1alpha1.SubmittedState,
expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorFailedState},
expectedAppMetrics: metrics{},
expectedExecutorMetrics: executorMetrics{
failedMetricCount: 1,
},
},
{
appName: "foo-2",
oldAppStatus: v1alpha1.SubmittedState,
oldExecutorStatus: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorRunningState},
driverPod: &apiv1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "foo-driver",
Namespace: "test",
Labels: map[string]string{
config.SparkRoleLabel: sparkDriverRole,
config.SparkAppNameLabel: "foo-2",
},
ResourceVersion: "1",
},
Status: apiv1.PodStatus{
Phase: apiv1.PodRunning,
},
},
executorPod: &apiv1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "exec-1",
Namespace: "test",
Labels: map[string]string{
config.SparkRoleLabel: sparkExecutorRole,
config.SparkAppNameLabel: "foo-2",
},
ResourceVersion: "1",
},
Status: apiv1.PodStatus{
Phase: apiv1.PodSucceeded,
},
},
expectedAppState: v1alpha1.RunningState,
expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
expectedAppMetrics: metrics{
runningMetricCount: 1,
},
expectedExecutorMetrics: executorMetrics{
successMetricCount: 1,
},
},
{
appName: "foo-3",
oldAppStatus: v1alpha1.RunningState,
oldExecutorStatus: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
driverPod: &apiv1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "foo-driver",
Namespace: "test",
Labels: map[string]string{
config.SparkRoleLabel: sparkDriverRole,
config.SparkAppNameLabel: "foo-3",
},
ResourceVersion: "1",
},
Status: apiv1.PodStatus{
Phase: apiv1.PodFailed,
},
},
executorPod: &apiv1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "exec-1",
Namespace: "test",
Labels: map[string]string{
config.SparkRoleLabel: sparkExecutorRole,
config.SparkAppNameLabel: "foo-3",
},
ResourceVersion: "1",
},
Status: apiv1.PodStatus{
Phase: apiv1.PodSucceeded,
},
},
expectedAppState: v1alpha1.FailingState,
expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
expectedAppMetrics: metrics{
failedMetricCount: 1,
},
expectedExecutorMetrics: executorMetrics{},
},
{
appName: "foo-3",
oldAppStatus: v1alpha1.RunningState,
oldExecutorStatus: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
driverPod: &apiv1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "foo-driver",
Namespace: "test",
Labels: map[string]string{
config.SparkRoleLabel: sparkDriverRole,
config.SparkAppNameLabel: "foo-3",
},
ResourceVersion: "1",
},
Status: apiv1.PodStatus{
Phase: apiv1.PodSucceeded,
},
},
executorPod: &apiv1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "exec-1",
Namespace: "test",
Labels: map[string]string{
config.SparkRoleLabel: sparkExecutorRole,
config.SparkAppNameLabel: "foo-3",
},
ResourceVersion: "1",
},
Status: apiv1.PodStatus{
Phase: apiv1.PodSucceeded,
},
},
expectedAppState: v1alpha1.SucceedingState,
expectedExecutorState: map[string]v1alpha1.ExecutorState{"exec-1": v1alpha1.ExecutorCompletedState},
expectedAppMetrics: metrics{
successMetricCount: 1,
},
expectedExecutorMetrics: executorMetrics{},
},
}
testFn := func(test testcase, t *testing.T) {
app.Status.AppState.State = test.oldAppStatus
app.Status.ExecutorState = test.oldExecutorStatus
app.Name = test.appName
app.Status.ExecutionAttempts = 1
ctrl, _ := newFakeController(app, test.driverPod, test.executorPod)
_, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Create(app)
if err != nil {
t.Fatal(err)
}
if test.driverPod != nil {
ctrl.kubeClient.CoreV1().Pods(app.GetNamespace()).Create(test.driverPod)
}
if test.executorPod != nil {
ctrl.kubeClient.CoreV1().Pods(app.GetNamespace()).Create(test.executorPod)
}
err = ctrl.syncSparkApplication(fmt.Sprintf("%s/%s", app.GetNamespace(), app.GetName()))
assert.Nil(t, err)
updatedApp, err := ctrl.crdClient.SparkoperatorV1alpha1().SparkApplications(app.Namespace).Get(app.Name, metav1.GetOptions{})
assert.Nil(t, err)
// Verify App/Executor Statuses
assert.Equal(t, test.expectedAppState, updatedApp.Status.AppState.State)
assert.Equal(t, test.expectedExecutorState, updatedApp.Status.ExecutorState)
// Verify App Metrics
assert.Equal(t, test.expectedAppMetrics.runningMetricCount, ctrl.metrics.sparkAppRunningCount.Value(map[string]string{}))
assert.Equal(t, test.expectedAppMetrics.successMetricCount, fetchCounterValue(ctrl.metrics.sparkAppSuccessCount, map[string]string{}))
assert.Equal(t, test.expectedAppMetrics.submitMetricCount, fetchCounterValue(ctrl.metrics.sparkAppSubmitCount, map[string]string{}))
assert.Equal(t, test.expectedAppMetrics.failedMetricCount, fetchCounterValue(ctrl.metrics.sparkAppFailureCount, map[string]string{}))
// Verify Executor Metrics
assert.Equal(t, test.expectedExecutorMetrics.runningMetricCount, ctrl.metrics.sparkAppExecutorRunningCount.Value(map[string]string{}))
assert.Equal(t, test.expectedExecutorMetrics.successMetricCount, fetchCounterValue(ctrl.metrics.sparkAppExecutorSuccessCount, map[string]string{}))
assert.Equal(t, test.expectedExecutorMetrics.failedMetricCount, fetchCounterValue(ctrl.metrics.sparkAppExecutorFailureCount, map[string]string{}))
}
for _, test := range testcases {
testFn(test, t)
}
}
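
// TestHasRetryIntervalPassed checks the linear backoff between retries: the
// wait that must elapse scales with the number of attempts already made
// (interval multiplied by attempts).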
func TestHasRetryIntervalPassed(t *testing.T) {
// Invalid inputs: a nil retry interval, zero attempts, or a zero last-event time never pass.
assert.False(t, hasRetryIntervalPassed(nil, 3, metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}))
assert.False(t, hasRetryIntervalPassed(int64ptr(5), 0, metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}))
assert.False(t, hasRetryIntervalPassed(int64ptr(5), 3, metav1.Time{}))
// With a 50s interval and 3 attempts the effective wait is 150s: 100s is not enough, 151s is.
assert.False(t, hasRetryIntervalPassed(int64ptr(50), 3, metav1.Time{Time: metav1.Now().Add(-100 * time.Second)}))
assert.True(t, hasRetryIntervalPassed(int64ptr(50), 3, metav1.Time{Time: metav1.Now().Add(-151 * time.Second)}))
}
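
// Pointer helpers for populating optional (pointer-typed) spec fields.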
func stringptr(s string) *string {
return &s
}
func int32ptr(n int32) *int32 {
return &n
}
func int64ptr(n int64) *int64 {
return &n
}