Add ShuffleTrackingEnabled to DynamicAllocation struct to allow disabling shuffle tracking (#2511)

* Add ShuffleTrackingEnabled *bool to DynamicAllocation struct to allow disabling shuffle tracking

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* Run make generate

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* make manifests

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* make update-crd && make build-api-docs

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* Update internal/controller/sparkapplication/submission.go

Co-authored-by: Yi Chen <github@chenyicn.net>
Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* Go fmt

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* Refactor defaultExecutorSpec func

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* Refactor dynamicAllocationOption func

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

* Add IsDynamicAllocationEnabled func

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>

---------

Signed-off-by: jbhalodia-slack <jbhalodia@salesforce.com>
Co-authored-by: Yi Chen <github@chenyicn.net>
This commit is contained in:
jbhalodia-slack 2025-05-14 01:49:22 -04:00 committed by GitHub
parent 0cb98f79cf
commit ca37f6b7b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 153 additions and 17 deletions

View File

@ -703,6 +703,12 @@ type DynamicAllocation struct {
// MaxExecutors is the upper bound for the number of executors if dynamic allocation is enabled.
// +optional
MaxExecutors *int32 `json:"maxExecutors,omitempty"`
// ShuffleTrackingEnabled enables shuffle file tracking for executors, which allows dynamic allocation without
// the need for an external shuffle service. This option will try to keep alive executors that are storing
// shuffle data for active jobs. If external shuffle service is enabled, set ShuffleTrackingEnabled to false.
// ShuffleTrackingEnabled is true by default if dynamicAllocation.enabled is true.
// +optional
ShuffleTrackingEnabled *bool `json:"shuffleTrackingEnabled,omitempty"`
// ShuffleTrackingTimeout controls the timeout in milliseconds for executors that are holding
// shuffle data if shuffle tracking is enabled (true by default if dynamic allocation is enabled).
// +optional

View File

@ -279,6 +279,11 @@ func (in *DynamicAllocation) DeepCopyInto(out *DynamicAllocation) {
*out = new(int32)
**out = **in
}
if in.ShuffleTrackingEnabled != nil {
in, out := &in.ShuffleTrackingEnabled, &out.ShuffleTrackingEnabled
*out = new(bool)
**out = **in
}
if in.ShuffleTrackingTimeout != nil {
in, out := &in.ShuffleTrackingTimeout, &out.ShuffleTrackingTimeout
*out = new(int64)

View File

@ -5296,6 +5296,13 @@ spec:
of executors if dynamic allocation is enabled.
format: int32
type: integer
shuffleTrackingEnabled:
description: |-
ShuffleTrackingEnabled enables shuffle file tracking for executors, which allows dynamic allocation without
the need for an external shuffle service. This option will try to keep alive executors that are storing
shuffle data for active jobs. If external shuffle service is enabled, set ShuffleTrackingEnabled to false.
ShuffleTrackingEnabled is true by default if dynamicAllocation.enabled is true.
type: boolean
shuffleTrackingTimeout:
description: |-
ShuffleTrackingTimeout controls the timeout in milliseconds for executors that are holding

View File

@ -5238,6 +5238,13 @@ spec:
executors if dynamic allocation is enabled.
format: int32
type: integer
shuffleTrackingEnabled:
description: |-
ShuffleTrackingEnabled enables shuffle file tracking for executors, which allows dynamic allocation without
the need for an external shuffle service. This option will try to keep alive executors that are storing
shuffle data for active jobs. If external shuffle service is enabled, set ShuffleTrackingEnabled to false.
ShuffleTrackingEnabled is true by default if dynamicAllocation.enabled is true.
type: boolean
shuffleTrackingTimeout:
description: |-
ShuffleTrackingTimeout controls the timeout in milliseconds for executors that are holding

View File

@ -5296,6 +5296,13 @@ spec:
of executors if dynamic allocation is enabled.
format: int32
type: integer
shuffleTrackingEnabled:
description: |-
ShuffleTrackingEnabled enables shuffle file tracking for executors, which allows dynamic allocation without
the need for an external shuffle service. This option will try to keep alive executors that are storing
shuffle data for active jobs. If external shuffle service is enabled, set ShuffleTrackingEnabled to false.
ShuffleTrackingEnabled is true by default if dynamicAllocation.enabled is true.
type: boolean
shuffleTrackingTimeout:
description: |-
ShuffleTrackingTimeout controls the timeout in milliseconds for executors that are holding

View File

@ -5238,6 +5238,13 @@ spec:
executors if dynamic allocation is enabled.
format: int32
type: integer
shuffleTrackingEnabled:
description: |-
ShuffleTrackingEnabled enables shuffle file tracking for executors, which allows dynamic allocation without
the need for an external shuffle service. This option will try to keep alive executors that are storing
shuffle data for active jobs. If external shuffle service is enabled, set ShuffleTrackingEnabled to false.
ShuffleTrackingEnabled is true by default if dynamicAllocation.enabled is true.
type: boolean
shuffleTrackingTimeout:
description: |-
ShuffleTrackingTimeout controls the timeout in milliseconds for executors that are holding

View File

@ -741,6 +741,21 @@ int32
</tr>
<tr>
<td>
<code>shuffleTrackingEnabled</code><br/>
<em>
bool
</em>
</td>
<td>
<em>(Optional)</em>
<p>ShuffleTrackingEnabled enables shuffle file tracking for executors, which allows dynamic allocation without
the need for an external shuffle service. This option will try to keep alive executors that are storing
shuffle data for active jobs. If external shuffle service is enabled, set ShuffleTrackingEnabled to false.
ShuffleTrackingEnabled is true by default if dynamicAllocation.enabled is true.</p>
</td>
</tr>
<tr>
<td>
<code>shuffleTrackingTimeout</code><br/>
<em>
int64

View File

@ -989,10 +989,6 @@ func dynamicAllocationOption(app *v1beta2.SparkApplication) ([]string, error) {
args = append(args, "--conf",
fmt.Sprintf("%s=true", common.SparkDynamicAllocationEnabled))
// Turn on shuffle tracking if dynamic allocation is enabled.
args = append(args, "--conf",
fmt.Sprintf("%s=true", common.SparkDynamicAllocationShuffleTrackingEnabled))
if dynamicAllocation.InitialExecutors != nil {
args = append(args, "--conf",
fmt.Sprintf("%s=%d", common.SparkDynamicAllocationInitialExecutors, *dynamicAllocation.InitialExecutors))
@ -1005,6 +1001,12 @@ func dynamicAllocationOption(app *v1beta2.SparkApplication) ([]string, error) {
args = append(args, "--conf",
fmt.Sprintf("%s=%d", common.SparkDynamicAllocationMaxExecutors, *dynamicAllocation.MaxExecutors))
}
shuffleTrackingEnabled := true
if dynamicAllocation.ShuffleTrackingEnabled != nil {
shuffleTrackingEnabled = *dynamicAllocation.ShuffleTrackingEnabled
}
args = append(args, "--conf",
fmt.Sprintf("%s=%t", common.SparkDynamicAllocationShuffleTrackingEnabled, shuffleTrackingEnabled))
if dynamicAllocation.ShuffleTrackingTimeout != nil {
args = append(args, "--conf",
fmt.Sprintf("%s=%d", common.SparkDynamicAllocationShuffleTrackingTimeout, *dynamicAllocation.ShuffleTrackingTimeout))

View File

@ -18,7 +18,6 @@ package webhook
import (
"context"
"strconv"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
@ -86,17 +85,21 @@ func defaultDriverSpec(app *v1beta2.SparkApplication) {
}
func defaultExecutorSpec(app *v1beta2.SparkApplication) {
if app.Spec.Executor.Instances == nil {
// Check whether dynamic allocation is enabled in application spec.
enableDynamicAllocation := app.Spec.DynamicAllocation != nil && app.Spec.DynamicAllocation.Enabled
// Check whether dynamic allocation is enabled in spark conf.
if !enableDynamicAllocation && app.Spec.SparkConf != nil {
if dynamicConf, _ := strconv.ParseBool(app.Spec.SparkConf[common.SparkDynamicAllocationEnabled]); dynamicConf {
enableDynamicAllocation = true
}
if !enableDynamicAllocation && app.Spec.SparkConf[common.SparkExecutorInstances] == "" {
app.Spec.Executor.Instances = util.Int32Ptr(1)
}
}
isDynamicAllocationEnabled := util.IsDynamicAllocationEnabled(app)
if app.Spec.Executor.Instances == nil &&
app.Spec.SparkConf[common.SparkExecutorInstances] == "" &&
!isDynamicAllocationEnabled {
app.Spec.Executor.Instances = util.Int32Ptr(1)
}
// Set default for ShuffleTrackingEnabled to true if DynamicAllocation.enabled is true and
// DynamicAllocation.ShuffleTrackingEnabled is nil.
if isDynamicAllocationEnabled &&
app.Spec.DynamicAllocation != nil &&
app.Spec.DynamicAllocation.ShuffleTrackingEnabled == nil {
app.Spec.DynamicAllocation.ShuffleTrackingEnabled = util.BoolPtr(true)
}
}

View File

@ -20,6 +20,7 @@ import (
"crypto/md5"
"fmt"
"reflect"
"strconv"
"strings"
"time"
@ -495,3 +496,13 @@ func GetInitialExecutorNumber(app *v1beta2.SparkApplication) int32 {
return initialNumExecutors
}
// IsDynamicAllocationEnabled determines if Spark Dynamic Allocation is enabled in app.Spec.DynamicAllocation or in
// app.Spec.SparkConf. app.Spec.DynamicAllocation configs will take precedence over app.Spec.SparkConf configs.
func IsDynamicAllocationEnabled(app *v1beta2.SparkApplication) bool {
if app.Spec.DynamicAllocation != nil {
return app.Spec.DynamicAllocation.Enabled
}
dynamicAllocationConfVal, _ := strconv.ParseBool(app.Spec.SparkConf[common.SparkDynamicAllocationEnabled])
return dynamicAllocationConfVal
}

View File

@ -356,3 +356,69 @@ var _ = Describe("DriverStateToApplicationState", func() {
Expect(util.DriverStateToApplicationState(v1beta2.DriverStateUnknown)).To(Equal(v1beta2.ApplicationStateUnknown))
})
})
var _ = Describe("Check if IsDynamicAllocationEnabled", func() {
Context("when app.Spec.DynamicAllocation is True", func() {
app := &v1beta2.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app",
Namespace: "test-namespace",
},
Spec: v1beta2.SparkApplicationSpec{
DynamicAllocation: &v1beta2.DynamicAllocation{
Enabled: true,
},
},
}
It("Should return true", func() {
Expect(util.IsDynamicAllocationEnabled(app)).To(BeTrue())
})
})
Context("when app.Spec.DynamicAllocation is nil but True in app.Spec.SparkConf", func() {
app := &v1beta2.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app",
Namespace: "test-namespace",
},
Spec: v1beta2.SparkApplicationSpec{
SparkConf: map[string]string{
"spark.dynamicAllocation.enabled": "true",
},
},
}
It("Should return true", func() {
Expect(util.IsDynamicAllocationEnabled(app)).To(BeTrue())
})
})
Context("when app.Spec.DynamicAllocation is nil and not set in app.Spec.SparkConf", func() {
app := &v1beta2.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app",
Namespace: "test-namespace",
},
}
It("Should return false", func() {
Expect(util.IsDynamicAllocationEnabled(app)).To(BeFalse())
})
})
Context("when app.Spec.DynamicAllocation is True but false in app.Spec.SparkConf", func() {
app := &v1beta2.SparkApplication{
ObjectMeta: metav1.ObjectMeta{
Name: "test-app",
Namespace: "test-namespace",
},
Spec: v1beta2.SparkApplicationSpec{
DynamicAllocation: &v1beta2.DynamicAllocation{
Enabled: true,
},
SparkConf: map[string]string{
"spark.dynamicAllocation.enabled": "false",
},
},
}
It("Should return true because app.Spec.DynamicAllocation configs will take precedence over "+
"app.Spec.SparkConf configs", func() {
Expect(util.IsDynamicAllocationEnabled(app)).To(BeTrue())
})
})
})