deduplicate scale-up test setup
parent 3e2d48d86d
commit 51f1660db7
@@ -93,7 +93,7 @@ func (e *scaleUpExecutor) executeScaleUpsParallel(
nodeInfos map[string]*schedulerframework.NodeInfo,
now time.Time,
) (errors.AutoscalerError, []cloudprovider.NodeGroup) {
if err := assertUniqueNodeGroups(scaleUpInfos); err != nil {
if err := checkUniqueNodeGroups(scaleUpInfos); err != nil {
return err, extractNodeGroups(scaleUpInfos)
}
type errResult struct {
@@ -171,6 +171,15 @@ func combineConcurrentScaleUpErrors(errs []errors.AutoscalerError) errors.Autosc
if len(errs) == 1 {
return errs[0]
}
uniqueMessages := make(map[string]bool)
uniqueTypes := make(map[errors.AutoscalerErrorType]bool)
for _, err := range errs {
uniqueTypes[err.Type()] = true
uniqueMessages[err.Error()] = true
}
if len(uniqueTypes) == 1 && len(uniqueMessages) == 1 {
return errs[0]
}
// sort to stabilize the results and easier log aggregation
sort.Slice(errs, func(i, j int) bool {
errA := errs[i]
@@ -180,44 +189,43 @@ func combineConcurrentScaleUpErrors(errs []errors.AutoscalerError) errors.Autosc
}
return errA.Type() < errB.Type()
})
firstErrMsg := errs[0].Error()
firstErrType := errs[0].Type()
singleError := true
singleType := true
for _, err := range errs {
singleType = singleType && firstErrType == err.Type()
singleError = singleError && singleType && firstErrMsg == err.Error()
}
if singleError {
return errs[0]
}
var builder strings.Builder
builder.WriteString(firstErrMsg)
builder.WriteString(" ...other concurrent errors: [")
concurrentErrs := make(map[string]bool)
for _, err := range errs {
var concurrentErr string
if singleType {
concurrentErr = err.Error()
} else {
concurrentErr = fmt.Sprintf("[%s] %s", err.Type(), err.Error())
}
n := len(concurrentErrs)
if !concurrentErrs[concurrentErr] && n > 0 {
if n > 1 {
builder.WriteString(", ")
}
builder.WriteString(fmt.Sprintf("%q", concurrentErr))
}
concurrentErrs[concurrentErr] = true
}
builder.WriteString("]")
return errors.NewAutoscalerError(firstErrType, builder.String())
firstErr := errs[0]
printErrorTypes := len(uniqueTypes) > 1
message := formatMessageFromConcurrentErrors(errs, printErrorTypes)
return errors.NewAutoscalerError(firstErr.Type(), message)
}

// Asserts all groups are scaled only once.
func formatMessageFromConcurrentErrors(errs []errors.AutoscalerError, printErrorTypes bool) string {
firstErr := errs[0]
var builder strings.Builder
builder.WriteString(firstErr.Error())
builder.WriteString(" ...and other concurrent errors: [")
formattedErrs := map[errors.AutoscalerError]bool{
firstErr: true,
}
for _, err := range errs {
if _, has := formattedErrs[err]; has {
continue
}
formattedErrs[err] = true
var message string
if printErrorTypes {
message = fmt.Sprintf("[%s] %s", err.Type(), err.Error())
} else {
message = err.Error()
}
if len(formattedErrs) > 2 {
builder.WriteString(", ")
}
builder.WriteString(fmt.Sprintf("%q", message))
}
builder.WriteString("]")
return builder.String()
}

// Checks if all groups are scaled only once.
// Scaling one group multiple times concurrently may cause problems.
func assertUniqueNodeGroups(scaleUpInfos []nodegroupset.ScaleUpInfo) errors.AutoscalerError {
func checkUniqueNodeGroups(scaleUpInfos []nodegroupset.ScaleUpInfo) errors.AutoscalerError {
uniqueGroups := make(map[string]bool)
for _, info := range scaleUpInfos {
if uniqueGroups[info.Group.Id()] {
@ -19,9 +19,9 @@ package orchestrator
|
|||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestCombinedConcurrentScaleUpErrors(t *testing.T) {
|
||||
|
|
@ -58,7 +58,7 @@ func TestCombinedConcurrentScaleUpErrors(t *testing.T) {
|
|||
},
|
||||
expectedErr: errors.NewAutoscalerError(
|
||||
errors.CloudProviderError,
|
||||
"provider error ...other concurrent errors: [\"[internalError] internal error\"]",
|
||||
"provider error ...and other concurrent errors: [\"[internalError] internal error\"]",
|
||||
),
|
||||
},
|
||||
{
|
||||
|
|
@ -69,7 +69,7 @@ func TestCombinedConcurrentScaleUpErrors(t *testing.T) {
|
|||
},
|
||||
expectedErr: errors.NewAutoscalerError(
|
||||
errors.CloudProviderError,
|
||||
"provider error ...other concurrent errors: [\"[internalError] internal error\"]",
|
||||
"provider error ...and other concurrent errors: [\"[internalError] internal error\"]",
|
||||
),
|
||||
},
|
||||
{
|
||||
|
|
@ -81,7 +81,7 @@ func TestCombinedConcurrentScaleUpErrors(t *testing.T) {
|
|||
},
|
||||
expectedErr: errors.NewAutoscalerError(
|
||||
errors.InternalError,
|
||||
"A ...other concurrent errors: [\"B\", \"C\"]"),
|
||||
"A ...and other concurrent errors: [\"B\", \"C\"]"),
|
||||
},
|
||||
{
|
||||
desc: "errors with the same type and some duplicated messages",
|
||||
|
|
@ -92,7 +92,7 @@ func TestCombinedConcurrentScaleUpErrors(t *testing.T) {
|
|||
},
|
||||
expectedErr: errors.NewAutoscalerError(
|
||||
errors.InternalError,
|
||||
"A ...other concurrent errors: [\"B\"]"),
|
||||
"A ...and other concurrent errors: [\"B\"]"),
|
||||
},
|
||||
{
|
||||
desc: "some duplicated errors",
|
||||
|
|
@ -104,7 +104,7 @@ func TestCombinedConcurrentScaleUpErrors(t *testing.T) {
|
|||
},
|
||||
expectedErr: errors.NewAutoscalerError(
|
||||
errors.CloudProviderError,
|
||||
"A ...other concurrent errors: [\"[cloudProviderError] B\", \"[internalError] A\"]"),
|
||||
"A ...and other concurrent errors: [\"[cloudProviderError] B\", \"[internalError] A\"]"),
|
||||
},
|
||||
{
|
||||
desc: "different errors with quotes in messages",
|
||||
|
|
@ -114,7 +114,7 @@ func TestCombinedConcurrentScaleUpErrors(t *testing.T) {
|
|||
},
|
||||
expectedErr: errors.NewAutoscalerError(
|
||||
errors.InternalError,
|
||||
"\"first\" ...other concurrent errors: [\"\\\"second\\\"\"]"),
|
||||
"\"first\" ...and other concurrent errors: [\"\\\"second\\\"\"]"),
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,8 +52,6 @@ import (
|
|||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/expander"
|
||||
)
|
||||
|
||||
var defaultOptions = config.AutoscalingOptions{
|
||||
|
|
@ -66,7 +64,7 @@ var defaultOptions = config.AutoscalingOptions{
|
|||
|
||||
// Scale up scenarios.
|
||||
func TestScaleUpOK(t *testing.T) {
|
||||
config := &ScaleTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 100, Memory: 100, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 1000, Memory: 1000, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
|
|
@ -78,8 +76,7 @@ func TestScaleUpOK(t *testing.T) {
|
|||
ExtraPods: []PodConfig{
|
||||
{Name: "p-new", Cpu: 500, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
Options: defaultOptions,
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
}
|
||||
expectedResults := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
|
|
@ -93,7 +90,7 @@ func TestScaleUpOK(t *testing.T) {
|
|||
|
||||
// There are triggering, remaining & awaiting pods.
|
||||
func TestMixedScaleUp(t *testing.T) {
|
||||
config := &ScaleTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 100, Memory: 1000, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 1000, Memory: 100, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
|
|
@ -110,8 +107,7 @@ func TestMixedScaleUp(t *testing.T) {
|
|||
// can only be scheduled on ng1
|
||||
{Name: "awaiting", Cpu: 0, Memory: 200, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
Options: defaultOptions,
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
}
|
||||
expectedResults := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
|
|
@ -128,7 +124,7 @@ func TestMixedScaleUp(t *testing.T) {
|
|||
func TestScaleUpMaxCoresLimitHit(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxCoresTotal = 9
|
||||
config := &ScaleTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
|
|
@ -141,8 +137,8 @@ func TestScaleUpMaxCoresLimitHit(t *testing.T) {
|
|||
{Name: "p-new-1", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng1", SizeChange: 2},
|
||||
Options: options,
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng1", SizeChange: 2},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng1", SizeChange: 1},
|
||||
|
|
@ -157,7 +153,7 @@ func TestScaleUpMaxCoresLimitHit(t *testing.T) {
|
|||
func TestScaleUpMaxCoresLimitHitWithNotAutoscaledGroup(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxCoresTotal = 9
|
||||
config := &ScaleTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000, Gpu: 0, Ready: true, Group: ""},
|
||||
|
|
@ -170,8 +166,8 @@ func TestScaleUpMaxCoresLimitHitWithNotAutoscaledGroup(t *testing.T) {
|
|||
{Name: "p-new-1", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng1", SizeChange: 2},
|
||||
Options: options,
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng1", SizeChange: 2},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng1", SizeChange: 1},
|
||||
|
|
@ -186,7 +182,7 @@ func TestScaleUpMaxCoresLimitHitWithNotAutoscaledGroup(t *testing.T) {
|
|||
func TestScaleUpMaxMemoryLimitHit(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxMemoryTotal = 1300 * utils.MiB
|
||||
config := &ScaleTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
|
|
@ -200,8 +196,8 @@ func TestScaleUpMaxMemoryLimitHit(t *testing.T) {
|
|||
{Name: "p-new-2", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-3", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng1", SizeChange: 3},
|
||||
Options: options,
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng1", SizeChange: 3},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng1", SizeChange: 2},
|
||||
|
|
@ -216,7 +212,7 @@ func TestScaleUpMaxMemoryLimitHit(t *testing.T) {
|
|||
func TestScaleUpMaxMemoryLimitHitWithNotAutoscaledGroup(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxMemoryTotal = 1300 * utils.MiB
|
||||
config := &ScaleTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: ""},
|
||||
|
|
@ -230,8 +226,8 @@ func TestScaleUpMaxMemoryLimitHitWithNotAutoscaledGroup(t *testing.T) {
|
|||
{Name: "p-new-2", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-3", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng1", SizeChange: 3},
|
||||
Options: options,
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng1", SizeChange: 3},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng1", SizeChange: 2},
|
||||
|
|
@ -247,11 +243,7 @@ func TestScaleUpTwoGroups(t *testing.T) {
|
|||
options := defaultOptions
|
||||
options.BalanceSimilarNodeGroups = true
|
||||
options.ParallelScaleUp = true
|
||||
config := &GroupsScaleUpTestConfig{
|
||||
Groups: []NodeGroupConfig{
|
||||
{Name: "ng1", MaxSize: 2},
|
||||
{Name: "ng2", MaxSize: 2},
|
||||
},
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "ng1-n1", Cpu: 1500, Memory: 1000 * utils.MiB, Ready: true, Group: "ng1"},
|
||||
{Name: "ng2-n1", Cpu: 1500, Memory: 1000 * utils.MiB, Ready: true, Group: "ng2"},
|
||||
|
|
@ -267,25 +259,29 @@ func TestScaleUpTwoGroups(t *testing.T) {
|
|||
Options: &options,
|
||||
}
|
||||
testCases := []struct {
|
||||
desc string
|
||||
parallelScaleUp bool
|
||||
desc string
|
||||
parallel bool
|
||||
}{
|
||||
{
|
||||
desc: "synchronous scale up",
|
||||
parallelScaleUp: false,
|
||||
desc: "synchronous scale up",
|
||||
parallel: false,
|
||||
},
|
||||
{
|
||||
desc: "parallel scale up",
|
||||
parallelScaleUp: true,
|
||||
desc: "parallel scale up",
|
||||
parallel: true,
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
config.Options.ParallelScaleUp = tc.parallelScaleUp
|
||||
result := scaleUpGroups(t, config)
|
||||
assert.NoError(t, result.Error)
|
||||
assert.Equal(t, 2, result.TargetSizes["ng1"])
|
||||
assert.Equal(t, 2, result.TargetSizes["ng2"])
|
||||
config.Options.ParallelScaleUp = tc.parallel
|
||||
result := runSimpleScaleUpTest(t, config)
|
||||
assert.True(t, result.ScaleUpStatus.WasSuccessful())
|
||||
assert.Nil(t, result.ScaleUpError)
|
||||
assert.Equal(t, result.GroupTargetSizes, map[string]int{
|
||||
"ng1": 2,
|
||||
"ng2": 2,
|
||||
})
|
||||
assert.ElementsMatch(t, result.ScaleUpStatus.PodsTriggeredScaleUp, []string{"p3", "p4"})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -293,7 +289,7 @@ func TestScaleUpTwoGroups(t *testing.T) {
|
|||
func TestCloudProviderFailingToScaleUpGroups(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.BalanceSimilarNodeGroups = true
|
||||
config := &GroupsScaleUpTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Groups: []NodeGroupConfig{
|
||||
{Name: "ng1", MaxSize: 2},
|
||||
{Name: "ng2", MaxSize: 2},
|
||||
|
|
@ -326,35 +322,35 @@ func TestCloudProviderFailingToScaleUpGroups(t *testing.T) {
|
|||
}
|
||||
testCases := []struct {
|
||||
desc string
|
||||
parallelScaleUp bool
|
||||
parallel bool
|
||||
onScaleUp testprovider.OnScaleUpFunc
|
||||
expectConcurrentErrors bool
|
||||
expectedTotalTargetSizes int
|
||||
}{
|
||||
{
|
||||
desc: "synchronous scale up - two failures",
|
||||
parallelScaleUp: false,
|
||||
parallel: false,
|
||||
onScaleUp: failAlwaysScaleUp,
|
||||
expectConcurrentErrors: false,
|
||||
expectedTotalTargetSizes: 3, // first error stops scale up process
|
||||
},
|
||||
{
|
||||
desc: "parallel scale up - two failures",
|
||||
parallelScaleUp: true,
|
||||
parallel: true,
|
||||
onScaleUp: failAlwaysScaleUp,
|
||||
expectConcurrentErrors: true,
|
||||
expectedTotalTargetSizes: 4,
|
||||
},
|
||||
{
|
||||
desc: "synchronous scale up - one failure",
|
||||
parallelScaleUp: false,
|
||||
parallel: false,
|
||||
onScaleUp: failOnceScaleUp(),
|
||||
expectConcurrentErrors: false,
|
||||
expectedTotalTargetSizes: 3,
|
||||
},
|
||||
{
|
||||
desc: "parallel scale up - one failure",
|
||||
parallelScaleUp: true,
|
||||
parallel: true,
|
||||
onScaleUp: failOnceScaleUp(),
|
||||
expectConcurrentErrors: false,
|
||||
expectedTotalTargetSizes: 4,
|
||||
|
|
@ -362,18 +358,272 @@ func TestCloudProviderFailingToScaleUpGroups(t *testing.T) {
|
|||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
config.Options.ParallelScaleUp = tc.parallelScaleUp
|
||||
config.Options.ParallelScaleUp = tc.parallel
|
||||
config.OnScaleUp = tc.onScaleUp
|
||||
result := scaleUpGroups(t, config)
|
||||
assert.Error(t, result.Error)
|
||||
assert.Equal(t, errors.CloudProviderError, result.Error.Type())
|
||||
assert.Equal(t, tc.expectConcurrentErrors, strings.Contains(result.Error.Error(), "...other concurrent errors"))
|
||||
assert.Equal(t, tc.expectedTotalTargetSizes, result.TargetSizes["ng1"]+result.TargetSizes["ng2"])
|
||||
result := runSimpleScaleUpTest(t, config)
|
||||
assert.False(t, result.ScaleUpStatus.WasSuccessful())
|
||||
assert.Equal(t, errors.CloudProviderError, result.ScaleUpError.Type())
|
||||
assert.Equal(t, tc.expectedTotalTargetSizes, result.GroupTargetSizes["ng1"]+result.GroupTargetSizes["ng2"])
|
||||
assert.Equal(t, tc.expectConcurrentErrors, strings.Contains(result.ScaleUpError.Error(), "...and other concurrent errors"))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func scaleUpGroups(t *testing.T, config *GroupsScaleUpTestConfig) *GroupsScaleUpTestResult {
|
||||
func TestScaleUpCapToMaxTotalNodesLimit(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 3
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "p1", Cpu: 1000, Memory: 0, Gpu: 0, Node: "n1", ToleratesGpu: false},
|
||||
{Name: "p2", Cpu: 3000, Memory: 0, Gpu: 0, Node: "n2", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "p-new-1", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-3", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng2", SizeChange: 3},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"p-new-1", "p-new-2", "p-new-3"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestScaleUpCapToMaxTotalNodesLimitWithNotAutoscaledGroup(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 3
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Ready: true, Group: ""},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "p1", Cpu: 1000, Memory: 0, Gpu: 0, Node: "n1", ToleratesGpu: false},
|
||||
{Name: "p2", Cpu: 3000, Memory: 0, Gpu: 0, Node: "n2", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "p-new-1", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-3", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "ng2", SizeChange: 3},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"p-new-1", "p-new-2", "p-new-3"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestWillConsiderGpuAndStandardPoolForPodWhichDoesNotRequireGpu(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 100
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "gpu-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Ready: true, Group: "gpu-pool"},
|
||||
{Name: "std-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "std-pool"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "gpu-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "gpu-node-1", ToleratesGpu: true},
|
||||
{Name: "std-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "std-node-1", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "extra-std-pod", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: true},
|
||||
},
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "std-pool", SizeChange: 1},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "std-pool", SizeChange: 1},
|
||||
ExpansionOptions: []GroupSizeChange{
|
||||
{GroupName: "std-pool", SizeChange: 1},
|
||||
{GroupName: "gpu-pool", SizeChange: 1},
|
||||
},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"extra-std-pod"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestWillConsiderOnlyGpuPoolForPodWhichDoesRequiresGpu(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 100
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "gpu-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Ready: true, Group: "gpu-pool"},
|
||||
{Name: "std-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "std-pool"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "gpu-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "gpu-node-1", ToleratesGpu: true},
|
||||
{Name: "std-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "std-node-1", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "extra-gpu-pod", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true},
|
||||
},
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "gpu-pool", SizeChange: 1},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "gpu-pool", SizeChange: 1},
|
||||
ExpansionOptions: []GroupSizeChange{
|
||||
{GroupName: "gpu-pool", SizeChange: 1},
|
||||
},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"extra-gpu-pod"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestWillConsiderAllPoolsWhichFitTwoPodsRequiringGpus(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 100
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "gpu-1-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Ready: true, Group: "gpu-1-pool"},
|
||||
{Name: "gpu-2-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 2, Ready: true, Group: "gpu-2-pool"},
|
||||
{Name: "gpu-4-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 4, Ready: true, Group: "gpu-4-pool"},
|
||||
{Name: "std-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "std-pool"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "gpu-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "gpu-1-node-1", ToleratesGpu: true},
|
||||
{Name: "gpu-pod-2", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 2, Node: "gpu-2-node-1", ToleratesGpu: true},
|
||||
{Name: "gpu-pod-3", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 4, Node: "gpu-4-node-1", ToleratesGpu: true},
|
||||
{Name: "std-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "std-node-1", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "extra-gpu-pod-1", Cpu: 1, Memory: 1 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true}, // CPU and mem negligible
|
||||
{Name: "extra-gpu-pod-2", Cpu: 1, Memory: 1 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true}, // CPU and mem negligible
|
||||
{Name: "extra-gpu-pod-3", Cpu: 1, Memory: 1 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true}, // CPU and mem negligible
|
||||
},
|
||||
ExpansionOptionToChoose: &GroupSizeChange{GroupName: "gpu-1-pool", SizeChange: 3},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "gpu-1-pool", SizeChange: 3},
|
||||
ExpansionOptions: []GroupSizeChange{
|
||||
{GroupName: "gpu-1-pool", SizeChange: 3},
|
||||
{GroupName: "gpu-2-pool", SizeChange: 2},
|
||||
{GroupName: "gpu-4-pool", SizeChange: 1},
|
||||
},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"extra-gpu-pod-1", "extra-gpu-pod-2", "extra-gpu-pod-3"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
// No scale up scenarios.
|
||||
func TestNoScaleUpMaxCoresLimitHit(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxCoresTotal = 7
|
||||
options.MaxMemoryTotal = 1150
|
||||
config := &ScaleUpTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "p1", Cpu: 1000, Memory: 0, Gpu: 0, Node: "n1", ToleratesGpu: false},
|
||||
{Name: "p2", Cpu: 3000, Memory: 0, Gpu: 0, Node: "n2", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "p-new-1", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
Options: &options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
NoScaleUpReason: "max cluster cpu, memory limit reached",
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsRemainUnschedulable: []string{"p-new-1", "p-new-2"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleNoScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func simpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig, expectedResults *ScaleTestResults) {
|
||||
results := runSimpleScaleUpTest(t, config)
|
||||
assert.NotNil(t, results.GroupSizeChanges[0], "Expected scale up event")
|
||||
assert.Equal(t, expectedResults.FinalOption, results.GroupSizeChanges[0])
|
||||
assert.True(t, results.ScaleUpStatus.WasSuccessful())
|
||||
nodeEventSeen := false
|
||||
for _, event := range results.Events {
|
||||
if strings.Contains(event, "TriggeredScaleUp") && strings.Contains(event, expectedResults.FinalOption.GroupName) {
|
||||
nodeEventSeen = true
|
||||
}
|
||||
if len(expectedResults.ScaleUpStatus.PodsRemainUnschedulable) == 0 {
|
||||
assert.NotRegexp(t, regexp.MustCompile("NotTriggerScaleUp"), event)
|
||||
}
|
||||
}
|
||||
assert.True(t, nodeEventSeen)
|
||||
if len(expectedResults.ExpansionOptions) > 0 {
|
||||
// Empty ExpansionOptions means we do not want to do any assertions
|
||||
// on contents of actual scaleUp options
|
||||
if config.ExpansionOptionToChoose != nil {
|
||||
// Check that option to choose is part of expected options.
|
||||
assert.Contains(t, expectedResults.ExpansionOptions, *config.ExpansionOptionToChoose, "final expected expansion option must be in expected expansion options")
|
||||
assert.Contains(t, results.ExpansionOptions, *config.ExpansionOptionToChoose, "final expected expansion option must be in expected expansion options")
|
||||
}
|
||||
assert.ElementsMatch(t, results.ExpansionOptions, expectedResults.ExpansionOptions,
|
||||
"actual and expected expansion options should be the same")
|
||||
}
|
||||
if expectedResults.GroupTargetSizes != nil {
|
||||
assert.Equal(t, expectedResults.GroupTargetSizes, results.GroupTargetSizes)
|
||||
}
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsTriggeredScaleUp, expectedResults.ScaleUpStatus.PodsTriggeredScaleUp,
|
||||
"actual and expected triggering pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsRemainUnschedulable, expectedResults.ScaleUpStatus.PodsRemainUnschedulable,
|
||||
"actual and expected remaining pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsAwaitEvaluation, expectedResults.ScaleUpStatus.PodsAwaitEvaluation,
|
||||
"actual and expected awaiting evaluation pods should be the same")
|
||||
}
|
||||
|
||||
func simpleNoScaleUpTest(t *testing.T, config *ScaleUpTestConfig, expectedResults *ScaleTestResults) {
|
||||
results := runSimpleScaleUpTest(t, config)
|
||||
assert.Nil(t, results.GroupSizeChanges)
|
||||
assert.False(t, results.ScaleUpStatus.WasSuccessful())
|
||||
noScaleUpEventSeen := false
|
||||
for _, event := range results.Events {
|
||||
if strings.Contains(event, "NotTriggerScaleUp") {
|
||||
if strings.Contains(event, expectedResults.NoScaleUpReason) {
|
||||
noScaleUpEventSeen = true
|
||||
} else {
|
||||
// Surprisingly useful for debugging.
|
||||
fmt.Println("Event:", event)
|
||||
}
|
||||
}
|
||||
assert.NotRegexp(t, regexp.MustCompile("TriggeredScaleUp"), event)
|
||||
}
|
||||
assert.True(t, noScaleUpEventSeen)
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsTriggeredScaleUp, expectedResults.ScaleUpStatus.PodsTriggeredScaleUp,
|
||||
"actual and expected triggering pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsRemainUnschedulable, expectedResults.ScaleUpStatus.PodsRemainUnschedulable,
|
||||
"actual and expected remaining pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsAwaitEvaluation, expectedResults.ScaleUpStatus.PodsAwaitEvaluation,
|
||||
"actual and expected awaiting evaluation pods should be the same")
|
||||
}
|
||||
|
||||
func runSimpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig) *ScaleUpTestResult {
|
||||
now := time.Now()
|
||||
groupSizeChangesChannel := make(chan GroupSizeChange, 20)
|
||||
groupNodes := make(map[string][]*apiv1.Node)
|
||||
|
|
@ -440,18 +690,22 @@ func scaleUpGroups(t *testing.T, config *GroupsScaleUpTestConfig) *GroupsScaleUp
|
|||
}
|
||||
|
||||
// build orchestrator
|
||||
context, _ := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
|
||||
nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).
|
||||
context, err := NewScaleTestAutoscalingContext(options, &fake.Clientset{}, listers, provider, nil, nil)
|
||||
assert.NoError(t, err)
|
||||
nodeInfos, err := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).
|
||||
Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
|
||||
assert.NoError(t, err)
|
||||
clusterState := clusterstate.
|
||||
NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
|
||||
clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
|
||||
processors := NewTestProcessors(&context)
|
||||
orchestrator := New()
|
||||
orchestrator.Initialize(&context, processors, clusterState, taints.TaintConfig{})
|
||||
expander := NewMockRepotingStrategy(t, config.ExpansionOptionToChoose)
|
||||
context.ExpanderStrategy = expander
|
||||
|
||||
// scale up
|
||||
scaleUpStatus, err := orchestrator.ScaleUp(extraPods, nodes, []*appsv1.DaemonSet{}, nodeInfos)
|
||||
scaleUpStatus, scaleUpErr := orchestrator.ScaleUp(extraPods, nodes, []*appsv1.DaemonSet{}, nodeInfos)
|
||||
processors.ScaleUpStatusProcessor.Process(&context, scaleUpStatus)
|
||||
|
||||
// aggregate group size changes
|
||||
|
|
@ -475,418 +729,13 @@ func scaleUpGroups(t *testing.T, config *GroupsScaleUpTestConfig) *GroupsScaleUp
|
|||
targetSizes[group.Id()], _ = group.TargetSize()
|
||||
}
|
||||
|
||||
return &GroupsScaleUpTestResult{
|
||||
Error: err,
|
||||
return &ScaleUpTestResult{
|
||||
ScaleUpError: scaleUpErr,
|
||||
ScaleUpStatus: simplifyScaleUpStatus(scaleUpStatus),
|
||||
GroupSizeChanges: groupSizeChanges,
|
||||
Events: events,
|
||||
TargetSizes: targetSizes,
|
||||
}
|
||||
}
|
||||
|
||||
func TestScaleUpCapToMaxTotalNodesLimit(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 3
|
||||
config := &ScaleTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "p1", Cpu: 1000, Memory: 0, Gpu: 0, Node: "n1", ToleratesGpu: false},
|
||||
{Name: "p2", Cpu: 3000, Memory: 0, Gpu: 0, Node: "n2", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "p-new-1", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-3", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng2", SizeChange: 3},
|
||||
Options: options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"p-new-1", "p-new-2", "p-new-3"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestScaleUpCapToMaxTotalNodesLimitWithNotAutoscaledGroup(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 3
|
||||
config := &ScaleTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100 * utils.MiB, Gpu: 0, Ready: true, Group: ""},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "p1", Cpu: 1000, Memory: 0, Gpu: 0, Node: "n1", ToleratesGpu: false},
|
||||
{Name: "p2", Cpu: 3000, Memory: 0, Gpu: 0, Node: "n2", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "p-new-1", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-3", Cpu: 4000, Memory: 100 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "ng2", SizeChange: 3},
|
||||
Options: options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "ng2", SizeChange: 1},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"p-new-1", "p-new-2", "p-new-3"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestWillConsiderGpuAndStandardPoolForPodWhichDoesNotRequireGpu(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 100
|
||||
config := &ScaleTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "gpu-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Ready: true, Group: "gpu-pool"},
|
||||
{Name: "std-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "std-pool"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "gpu-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "gpu-node-1", ToleratesGpu: true},
|
||||
{Name: "std-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "std-node-1", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "extra-std-pod", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "", ToleratesGpu: true},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "std-pool", SizeChange: 1},
|
||||
Options: options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "std-pool", SizeChange: 1},
|
||||
ExpansionOptions: []GroupSizeChange{
|
||||
{GroupName: "std-pool", SizeChange: 1},
|
||||
{GroupName: "gpu-pool", SizeChange: 1},
|
||||
},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"extra-std-pod"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestWillConsiderOnlyGpuPoolForPodWhichDoesRequiresGpu(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 100
|
||||
config := &ScaleTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "gpu-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Ready: true, Group: "gpu-pool"},
|
||||
{Name: "std-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "std-pool"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "gpu-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "gpu-node-1", ToleratesGpu: true},
|
||||
{Name: "std-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "std-node-1", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "extra-gpu-pod", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true},
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "gpu-pool", SizeChange: 1},
|
||||
Options: options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "gpu-pool", SizeChange: 1},
|
||||
ExpansionOptions: []GroupSizeChange{
|
||||
{GroupName: "gpu-pool", SizeChange: 1},
|
||||
},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"extra-gpu-pod"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
func TestWillConsiderAllPoolsWhichFitTwoPodsRequiringGpus(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxNodesTotal = 100
|
||||
config := &ScaleTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "gpu-1-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Ready: true, Group: "gpu-1-pool"},
|
||||
{Name: "gpu-2-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 2, Ready: true, Group: "gpu-2-pool"},
|
||||
{Name: "gpu-4-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 4, Ready: true, Group: "gpu-4-pool"},
|
||||
{Name: "std-node-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Ready: true, Group: "std-pool"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "gpu-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 1, Node: "gpu-1-node-1", ToleratesGpu: true},
|
||||
{Name: "gpu-pod-2", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 2, Node: "gpu-2-node-1", ToleratesGpu: true},
|
||||
{Name: "gpu-pod-3", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 4, Node: "gpu-4-node-1", ToleratesGpu: true},
|
||||
{Name: "std-pod-1", Cpu: 2000, Memory: 1000 * utils.MiB, Gpu: 0, Node: "std-node-1", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "extra-gpu-pod-1", Cpu: 1, Memory: 1 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true}, // CPU and mem negligible
|
||||
{Name: "extra-gpu-pod-2", Cpu: 1, Memory: 1 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true}, // CPU and mem negligible
|
||||
{Name: "extra-gpu-pod-3", Cpu: 1, Memory: 1 * utils.MiB, Gpu: 1, Node: "", ToleratesGpu: true}, // CPU and mem negligible
|
||||
},
|
||||
ExpansionOptionToChoose: GroupSizeChange{GroupName: "gpu-1-pool", SizeChange: 3},
|
||||
Options: options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
FinalOption: GroupSizeChange{GroupName: "gpu-1-pool", SizeChange: 3},
|
||||
ExpansionOptions: []GroupSizeChange{
|
||||
{GroupName: "gpu-1-pool", SizeChange: 3},
|
||||
{GroupName: "gpu-2-pool", SizeChange: 2},
|
||||
{GroupName: "gpu-4-pool", SizeChange: 1},
|
||||
},
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsTriggeredScaleUp: []string{"extra-gpu-pod-1", "extra-gpu-pod-2", "extra-gpu-pod-3"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
// No scale up scenarios.
|
||||
func TestNoScaleUpMaxCoresLimitHit(t *testing.T) {
|
||||
options := defaultOptions
|
||||
options.MaxCoresTotal = 7
|
||||
options.MaxMemoryTotal = 1150
|
||||
config := &ScaleTestConfig{
|
||||
Nodes: []NodeConfig{
|
||||
{Name: "n1", Cpu: 2000, Memory: 100, Gpu: 0, Ready: true, Group: "ng1"},
|
||||
{Name: "n2", Cpu: 4000, Memory: 1000, Gpu: 0, Ready: true, Group: "ng2"},
|
||||
},
|
||||
Pods: []PodConfig{
|
||||
{Name: "p1", Cpu: 1000, Memory: 0, Gpu: 0, Node: "n1", ToleratesGpu: false},
|
||||
{Name: "p2", Cpu: 3000, Memory: 0, Gpu: 0, Node: "n2", ToleratesGpu: false},
|
||||
},
|
||||
ExtraPods: []PodConfig{
|
||||
{Name: "p-new-1", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
{Name: "p-new-2", Cpu: 2000, Memory: 0, Gpu: 0, Node: "", ToleratesGpu: false},
|
||||
},
|
||||
Options: options,
|
||||
}
|
||||
results := &ScaleTestResults{
|
||||
NoScaleUpReason: "max cluster cpu, memory limit reached",
|
||||
ScaleUpStatus: ScaleUpStatusInfo{
|
||||
PodsRemainUnschedulable: []string{"p-new-1", "p-new-2"},
|
||||
},
|
||||
}
|
||||
|
||||
simpleNoScaleUpTest(t, config, results)
|
||||
}
|
||||
|
||||
// To implement expander.Strategy, BestOption method must have a struct receiver.
|
||||
// This prevents it from modifying fields of reportingStrategy, so we need a thin
|
||||
// pointer wrapper for mutable parts.
|
||||
type expanderResults struct {
|
||||
inputOptions []GroupSizeChange
|
||||
}
|
||||
|
||||
type reportingStrategy struct {
|
||||
initialNodeConfigs []NodeConfig
|
||||
optionToChoose GroupSizeChange
|
||||
results *expanderResults
|
||||
t *testing.T
|
||||
}
|
||||
|
||||
func (r reportingStrategy) BestOption(options []expander.Option, nodeInfo map[string]*schedulerframework.NodeInfo) *expander.Option {
|
||||
r.results.inputOptions = expanderOptionsToGroupSizeChanges(options)
|
||||
for _, option := range options {
|
||||
GroupSizeChange := expanderOptionToGroupSizeChange(option)
|
||||
if GroupSizeChange == r.optionToChoose {
|
||||
return &option
|
||||
}
|
||||
}
|
||||
assert.Fail(r.t, "did not find expansionOptionToChoose %s", r.optionToChoose)
|
||||
return nil
|
||||
}
|
||||
|
||||
func expanderOptionsToGroupSizeChanges(options []expander.Option) []GroupSizeChange {
|
||||
groupSizeChanges := make([]GroupSizeChange, 0, len(options))
|
||||
for _, option := range options {
|
||||
GroupSizeChange := expanderOptionToGroupSizeChange(option)
|
||||
groupSizeChanges = append(groupSizeChanges, GroupSizeChange)
|
||||
}
|
||||
return groupSizeChanges
|
||||
}
|
||||
|
||||
func expanderOptionToGroupSizeChange(option expander.Option) GroupSizeChange {
|
||||
groupName := option.NodeGroup.Id()
|
||||
groupSizeIncrement := option.NodeCount
|
||||
scaleUpOption := GroupSizeChange{GroupName: groupName, SizeChange: groupSizeIncrement}
|
||||
return scaleUpOption
|
||||
}
|
||||
|
||||
func runSimpleScaleUpTest(t *testing.T, config *ScaleTestConfig) *ScaleTestResults {
|
||||
expandedGroups := make(chan GroupSizeChange, 10)
|
||||
now := time.Now()
|
||||
|
||||
groups := make(map[string][]*apiv1.Node)
|
||||
nodes := make([]*apiv1.Node, 0, len(config.Nodes))
|
||||
for _, n := range config.Nodes {
|
||||
node := BuildTestNode(n.Name, n.Cpu, n.Memory)
|
||||
if n.Gpu > 0 {
|
||||
AddGpusToNode(node, n.Gpu)
|
||||
}
|
||||
SetNodeReadyState(node, n.Ready, now.Add(-2*time.Minute))
|
||||
nodes = append(nodes, node)
|
||||
if n.Group != "" {
|
||||
groups[n.Group] = append(groups[n.Group], node)
|
||||
}
|
||||
}
|
||||
|
||||
pods := make([]*apiv1.Pod, 0, len(config.Pods))
|
||||
for _, p := range config.Pods {
|
||||
pod := buildTestPod(p)
|
||||
pods = append(pods, pod)
|
||||
}
|
||||
|
||||
podLister := kube_util.NewTestPodLister(pods)
|
||||
listers := kube_util.NewListerRegistry(nil, nil, podLister, nil, nil, nil, nil, nil, nil, nil)
|
||||
|
||||
provider := testprovider.NewTestCloudProvider(func(nodeGroup string, increase int) error {
|
||||
expandedGroups <- GroupSizeChange{GroupName: nodeGroup, SizeChange: increase}
|
||||
return nil
|
||||
}, nil)
|
||||
|
||||
for name, nodesInGroup := range groups {
|
||||
provider.AddNodeGroup(name, 1, 10, len(nodesInGroup))
|
||||
for _, n := range nodesInGroup {
|
||||
provider.AddNode(name, n)
|
||||
}
|
||||
}
|
||||
|
||||
resourceLimiter := cloudprovider.NewResourceLimiter(
|
||||
map[string]int64{cloudprovider.ResourceNameCores: config.Options.MinCoresTotal, cloudprovider.ResourceNameMemory: config.Options.MinMemoryTotal},
|
||||
map[string]int64{cloudprovider.ResourceNameCores: config.Options.MaxCoresTotal, cloudprovider.ResourceNameMemory: config.Options.MaxMemoryTotal})
|
||||
provider.SetResourceLimiter(resourceLimiter)
|
||||
|
||||
assert.NotNil(t, provider)
|
||||
|
||||
// Create context with non-random expander strategy.
|
||||
context, err := NewScaleTestAutoscalingContext(config.Options, &fake.Clientset{}, listers, provider, nil, nil)
|
||||
assert.NoError(t, err)
|
||||
|
||||
expander := reportingStrategy{
|
||||
initialNodeConfigs: config.Nodes,
|
||||
optionToChoose: config.ExpansionOptionToChoose,
|
||||
results: &expanderResults{},
|
||||
t: t,
|
||||
}
|
||||
context.ExpanderStrategy = expander
|
||||
|
||||
nodeInfos, _ := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false).Process(&context, nodes, []*appsv1.DaemonSet{}, taints.TaintConfig{}, now)
|
||||
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
|
||||
clusterState.UpdateNodes(nodes, nodeInfos, time.Now())
|
||||
|
||||
extraPods := make([]*apiv1.Pod, 0, len(config.ExtraPods))
|
||||
for _, p := range config.ExtraPods {
|
||||
pod := buildTestPod(p)
|
||||
extraPods = append(extraPods, pod)
|
||||
}
|
||||
|
||||
processors := NewTestProcessors(&context)
|
||||
suOrchestrator := New()
|
||||
suOrchestrator.Initialize(&context, processors, clusterState, taints.TaintConfig{})
|
||||
scaleUpStatus, err := suOrchestrator.ScaleUp(extraPods, nodes, []*appsv1.DaemonSet{}, nodeInfos)
|
||||
processors.ScaleUpStatusProcessor.Process(&context, scaleUpStatus)
|
||||
|
||||
assert.NoError(t, err)
|
||||
|
||||
expandedGroup := getGroupSizeChangeFromChan(expandedGroups)
|
||||
var expandedGroupStruct GroupSizeChange
|
||||
if expandedGroup != nil {
|
||||
expandedGroupStruct = *expandedGroup
|
||||
}
|
||||
|
||||
events := []string{}
|
||||
for eventsLeft := true; eventsLeft; {
|
||||
select {
|
||||
case event := <-context.Recorder.(*kube_record.FakeRecorder).Events:
|
||||
events = append(events, event)
|
||||
default:
|
||||
eventsLeft = false
|
||||
}
|
||||
}
|
||||
|
||||
return &ScaleTestResults{
|
||||
ExpansionOptions: expander.results.inputOptions,
|
||||
FinalOption: expandedGroupStruct,
|
||||
ScaleUpStatus: simplifyScaleUpStatus(scaleUpStatus),
|
||||
Events: events,
|
||||
}
|
||||
}
|
||||
|
||||
func simpleNoScaleUpTest(t *testing.T, config *ScaleTestConfig, expectedResults *ScaleTestResults) {
|
||||
results := runSimpleScaleUpTest(t, config)
|
||||
|
||||
assert.Equal(t, GroupSizeChange{}, results.FinalOption)
|
||||
assert.False(t, results.ScaleUpStatus.WasSuccessful())
|
||||
noScaleUpEventSeen := false
|
||||
for _, event := range results.Events {
|
||||
if strings.Contains(event, "NotTriggerScaleUp") {
|
||||
if strings.Contains(event, expectedResults.NoScaleUpReason) {
|
||||
noScaleUpEventSeen = true
|
||||
} else {
|
||||
// Surprisingly useful for debugging.
|
||||
fmt.Println("Event:", event)
|
||||
}
|
||||
}
|
||||
assert.NotRegexp(t, regexp.MustCompile("TriggeredScaleUp"), event)
|
||||
}
|
||||
assert.True(t, noScaleUpEventSeen)
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsTriggeredScaleUp, expectedResults.ScaleUpStatus.PodsTriggeredScaleUp,
|
||||
"actual and expected triggering pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsRemainUnschedulable, expectedResults.ScaleUpStatus.PodsRemainUnschedulable,
|
||||
"actual and expected remaining pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsAwaitEvaluation, expectedResults.ScaleUpStatus.PodsAwaitEvaluation,
|
||||
"actual and expected awaiting evaluation pods should be the same")
|
||||
}
|
||||
|
||||
func simpleScaleUpTest(t *testing.T, config *ScaleTestConfig, expectedResults *ScaleTestResults) {
|
||||
results := runSimpleScaleUpTest(t, config)
|
||||
|
||||
assert.NotNil(t, results.FinalOption, "Expected scale up event")
|
||||
assert.Equal(t, expectedResults.FinalOption, results.FinalOption)
|
||||
assert.True(t, results.ScaleUpStatus.WasSuccessful())
|
||||
nodeEventSeen := false
|
||||
for _, event := range results.Events {
|
||||
if strings.Contains(event, "TriggeredScaleUp") && strings.Contains(event, expectedResults.FinalOption.GroupName) {
|
||||
nodeEventSeen = true
|
||||
}
|
||||
if len(expectedResults.ScaleUpStatus.PodsRemainUnschedulable) == 0 {
|
||||
assert.NotRegexp(t, regexp.MustCompile("NotTriggerScaleUp"), event)
|
||||
}
|
||||
}
|
||||
assert.True(t, nodeEventSeen)
|
||||
|
||||
if len(expectedResults.ExpansionOptions) > 0 {
|
||||
// Empty ExpansionOptions means we do not want to do any assertions
|
||||
// on contents of actual scaleUp options
|
||||
|
||||
// Check that option to choose is part of expected options.
|
||||
assert.Contains(t, expectedResults.ExpansionOptions, config.ExpansionOptionToChoose, "final expected expansion option must be in expected expansion options")
|
||||
assert.Contains(t, results.ExpansionOptions, config.ExpansionOptionToChoose, "final expected expansion option must be in expected expansion options")
|
||||
|
||||
assert.ElementsMatch(t, results.ExpansionOptions, expectedResults.ExpansionOptions,
|
||||
"actual and expected expansion options should be the same")
|
||||
}
|
||||
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsTriggeredScaleUp, expectedResults.ScaleUpStatus.PodsTriggeredScaleUp,
|
||||
"actual and expected triggering pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsRemainUnschedulable, expectedResults.ScaleUpStatus.PodsRemainUnschedulable,
|
||||
"actual and expected remaining pods should be the same")
|
||||
assert.ElementsMatch(t, results.ScaleUpStatus.PodsAwaitEvaluation, expectedResults.ScaleUpStatus.PodsAwaitEvaluation,
|
||||
"actual and expected awaiting evaluation pods should be the same")
|
||||
}
|
||||
|
||||
func getGroupSizeChangeFromChan(c chan GroupSizeChange) *GroupSizeChange {
|
||||
select {
|
||||
case val := <-c:
|
||||
return &val
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
return nil
|
||||
GroupTargetSizes: targetSizes,
|
||||
ExpansionOptions: expander.LastInputOptions(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1293,7 +1142,7 @@ func TestCheckDeltaWithinLimits(t *testing.T) {
|
|||
|
||||
func TestAuthErrorHandling(t *testing.T) {
|
||||
metrics.RegisterAll(false)
|
||||
config := &GroupsScaleUpTestConfig{
|
||||
config := &ScaleUpTestConfig{
|
||||
Groups: []NodeGroupConfig{
|
||||
{Name: "ng1", MaxSize: 2},
|
||||
},
|
||||
|
|
@ -1308,12 +1157,12 @@ func TestAuthErrorHandling(t *testing.T) {
|
|||
},
|
||||
Options: &defaultOptions,
|
||||
}
|
||||
results := scaleUpGroups(t, config)
|
||||
results := runSimpleScaleUpTest(t, config)
|
||||
expected := errors.NewAutoscalerError(
|
||||
errors.AutoscalerErrorType("authError"),
|
||||
"failed to increase node group size: auth error",
|
||||
)
|
||||
assert.Equal(t, expected, results.Error)
|
||||
assert.Equal(t, expected, results.ScaleUpError)
|
||||
assertLegacyRegistryEntry(t, "cluster_autoscaler_failed_scale_ups_total{reason=\"authError\"} 1")
|
||||
}
|
||||
|
||||
|
|
@ -1325,7 +1174,6 @@ func assertLegacyRegistryEntry(t *testing.T, entry string) {
|
|||
rr := httptest.NewRecorder()
|
||||
handler := http.HandlerFunc(legacyregistry.Handler().ServeHTTP)
|
||||
handler.ServeHTTP(rr, req)
|
||||
|
||||
// Check that the status code is what we expect.
|
||||
if status := rr.Code; status != http.StatusOK {
|
||||
t.Errorf("handler returned wrong status code: got %v want %v",
|
||||
|
|
|
|||
|
|
@ -22,10 +22,6 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
|
||||
testcloudprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
|
||||
|
|
@ -33,7 +29,10 @@ import (
|
|||
"k8s.io/autoscaler/cluster-autoscaler/context"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/podlistprocessor"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/estimator"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/expander"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/expander/random"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/metrics"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/processors"
|
||||
|
|
@ -50,18 +49,17 @@ import (
|
|||
"k8s.io/autoscaler/cluster-autoscaler/processors/status"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/simulator/predicatechecker"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
|
||||
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
apiv1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
kube_client "k8s.io/client-go/kubernetes"
|
||||
kube_record "k8s.io/client-go/tools/record"
|
||||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
|
||||
)
|
||||
|
||||
// NodeConfig is a node config used in tests
|
||||
|
|
@ -110,28 +108,31 @@ type NodeGroupConfig struct {
|
|||
MaxSize int
|
||||
}
|
||||
|
||||
// GroupsScaleUpTestConfig represents a config of a scale test
|
||||
type GroupsScaleUpTestConfig struct {
|
||||
Groups []NodeGroupConfig
|
||||
Nodes []NodeConfig
|
||||
Pods []PodConfig
|
||||
ExtraPods []PodConfig
|
||||
OnScaleUp testcloudprovider.OnScaleUpFunc
|
||||
Options *config.AutoscalingOptions
|
||||
// ScaleUpTestConfig represents a config of a scale test
|
||||
type ScaleUpTestConfig struct {
|
||||
Groups []NodeGroupConfig
|
||||
Nodes []NodeConfig
|
||||
Pods []PodConfig
|
||||
ExtraPods []PodConfig
|
||||
OnScaleUp testcloudprovider.OnScaleUpFunc
|
||||
ExpansionOptionToChoose *GroupSizeChange
|
||||
Options *config.AutoscalingOptions
|
||||
}
|
||||
|
||||
// GroupsScaleUpTestResult represents a node groups scale up result
|
||||
type GroupsScaleUpTestResult struct {
|
||||
Error errors.AutoscalerError
|
||||
// ScaleUpTestResult represents a node groups scale up result
|
||||
type ScaleUpTestResult struct {
|
||||
ScaleUpError errors.AutoscalerError
|
||||
ScaleUpStatus ScaleUpStatusInfo
|
||||
GroupSizeChanges []GroupSizeChange
|
||||
ExpansionOptions []GroupSizeChange
|
||||
Events []string
|
||||
TargetSizes map[string]int
|
||||
GroupTargetSizes map[string]int
|
||||
}
|
||||
|
||||
// ScaleTestResults contains results of a scale test
|
||||
type ScaleTestResults struct {
|
||||
ExpansionOptions []GroupSizeChange
|
||||
GroupTargetSizes map[string]int
|
||||
FinalOption GroupSizeChange
|
||||
NoScaleUpReason string
|
||||
FinalScaleDowns []string
|
||||
|
|
@ -186,7 +187,8 @@ func NewTestProcessors(context *context.AutoscalingContext) *processors.Autoscal
|
|||
func NewScaleTestAutoscalingContext(
|
||||
options config.AutoscalingOptions, fakeClient kube_client.Interface,
|
||||
listers kube_util.ListerRegistry, provider cloudprovider.CloudProvider,
|
||||
processorCallbacks processor_callbacks.ProcessorCallbacks, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter) (context.AutoscalingContext, error) {
|
||||
processorCallbacks processor_callbacks.ProcessorCallbacks, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter,
|
||||
) (context.AutoscalingContext, error) {
|
||||
// Not enough buffer space causes the test to hang without printing any logs.
|
||||
// This is not useful.
|
||||
fakeRecorder := kube_record.NewFakeRecorder(100)
|
||||
|
|
@ -306,8 +308,8 @@ type MockAutoprovisioningNodeGroupListProcessor struct {
|
|||
|
||||
// Process extends the list of node groups
|
||||
func (p *MockAutoprovisioningNodeGroupListProcessor) Process(context *context.AutoscalingContext, nodeGroups []cloudprovider.NodeGroup, nodeInfos map[string]*schedulerframework.NodeInfo,
|
||||
unschedulablePods []*apiv1.Pod) ([]cloudprovider.NodeGroup, map[string]*schedulerframework.NodeInfo, error) {
|
||||
|
||||
unschedulablePods []*apiv1.Pod,
|
||||
) ([]cloudprovider.NodeGroup, map[string]*schedulerframework.NodeInfo, error) {
|
||||
machines, err := context.CloudProvider.GetAvailableMachineTypes()
|
||||
assert.NoError(p.T, err)
|
||||
|
||||
|
|
@ -332,3 +334,67 @@ func NewBackoff() backoff.Backoff {
|
|||
return backoff.NewIdBasedExponentialBackoff(5*time.Minute, /*InitialNodeGroupBackoffDuration*/
|
||||
30*time.Minute /*MaxNodeGroupBackoffDuration*/, 3*time.Hour /*NodeGroupBackoffResetTimeout*/)
|
||||
}
|
||||
|
||||
// To implement expander.Strategy, BestOption method must have a struct receiver.
|
||||
// This prevents it from modifying fields of reportingStrategy, so we need a thin
|
||||
// pointer wrapper for mutable parts.
|
||||
type expanderResults struct {
|
||||
inputOptions []GroupSizeChange
|
||||
}
|
||||
|
||||
// MockReportingStrategy implements expander.Strategy
|
||||
type MockReportingStrategy struct {
|
||||
defaultStrategy expander.Strategy
|
||||
optionToChoose *GroupSizeChange
|
||||
t *testing.T
|
||||
results *expanderResults
|
||||
}
|
||||
|
||||
// NewMockRepotingStrategy creates an expander strategy with reporting and mocking capabilities.
|
||||
func NewMockRepotingStrategy(t *testing.T, optionToChoose *GroupSizeChange) *MockReportingStrategy {
|
||||
return &MockReportingStrategy{
|
||||
defaultStrategy: random.NewStrategy(),
|
||||
results: &expanderResults{},
|
||||
optionToChoose: optionToChoose,
|
||||
t: t,
|
||||
}
|
||||
}
|
||||
|
||||
// LastInputOptions provides access to expansion options passed as an input in recent strategy execution
|
||||
func (r *MockReportingStrategy) LastInputOptions() []GroupSizeChange {
|
||||
return r.results.inputOptions
|
||||
}
|
||||
|
||||
// BestOption satisfies the Strategy interface. Picks the best option from those passed as an argument.
|
||||
// When parameter optionToChoose is defined, it's picked as the best one.
|
||||
// Otherwise, random option is used.
|
||||
func (r *MockReportingStrategy) BestOption(options []expander.Option, nodeInfo map[string]*schedulerframework.NodeInfo) *expander.Option {
|
||||
r.results.inputOptions = expanderOptionsToGroupSizeChanges(options)
|
||||
if r.optionToChoose == nil {
|
||||
return r.defaultStrategy.BestOption(options, nodeInfo)
|
||||
}
|
||||
for _, option := range options {
|
||||
groupSizeChange := expanderOptionToGroupSizeChange(option)
|
||||
if groupSizeChange == *r.optionToChoose {
|
||||
return &option
|
||||
}
|
||||
}
|
||||
assert.Fail(r.t, "did not find expansionOptionToChoose %+v", r.optionToChoose)
|
||||
return nil
|
||||
}
|
||||
|
||||
func expanderOptionsToGroupSizeChanges(options []expander.Option) []GroupSizeChange {
|
||||
groupSizeChanges := make([]GroupSizeChange, 0, len(options))
|
||||
for _, option := range options {
|
||||
groupSizeChange := expanderOptionToGroupSizeChange(option)
|
||||
groupSizeChanges = append(groupSizeChanges, groupSizeChange)
|
||||
}
|
||||
return groupSizeChanges
|
||||
}
|
||||
|
||||
func expanderOptionToGroupSizeChange(option expander.Option) GroupSizeChange {
|
||||
groupName := option.NodeGroup.Id()
|
||||
groupSizeIncrement := option.NodeCount
|
||||
scaleUpOption := GroupSizeChange{GroupName: groupName, SizeChange: groupSizeIncrement}
|
||||
return scaleUpOption
|
||||
}
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ var (
|
|||
maxTotalUnreadyPercentage = flag.Float64("max-total-unready-percentage", 45, "Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations")
|
||||
okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
|
||||
scaleUpFromZero = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there are 0 ready nodes.")
|
||||
parallelScaleUp = flag.Bool("parallel-scale-up", false, "Whether to allow parallel node groups scale up")
|
||||
parallelScaleUp = flag.Bool("parallel-scale-up", false, "Whether to allow parallel node groups scale up. Experimental: may not work on some cloud providers, enable at your own risk.")
|
||||
maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "The default maximum time CA waits for node to be provisioned - the value can be overridden per node group")
|
||||
maxPodEvictionTime = flag.Duration("max-pod-eviction-time", 2*time.Minute, "Maximum time CA tries to evict a pod before giving up")
|
||||
nodeGroupsFlag = multiStringFlag(