feature: use multi factor for overcommit plugin
Signed-off-by: googs1025 <googs1025@gmail.com>
This commit is contained in:
parent
ab23490e89
commit
14190b2f84
|
|
@ -296,6 +296,35 @@ func (r *Resource) Multi(ratio float64) *Resource {
|
|||
return r
|
||||
}
|
||||
|
||||
// ScaleResourcesWithRatios multiples the resource with ratio provided for CPU, Memory, and ScalarResources
|
||||
// if ratio is nil, use default ratio
|
||||
func (r *Resource) ScaleResourcesWithRatios(ratio map[string]float64, defaultRatio float64) *Resource {
|
||||
// if ratio is nil, use default ratio
|
||||
if ratio == nil {
|
||||
return r.Multi(defaultRatio)
|
||||
}
|
||||
|
||||
// apply ratio to CPU, Memory, and ScalarResources
|
||||
// if ratio is provided, use ratio if exists,
|
||||
// otherwise use default ratio
|
||||
applyRatio := func(resource *float64, resourceName string) {
|
||||
if ratioValue, ok := ratio[resourceName]; ok {
|
||||
*resource *= ratioValue
|
||||
} else {
|
||||
*resource *= defaultRatio
|
||||
}
|
||||
}
|
||||
|
||||
applyRatio(&r.MilliCPU, string(v1.ResourceCPU))
|
||||
applyRatio(&r.Memory, string(v1.ResourceMemory))
|
||||
|
||||
for rName, rQuant := range r.ScalarResources {
|
||||
applyRatio(&rQuant, string(rName))
|
||||
r.ScalarResources[rName] = rQuant
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// SetMaxResource compares with ResourceList and takes max value for each Resource.
|
||||
func (r *Resource) SetMaxResource(rr *Resource) {
|
||||
if r == nil || rr == nil {
|
||||
|
|
|
|||
|
|
@ -147,6 +147,60 @@ func TestSetMaxResource(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestScaleResourcesWithRatios(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
inputRatio map[string]float64
|
||||
defaultRatio float64
|
||||
resource *Resource
|
||||
expected *Resource
|
||||
}{
|
||||
{
|
||||
name: "scale with ratio",
|
||||
inputRatio: map[string]float64{
|
||||
"overcommit-factor": 1.2,
|
||||
"cpu": 1.5,
|
||||
"memory": 1.5,
|
||||
"ephemeral-storage": 1.2,
|
||||
"nvidia.com/gpu": 1.0,
|
||||
},
|
||||
resource: &Resource{
|
||||
MilliCPU: 4000,
|
||||
Memory: 2000,
|
||||
ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1000, "nvidia.com/gpu": 8},
|
||||
},
|
||||
defaultRatio: 1.2,
|
||||
expected: &Resource{
|
||||
MilliCPU: 6000,
|
||||
Memory: 3000,
|
||||
ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1200, "nvidia.com/gpu": 8},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "scale with default ratio",
|
||||
inputRatio: map[string]float64{},
|
||||
resource: &Resource{
|
||||
MilliCPU: 4000,
|
||||
Memory: 2000,
|
||||
ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1000, "nvidia.com/gpu": 8},
|
||||
},
|
||||
defaultRatio: 1.5,
|
||||
expected: &Resource{
|
||||
MilliCPU: 6000,
|
||||
Memory: 3000,
|
||||
ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1500, "nvidia.com/gpu": 12},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
outputResource := test.resource.ScaleResourcesWithRatios(test.inputRatio, test.defaultRatio)
|
||||
if !equality.Semantic.DeepEqual(test.expected, outputResource) {
|
||||
t.Errorf("expected: %#v, got: %#v", test.expected, outputResource)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsZero(t *testing.T) {
|
||||
tests := []struct {
|
||||
resource *Resource
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ limitations under the License.
|
|||
package overcommit
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
|
|
@ -32,28 +34,51 @@ const (
|
|||
// overCommitFactor is resource overCommit factor for enqueue action
|
||||
// It determines the number of `pending` pods that the scheduler will tolerate
|
||||
// when the resources of the cluster is insufficient
|
||||
// This field is used as the default key in factorMaps
|
||||
overCommitFactor = "overcommit-factor"
|
||||
// defaultOverCommitFactor defines the default overCommit resource factor for enqueue action
|
||||
defaultOverCommitFactor = 1.2
|
||||
)
|
||||
|
||||
// overCommitFactorPrefix marks a plugin argument that configures the overCommit
// factor of a single resource. The text following the prefix is the resource
// name, e.g. "overcommit-factor.cpu" configures the factor for "cpu".
const overCommitFactorPrefix = "overcommit-factor."
|
||||
|
||||
// overcommitFactors groups the overCommit factors configured for the plugin.
type overcommitFactors struct {
	// factorMaps maps a resource name to its overCommit factor.
	//
	// Example keys: "cpu", "memory", "ephemeral-storage", "nvidia.com/gpu".
	//
	// In addition to resource names, the map is initialised with the entry
	// "overcommit-factor" -> defaultOverCommitFactor, which serves as the
	// default factor.
	factorMaps map[string]float64
}
|
||||
|
||||
type overcommitPlugin struct {
|
||||
// Arguments given for the plugin
|
||||
pluginArguments framework.Arguments
|
||||
totalResource *api.Resource
|
||||
idleResource *api.Resource
|
||||
inqueueResource *api.Resource
|
||||
overCommitFactor float64
|
||||
// pluginArguments Arguments given for the plugin
|
||||
pluginArguments framework.Arguments
|
||||
totalResource *api.Resource
|
||||
idleResource *api.Resource
|
||||
inqueueResource *api.Resource
|
||||
// overCommitFactor is the different resource overCommit factors
|
||||
overCommitFactors *overcommitFactors
|
||||
}
|
||||
|
||||
// New function returns overcommit plugin object
|
||||
func New(arguments framework.Arguments) framework.Plugin {
|
||||
return &overcommitPlugin{
|
||||
pluginArguments: arguments,
|
||||
totalResource: api.EmptyResource(),
|
||||
idleResource: api.EmptyResource(),
|
||||
inqueueResource: api.EmptyResource(),
|
||||
overCommitFactor: defaultOverCommitFactor,
|
||||
pluginArguments: arguments,
|
||||
totalResource: api.EmptyResource(),
|
||||
idleResource: api.EmptyResource(),
|
||||
inqueueResource: api.EmptyResource(),
|
||||
overCommitFactors: &overcommitFactors{
|
||||
factorMaps: map[string]float64{
|
||||
overCommitFactor: defaultOverCommitFactor,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -62,25 +87,28 @@ func (op *overcommitPlugin) Name() string {
|
|||
}
|
||||
|
||||
/*
|
||||
User should give overcommit-factor through overcommit plugin arguments as format below:
|
||||
User should give overcommit factors through overcommit plugin arguments as format below:
|
||||
|
||||
Example:
|
||||
|
||||
actions: "enqueue, allocate, backfill"
|
||||
tiers:
|
||||
- plugins:
|
||||
- name: overcommit
|
||||
arguments:
|
||||
overcommit-factor: 1.0
|
||||
overcommit-factor.cpu: 1.2
|
||||
overcommit-factor.memory: 1.0
|
||||
overcommit-factor: 1.2
|
||||
*/
|
||||
func (op *overcommitPlugin) OnSessionOpen(ssn *framework.Session) {
|
||||
klog.V(5).Infof("Enter overcommit plugin ...")
|
||||
defer klog.V(5).Infof("Leaving overcommit plugin.")
|
||||
|
||||
op.pluginArguments.GetFloat64(&op.overCommitFactor, overCommitFactor)
|
||||
if op.overCommitFactor < 1.0 {
|
||||
klog.Warningf("Invalid input %f for overcommit-factor, reason: overcommit-factor cannot be less than 1,"+
|
||||
" using default value: %f.", op.overCommitFactor, defaultOverCommitFactor)
|
||||
op.overCommitFactor = defaultOverCommitFactor
|
||||
}
|
||||
// parse plugin arguments
|
||||
op.parse()
|
||||
|
||||
// validate plugin arguments
|
||||
op.validate()
|
||||
|
||||
op.totalResource.Add(ssn.TotalResource)
|
||||
// calculate idle resources of total cluster, overcommit resources included
|
||||
|
|
@ -88,7 +116,9 @@ func (op *overcommitPlugin) OnSessionOpen(ssn *framework.Session) {
|
|||
for _, node := range ssn.Nodes {
|
||||
used.Add(node.Used)
|
||||
}
|
||||
op.idleResource = op.totalResource.Clone().Multi(op.overCommitFactor).SubWithoutAssert(used)
|
||||
|
||||
op.idleResource = op.totalResource.Clone().
|
||||
ScaleResourcesWithRatios(op.overCommitFactors.factorMaps, op.overCommitFactors.factorMaps[overCommitFactor]).SubWithoutAssert(used)
|
||||
|
||||
for _, job := range ssn.Jobs {
|
||||
// calculate inqueue job resources
|
||||
|
|
@ -144,3 +174,58 @@ func (op *overcommitPlugin) OnSessionClose(ssn *framework.Session) {
|
|||
op.idleResource = nil
|
||||
op.inqueueResource = nil
|
||||
}
|
||||
|
||||
// parseFactor iterates through the arguments map and extracts values based on the keys with specific prefixes.
|
||||
// If a key matches overCommitFactor, its corresponding value is directly added to the target map.
|
||||
// For keys starting with overCommitFactorPrefix,
|
||||
// the suffix after the prefix is extracted and used as the key in the target map along with the corresponding value.
|
||||
func (op *overcommitPlugin) parseFactor(arguments framework.Arguments, target map[string]float64) {
|
||||
for key, value := range arguments {
|
||||
switch v := value.(type) {
|
||||
case float64:
|
||||
if key == overCommitFactor {
|
||||
// If the key is equal to overCommitFactor,
|
||||
// directly add the value to the target map
|
||||
target[overCommitFactor] = v
|
||||
}
|
||||
|
||||
if strings.HasPrefix(key, overCommitFactorPrefix) {
|
||||
// If the key starts with overCommitFactorPrefix
|
||||
// Extract the suffix after the prefix
|
||||
// Update target map with the extracted suffix and corresponding value
|
||||
suffix := strings.TrimPrefix(key, overCommitFactorPrefix)
|
||||
target[suffix] = v
|
||||
}
|
||||
case int:
|
||||
// Handle int values by converting them to float64
|
||||
floatValue := float64(v)
|
||||
if key == overCommitFactor {
|
||||
target[overCommitFactor] = floatValue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(key, overCommitFactorPrefix) {
|
||||
suffix := strings.TrimPrefix(key, overCommitFactorPrefix)
|
||||
target[suffix] = floatValue
|
||||
}
|
||||
default:
|
||||
// we should log the unexpected value type here to prevent panics
|
||||
klog.Warningf("Unexpected value type for key %s: %T\n", key, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (op *overcommitPlugin) parse() {
|
||||
op.parseFactor(op.pluginArguments, op.overCommitFactors.factorMaps)
|
||||
}
|
||||
|
||||
// validate is used to validate the input parameters,
|
||||
// and if the input parameters are invalid, use the default value.
|
||||
func (op *overcommitPlugin) validate() {
|
||||
for k, v := range op.overCommitFactors.factorMaps {
|
||||
if v < 1.0 {
|
||||
klog.Warningf("Invalid input %f for %v overcommit factor, reason: %v overcommit factor cannot be less than 1,"+
|
||||
" using default value: %f.", v, k, k, defaultOverCommitFactor)
|
||||
op.overCommitFactors.factorMaps[k] = defaultOverCommitFactor
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
package overcommit
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/equality"
|
||||
|
||||
|
|
@ -16,7 +18,7 @@ import (
|
|||
)
|
||||
|
||||
func TestOvercommitPlugin(t *testing.T) {
|
||||
n1 := util.BuildNode("n1", api.BuildResourceList("2", "4Gi"), make(map[string]string))
|
||||
n1 := util.BuildNode("n1", api.BuildResourceList("2", "4Gi", api.ScalarResource{Name: "ephemeral-storage", Value: "32Gi"}, api.ScalarResource{Name: "nvidia.com/gpu", Value: "8"}), make(map[string]string))
|
||||
n2 := util.BuildNode("n2", api.BuildResourceList("4", "16Gi"), make(map[string]string))
|
||||
hugeResource := api.BuildResourceList("20000m", "20G")
|
||||
normalResource := api.BuildResourceList("2000m", "2G")
|
||||
|
|
@ -41,7 +43,7 @@ func TestOvercommitPlugin(t *testing.T) {
|
|||
}{
|
||||
{
|
||||
TestCommonStruct: uthelper.TestCommonStruct{
|
||||
Name: "overCommitFactor is more than 0",
|
||||
Name: "overCommitFactor is more than 1",
|
||||
Plugins: map[string]framework.PluginBuilder{PluginName: New},
|
||||
PodGroups: []*schedulingv1.PodGroup{pg1},
|
||||
Queues: []*schedulingv1.Queue{queue1},
|
||||
|
|
@ -54,7 +56,7 @@ func TestOvercommitPlugin(t *testing.T) {
|
|||
},
|
||||
{
|
||||
TestCommonStruct: uthelper.TestCommonStruct{
|
||||
Name: "overCommitFactor is less than 0",
|
||||
Name: "overCommitFactor is less than 1",
|
||||
Plugins: map[string]framework.PluginBuilder{PluginName: New},
|
||||
PodGroups: []*schedulingv1.PodGroup{pg1},
|
||||
Queues: []*schedulingv1.Queue{queue1},
|
||||
|
|
@ -91,6 +93,33 @@ func TestOvercommitPlugin(t *testing.T) {
|
|||
},
|
||||
expectedEnqueueAble: true,
|
||||
},
|
||||
{
|
||||
TestCommonStruct: uthelper.TestCommonStruct{
|
||||
Name: "overCommitFactor is more than 1 with different overcommit factors",
|
||||
Plugins: map[string]framework.PluginBuilder{PluginName: New},
|
||||
PodGroups: []*schedulingv1.PodGroup{pg1},
|
||||
Queues: []*schedulingv1.Queue{queue1},
|
||||
Nodes: []*v1.Node{n1, n2},
|
||||
},
|
||||
arguments: framework.Arguments{
|
||||
"overcommit-factor.cpu": 1.3,
|
||||
"overcommit-factor.memory": 1.4,
|
||||
"overcommit-factor.ephemeral-storage": 1.4,
|
||||
"overcommit-factor.nvidia.com/gpu": 1.3,
|
||||
},
|
||||
expectedEnqueueAble: true,
|
||||
},
|
||||
{
|
||||
TestCommonStruct: uthelper.TestCommonStruct{
|
||||
Name: "overCommitFactor is not set",
|
||||
Plugins: map[string]framework.PluginBuilder{PluginName: New},
|
||||
PodGroups: []*schedulingv1.PodGroup{pg3},
|
||||
Queues: []*schedulingv1.Queue{queue2},
|
||||
Nodes: []*v1.Node{n1, n2},
|
||||
},
|
||||
arguments: framework.Arguments{},
|
||||
expectedEnqueueAble: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
|
@ -118,5 +147,79 @@ func TestOvercommitPlugin(t *testing.T) {
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestParseFactor(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
arguments framework.Arguments
|
||||
expectedMaps map[string]float64
|
||||
}{
|
||||
{
|
||||
name: "overCommitFactor with float64 type",
|
||||
arguments: framework.Arguments{
|
||||
"overcommit-factor.cpu": 1.3,
|
||||
"overcommit-factor.memory": 1.4,
|
||||
"overcommit-factor.ephemeral-storage": 1.4,
|
||||
"overcommit-factor.nvidia.com/gpu": 1.3,
|
||||
},
|
||||
expectedMaps: map[string]float64{
|
||||
// default value
|
||||
"overcommit-factor": 1.2,
|
||||
"cpu": 1.3,
|
||||
"memory": 1.4,
|
||||
"ephemeral-storage": 1.4,
|
||||
"nvidia.com/gpu": 1.3,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "overCommitFactor with int type",
|
||||
arguments: framework.Arguments{
|
||||
"overcommit-factor.cpu": 2,
|
||||
"overcommit-factor.memory": 2,
|
||||
"overcommit-factor.ephemeral-storage": 2,
|
||||
"overcommit-factor.nvidia.com/gpu": 2,
|
||||
},
|
||||
expectedMaps: map[string]float64{
|
||||
// default value
|
||||
"overcommit-factor": 1.2,
|
||||
"cpu": 2.0,
|
||||
"memory": 2.0,
|
||||
"ephemeral-storage": 2.0,
|
||||
"nvidia.com/gpu": 2.0,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
op := New(test.arguments).(*overcommitPlugin)
|
||||
op.parseFactor(test.arguments, op.overCommitFactors.factorMaps)
|
||||
// Sort expected and resulting maps by keys for comparison
|
||||
expectedKeys := sortMapByKey(test.expectedMaps)
|
||||
resultKeys := sortMapByKey(op.overCommitFactors.factorMaps)
|
||||
|
||||
// Check if the sorted keys match
|
||||
if diff := cmp.Diff(expectedKeys, resultKeys); diff != "" {
|
||||
t.Errorf("sorted keys mismatch: %s", diff)
|
||||
}
|
||||
|
||||
// Check if the values match after sorting by keys
|
||||
for _, key := range expectedKeys {
|
||||
if test.expectedMaps[key] != op.overCommitFactors.factorMaps[key] {
|
||||
t.Errorf("value mismatch for key %s: expected %f, got %f",
|
||||
key, test.expectedMaps[key], op.overCommitFactors.factorMaps[key])
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// sortMapByKey returns the keys of m in ascending lexical order.
// The map itself is not modified; a nil or empty map yields an empty,
// non-nil slice.
func sortMapByKey(m map[string]float64) []string {
	sorted := make([]string, len(m))
	next := 0
	for key := range m {
		sorted[next] = key
		next++
	}
	sort.Strings(sorted)
	return sorted
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue