feature: use multi factor for overcommit plugin

Signed-off-by: googs1025 <googs1025@gmail.com>
This commit is contained in:
googs1025 2024-08-01 17:14:21 +08:00
parent ab23490e89
commit 14190b2f84
4 changed files with 295 additions and 24 deletions

View File

@ -296,6 +296,35 @@ func (r *Resource) Multi(ratio float64) *Resource {
return r
}
// ScaleResourcesWithRatios multiplies CPU, Memory and every entry of
// ScalarResources by a per-resource ratio, modifying r in place and returning it.
// The ratio for a resource is looked up in ratio by resource name; resources
// without an entry are multiplied by defaultRatio. When ratio is nil the call
// is delegated to Multi with defaultRatio.
func (r *Resource) ScaleResourcesWithRatios(ratio map[string]float64, defaultRatio float64) *Resource {
	// No per-resource configuration at all: hand over to Multi.
	if ratio == nil {
		return r.Multi(defaultRatio)
	}

	// factorFor returns the configured ratio for a resource name,
	// or the default ratio when the name has no entry.
	factorFor := func(name string) float64 {
		if configured, found := ratio[name]; found {
			return configured
		}
		return defaultRatio
	}

	r.MilliCPU *= factorFor(string(v1.ResourceCPU))
	r.Memory *= factorFor(string(v1.ResourceMemory))
	// Only existing keys are overwritten, so updating the map while ranging is safe.
	for name, quantity := range r.ScalarResources {
		r.ScalarResources[name] = quantity * factorFor(string(name))
	}
	return r
}
// SetMaxResource compares with ResourceList and takes max value for each Resource.
func (r *Resource) SetMaxResource(rr *Resource) {
if r == nil || rr == nil {

View File

@ -147,6 +147,60 @@ func TestSetMaxResource(t *testing.T) {
}
}
// TestScaleResourcesWithRatios checks that ScaleResourcesWithRatios uses the
// per-resource ratio when one is configured and the default ratio otherwise.
// Each case runs as a named subtest so a failure identifies the case that broke.
func TestScaleResourcesWithRatios(t *testing.T) {
	tests := []struct {
		name         string
		inputRatio   map[string]float64
		defaultRatio float64
		resource     *Resource
		expected     *Resource
	}{
		{
			name: "scale with ratio",
			inputRatio: map[string]float64{
				"overcommit-factor": 1.2,
				"cpu":               1.5,
				"memory":            1.5,
				"ephemeral-storage": 1.2,
				"nvidia.com/gpu":    1.0,
			},
			resource: &Resource{
				MilliCPU:        4000,
				Memory:          2000,
				ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1000, "nvidia.com/gpu": 8},
			},
			defaultRatio: 1.2,
			expected: &Resource{
				MilliCPU:        6000,
				Memory:          3000,
				ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1200, "nvidia.com/gpu": 8},
			},
		},
		{
			name:       "scale with default ratio",
			inputRatio: map[string]float64{},
			resource: &Resource{
				MilliCPU:        4000,
				Memory:          2000,
				ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1000, "nvidia.com/gpu": 8},
			},
			defaultRatio: 1.5,
			expected: &Resource{
				MilliCPU:        6000,
				Memory:          3000,
				ScalarResources: map[v1.ResourceName]float64{"ephemeral-storage": 1500, "nvidia.com/gpu": 12},
			},
		},
	}

	for _, test := range tests {
		// The subtest is not parallel, so it finishes before the loop variable advances.
		t.Run(test.name, func(t *testing.T) {
			outputResource := test.resource.ScaleResourcesWithRatios(test.inputRatio, test.defaultRatio)
			if !equality.Semantic.DeepEqual(test.expected, outputResource) {
				t.Errorf("expected: %#v, got: %#v", test.expected, outputResource)
			}
		})
	}
}
func TestIsZero(t *testing.T) {
tests := []struct {
resource *Resource

View File

@ -17,6 +17,8 @@ limitations under the License.
package overcommit
import (
"strings"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
@ -32,28 +34,51 @@ const (
// overCommitFactor is resource overCommit factor for enqueue action
// It determines the number of `pending` pods that the scheduler will tolerate
// when the resources of the cluster are insufficient
// This field is used as the default key in factorMaps
overCommitFactor = "overcommit-factor"
// defaultOverCommitFactor defines the default overCommit resource factor for enqueue action
defaultOverCommitFactor = 1.2
)
// overCommitFactorPrefix is the prefix that marks a per-resource overcommit
// factor in the plugin configuration. The text following the prefix in an
// argument key is taken as the resource name the factor applies to.
const overCommitFactorPrefix = "overcommit-factor."
// overcommitFactors groups the overcommit factors configured for the plugin.
type overcommitFactors struct {
	// factorMaps associates a resource name with its overcommit factor.
	//   key:   resource name, e.g. "cpu", "memory", "ephemeral-storage", "nvidia.com/gpu"
	//   value: overcommit factor for that resource
	// At construction time the map is seeded with one extra entry,
	// key "overcommit-factor" and value defaultOverCommitFactor, which serves
	// as the default factor.
	factorMaps map[string]float64
}
// overcommitPlugin holds the plugin arguments, the resource totals the plugin
// tracks, and the configured overcommit factors.
// NOTE(review): this listing interleaves the pre-change and post-change field
// declarations of the commit, so several fields appear twice — confirm
// against the final file.
type overcommitPlugin struct {
// Arguments given for the plugin
pluginArguments framework.Arguments
totalResource *api.Resource
idleResource *api.Resource
inqueueResource *api.Resource
overCommitFactor float64
// pluginArguments holds the arguments given for the plugin
pluginArguments framework.Arguments
// totalResource accumulates the session's total resource in OnSessionOpen
totalResource *api.Resource
// idleResource is the total resource scaled by the overcommit factors, minus the resource used on nodes
idleResource *api.Resource
// inqueueResource presumably tracks resources of inqueue jobs — TODO confirm against OnSessionOpen
inqueueResource *api.Resource
// overCommitFactors holds the per-resource overcommit factors
overCommitFactors *overcommitFactors
}
// New returns an overcommit plugin object built from the given arguments.
// The resource fields start as empty resources and the factor map is seeded
// with defaultOverCommitFactor under the overCommitFactor key.
// NOTE(review): this listing interleaves the pre-change and post-change
// composite-literal entries of the commit, so several keys appear twice —
// confirm against the final file.
func New(arguments framework.Arguments) framework.Plugin {
return &overcommitPlugin{
pluginArguments: arguments,
totalResource: api.EmptyResource(),
idleResource: api.EmptyResource(),
inqueueResource: api.EmptyResource(),
overCommitFactor: defaultOverCommitFactor,
pluginArguments: arguments,
totalResource: api.EmptyResource(),
idleResource: api.EmptyResource(),
inqueueResource: api.EmptyResource(),
overCommitFactors: &overcommitFactors{
factorMaps: map[string]float64{
// default factor, used for resources without their own entry
overCommitFactor: defaultOverCommitFactor,
},
},
}
}
@ -62,25 +87,28 @@ func (op *overcommitPlugin) Name() string {
}
/*
User should give overcommit-factor through overcommit plugin arguments as format below:
Users should provide overcommit factors through the overcommit plugin arguments in the format below:
Example:
actions: "enqueue, allocate, backfill"
tiers:
- plugins:
- name: overcommit
arguments:
overcommit-factor: 1.0
overcommit-factor.cpu: 1.2
overcommit-factor.memory: 1.0
overcommit-factor: 1.2
*/
func (op *overcommitPlugin) OnSessionOpen(ssn *framework.Session) {
klog.V(5).Infof("Enter overcommit plugin ...")
defer klog.V(5).Infof("Leaving overcommit plugin.")
op.pluginArguments.GetFloat64(&op.overCommitFactor, overCommitFactor)
if op.overCommitFactor < 1.0 {
klog.Warningf("Invalid input %f for overcommit-factor, reason: overcommit-factor cannot be less than 1,"+
" using default value: %f.", op.overCommitFactor, defaultOverCommitFactor)
op.overCommitFactor = defaultOverCommitFactor
}
// parse plugin arguments
op.parse()
// validate plugin arguments
op.validate()
op.totalResource.Add(ssn.TotalResource)
// calculate idle resources of total cluster, overcommit resources included
@ -88,7 +116,9 @@ func (op *overcommitPlugin) OnSessionOpen(ssn *framework.Session) {
for _, node := range ssn.Nodes {
used.Add(node.Used)
}
op.idleResource = op.totalResource.Clone().Multi(op.overCommitFactor).SubWithoutAssert(used)
op.idleResource = op.totalResource.Clone().
ScaleResourcesWithRatios(op.overCommitFactors.factorMaps, op.overCommitFactors.factorMaps[overCommitFactor]).SubWithoutAssert(used)
for _, job := range ssn.Jobs {
// calculate inqueue job resources
@ -144,3 +174,58 @@ func (op *overcommitPlugin) OnSessionClose(ssn *framework.Session) {
op.idleResource = nil
op.inqueueResource = nil
}
// parseFactor copies overcommit factors from the plugin arguments into target.
//
// Only float64 and int values are accepted (int is converted to float64);
// any other value type is reported with a warning and skipped.
// For an accepted value:
//   - the key overCommitFactor is stored in target under that same key;
//   - a key starting with overCommitFactorPrefix is stored under the remainder
//     of the key after the prefix (the resource name).
//
// Keys matching neither form are ignored.
func (op *overcommitPlugin) parseFactor(arguments framework.Arguments, target map[string]float64) {
	for key, value := range arguments {
		// Normalise the supported numeric types to float64 first so the
		// key handling below is written only once.
		var factor float64
		switch typed := value.(type) {
		case float64:
			factor = typed
		case int:
			factor = float64(typed)
		default:
			// Log the unexpected value type instead of asserting on it, to prevent panics.
			klog.Warningf("Unexpected value type for key %s: %T\n", key, value)
			continue
		}

		// The two cases are mutually exclusive: the prefix is the default key
		// plus a trailing dot, so the default key itself never has the prefix.
		switch {
		case key == overCommitFactor:
			target[overCommitFactor] = factor
		case strings.HasPrefix(key, overCommitFactorPrefix):
			resourceName := strings.TrimPrefix(key, overCommitFactorPrefix)
			target[resourceName] = factor
		}
	}
}
// parse reads the overcommit factors out of the plugin arguments and stores
// them in the plugin's factor map.
func (op *overcommitPlugin) parse() {
	args := op.pluginArguments
	factors := op.overCommitFactors.factorMaps
	op.parseFactor(args, factors)
}
// validate checks every configured overcommit factor and replaces each invalid
// one with defaultOverCommitFactor, logging a warning for the replacement.
// A factor is invalid when it is less than 1 or is NaN.
func (op *overcommitPlugin) validate() {
	for k, v := range op.overCommitFactors.factorMaps {
		// Written as !(v >= 1.0) rather than v < 1.0 on purpose: every
		// comparison with NaN is false, so "v < 1.0" would let a NaN factor
		// through and the scaled resources would become NaN as well.
		if !(v >= 1.0) {
			klog.Warningf("Invalid input %f for %v overcommit factor, reason: %v overcommit factor cannot be less than 1,"+
				" using default value: %f.", v, k, k, defaultOverCommitFactor)
			// Overwriting an existing key while ranging over the map is safe.
			op.overCommitFactors.factorMaps[k] = defaultOverCommitFactor
		}
	}
}

View File

@ -1,8 +1,10 @@
package overcommit
import (
"sort"
"testing"
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
@ -16,7 +18,7 @@ import (
)
func TestOvercommitPlugin(t *testing.T) {
n1 := util.BuildNode("n1", api.BuildResourceList("2", "4Gi"), make(map[string]string))
n1 := util.BuildNode("n1", api.BuildResourceList("2", "4Gi", api.ScalarResource{Name: "ephemeral-storage", Value: "32Gi"}, api.ScalarResource{Name: "nvidia.com/gpu", Value: "8"}), make(map[string]string))
n2 := util.BuildNode("n2", api.BuildResourceList("4", "16Gi"), make(map[string]string))
hugeResource := api.BuildResourceList("20000m", "20G")
normalResource := api.BuildResourceList("2000m", "2G")
@ -41,7 +43,7 @@ func TestOvercommitPlugin(t *testing.T) {
}{
{
TestCommonStruct: uthelper.TestCommonStruct{
Name: "overCommitFactor is more than 0",
Name: "overCommitFactor is more than 1",
Plugins: map[string]framework.PluginBuilder{PluginName: New},
PodGroups: []*schedulingv1.PodGroup{pg1},
Queues: []*schedulingv1.Queue{queue1},
@ -54,7 +56,7 @@ func TestOvercommitPlugin(t *testing.T) {
},
{
TestCommonStruct: uthelper.TestCommonStruct{
Name: "overCommitFactor is less than 0",
Name: "overCommitFactor is less than 1",
Plugins: map[string]framework.PluginBuilder{PluginName: New},
PodGroups: []*schedulingv1.PodGroup{pg1},
Queues: []*schedulingv1.Queue{queue1},
@ -91,6 +93,33 @@ func TestOvercommitPlugin(t *testing.T) {
},
expectedEnqueueAble: true,
},
{
TestCommonStruct: uthelper.TestCommonStruct{
Name: "overCommitFactor is more than 1 with different overcommit factors",
Plugins: map[string]framework.PluginBuilder{PluginName: New},
PodGroups: []*schedulingv1.PodGroup{pg1},
Queues: []*schedulingv1.Queue{queue1},
Nodes: []*v1.Node{n1, n2},
},
arguments: framework.Arguments{
"overcommit-factor.cpu": 1.3,
"overcommit-factor.memory": 1.4,
"overcommit-factor.ephemeral-storage": 1.4,
"overcommit-factor.nvidia.com/gpu": 1.3,
},
expectedEnqueueAble: true,
},
{
TestCommonStruct: uthelper.TestCommonStruct{
Name: "overCommitFactor is not set",
Plugins: map[string]framework.PluginBuilder{PluginName: New},
PodGroups: []*schedulingv1.PodGroup{pg3},
Queues: []*schedulingv1.Queue{queue2},
Nodes: []*v1.Node{n1, n2},
},
arguments: framework.Arguments{},
expectedEnqueueAble: true,
},
}
for _, test := range tests {
@ -118,5 +147,79 @@ func TestOvercommitPlugin(t *testing.T) {
}
})
}
}
// TestParseFactor verifies that parseFactor strips the "overcommit-factor."
// prefix from argument keys, accepts both float64 and int values, and keeps
// the default "overcommit-factor" entry seeded by New.
func TestParseFactor(t *testing.T) {
	cases := []struct {
		name         string
		arguments    framework.Arguments
		expectedMaps map[string]float64
	}{
		{
			name: "overCommitFactor with float64 type",
			arguments: framework.Arguments{
				"overcommit-factor.cpu":               1.3,
				"overcommit-factor.memory":            1.4,
				"overcommit-factor.ephemeral-storage": 1.4,
				"overcommit-factor.nvidia.com/gpu":    1.3,
			},
			expectedMaps: map[string]float64{
				// default value
				"overcommit-factor": 1.2,
				"cpu":               1.3,
				"memory":            1.4,
				"ephemeral-storage": 1.4,
				"nvidia.com/gpu":    1.3,
			},
		},
		{
			name: "overCommitFactor with int type",
			arguments: framework.Arguments{
				"overcommit-factor.cpu":               2,
				"overcommit-factor.memory":            2,
				"overcommit-factor.ephemeral-storage": 2,
				"overcommit-factor.nvidia.com/gpu":    2,
			},
			expectedMaps: map[string]float64{
				// default value
				"overcommit-factor": 1.2,
				"cpu":               2.0,
				"memory":            2.0,
				"ephemeral-storage": 2.0,
				"nvidia.com/gpu":    2.0,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			plugin := New(tc.arguments).(*overcommitPlugin)
			// parseFactor fills the plugin's own factor map in place.
			got := plugin.overCommitFactors.factorMaps
			plugin.parseFactor(tc.arguments, got)

			// Compare the key sets first, in sorted order.
			wantKeys := sortMapByKey(tc.expectedMaps)
			gotKeys := sortMapByKey(got)
			if diff := cmp.Diff(wantKeys, gotKeys); diff != "" {
				t.Errorf("sorted keys mismatch: %s", diff)
			}

			// Then compare the value stored under every expected key.
			for _, key := range wantKeys {
				want, actual := tc.expectedMaps[key], got[key]
				if want == actual {
					continue
				}
				t.Errorf("value mismatch for key %s: expected %f, got %f",
					key, want, actual)
			}
		})
	}
}
// sortMapByKey returns the keys of m in ascending lexical order.
// The result is always a non-nil slice, empty when m has no entries.
func sortMapByKey(m map[string]float64) []string {
	ordered := make([]string, len(m))
	next := 0
	for name := range m {
		ordered[next] = name
		next++
	}
	sort.Strings(ordered)
	return ordered
}