336 lines
9.5 KiB
Go
336 lines
9.5 KiB
Go
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package drf
|
|
|
|
import (
|
|
"math"
|
|
|
|
"k8s.io/klog"
|
|
|
|
"volcano.sh/volcano/pkg/scheduler/api"
|
|
"volcano.sh/volcano/pkg/scheduler/api/helpers"
|
|
"volcano.sh/volcano/pkg/scheduler/framework"
|
|
"volcano.sh/volcano/pkg/scheduler/metrics"
|
|
)
|
|
|
|
// PluginName is the name under which the DRF plugin is registered with, and
// looked up in, the volcano scheduler.
const PluginName = "drf"

// shareDelta is the tolerance used when comparing two shares: differences no
// larger than this value are treated as a tie.
var (
	shareDelta = 0.000001
)
|
|
|
|
type drfAttr struct {
|
|
share float64
|
|
dominantResource string
|
|
allocated *api.Resource
|
|
}
|
|
|
|
type drfPlugin struct {
|
|
totalResource *api.Resource
|
|
|
|
// Key is Job ID
|
|
jobAttrs map[api.JobID]*drfAttr
|
|
|
|
// map[namespaceName]->attr
|
|
namespaceOpts map[string]*drfAttr
|
|
|
|
// Arguments given for the plugin
|
|
pluginArguments framework.Arguments
|
|
}
|
|
|
|
// New return drf plugin
|
|
func New(arguments framework.Arguments) framework.Plugin {
|
|
return &drfPlugin{
|
|
totalResource: api.EmptyResource(),
|
|
jobAttrs: map[api.JobID]*drfAttr{},
|
|
namespaceOpts: map[string]*drfAttr{},
|
|
pluginArguments: arguments,
|
|
}
|
|
}
|
|
|
|
func (drf *drfPlugin) Name() string {
|
|
return PluginName
|
|
}
|
|
|
|
// NamespaceOrderEnabled returns the NamespaceOrder for this plugin is enabled in this session or not
|
|
func (drf *drfPlugin) NamespaceOrderEnabled(ssn *framework.Session) bool {
|
|
for _, tier := range ssn.Tiers {
|
|
for _, plugin := range tier.Plugins {
|
|
if plugin.Name != PluginName {
|
|
continue
|
|
}
|
|
return plugin.EnabledNamespaceOrder != nil && *plugin.EnabledNamespaceOrder
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (drf *drfPlugin) OnSessionOpen(ssn *framework.Session) {
|
|
// Prepare scheduling data for this session.
|
|
for _, n := range ssn.Nodes {
|
|
drf.totalResource.Add(n.Allocatable)
|
|
}
|
|
|
|
namespaceOrderEnabled := drf.NamespaceOrderEnabled(ssn)
|
|
|
|
for _, job := range ssn.Jobs {
|
|
attr := &drfAttr{
|
|
allocated: api.EmptyResource(),
|
|
}
|
|
|
|
for status, tasks := range job.TaskStatusIndex {
|
|
if api.AllocatedStatus(status) {
|
|
for _, t := range tasks {
|
|
attr.allocated.Add(t.Resreq)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Calculate the init share of Job
|
|
drf.updateJobShare(job.Namespace, job.Name, attr)
|
|
|
|
drf.jobAttrs[job.UID] = attr
|
|
|
|
if namespaceOrderEnabled {
|
|
nsOpts, found := drf.namespaceOpts[job.Namespace]
|
|
if !found {
|
|
nsOpts = &drfAttr{
|
|
allocated: api.EmptyResource(),
|
|
}
|
|
drf.namespaceOpts[job.Namespace] = nsOpts
|
|
}
|
|
// all task in job should have the same namespace with job
|
|
nsOpts.allocated.Add(attr.allocated)
|
|
drf.updateNamespaceShare(job.Namespace, nsOpts)
|
|
}
|
|
}
|
|
|
|
preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) []*api.TaskInfo {
|
|
var victims []*api.TaskInfo
|
|
|
|
addVictim := func(candidate *api.TaskInfo) {
|
|
victims = append(victims, candidate)
|
|
}
|
|
|
|
if namespaceOrderEnabled {
|
|
// apply the namespace share policy on preemptee firstly
|
|
|
|
lWeight := ssn.NamespaceInfo[api.NamespaceName(preemptor.Namespace)].GetWeight()
|
|
lNsAtt := drf.namespaceOpts[preemptor.Namespace]
|
|
lNsAlloc := lNsAtt.allocated.Clone().Add(preemptor.Resreq)
|
|
_, lNsShare := drf.calculateShare(lNsAlloc, drf.totalResource)
|
|
lNsShareWeighted := lNsShare / float64(lWeight)
|
|
|
|
namespaceAllocation := map[string]*api.Resource{}
|
|
|
|
// undecidedPreemptees means this policy could not judge preemptee is preemptable or not
|
|
// and left it to next policy
|
|
undecidedPreemptees := []*api.TaskInfo{}
|
|
|
|
for _, preemptee := range preemptees {
|
|
if preemptor.Namespace == preemptee.Namespace {
|
|
// policy is disabled when they are in the same namespace
|
|
undecidedPreemptees = append(undecidedPreemptees, preemptee)
|
|
continue
|
|
}
|
|
|
|
// compute the preemptee namespace weighted share after preemption
|
|
nsAllocation, found := namespaceAllocation[preemptee.Namespace]
|
|
if !found {
|
|
rNsAtt := drf.namespaceOpts[preemptee.Namespace]
|
|
nsAllocation = rNsAtt.allocated.Clone()
|
|
namespaceAllocation[preemptee.Namespace] = nsAllocation
|
|
}
|
|
rWeight := ssn.NamespaceInfo[api.NamespaceName(preemptee.Namespace)].GetWeight()
|
|
rNsAlloc := nsAllocation.Sub(preemptee.Resreq)
|
|
_, rNsShare := drf.calculateShare(rNsAlloc, drf.totalResource)
|
|
rNsShareWeighted := rNsShare / float64(rWeight)
|
|
|
|
// to avoid ping pong actions, the preemptee namespace should
|
|
// have the higher weighted share after preemption.
|
|
if lNsShareWeighted < rNsShareWeighted {
|
|
addVictim(preemptee)
|
|
}
|
|
if lNsShareWeighted-rNsShareWeighted > shareDelta {
|
|
continue
|
|
}
|
|
|
|
// equal namespace order leads to judgement of jobOrder
|
|
undecidedPreemptees = append(undecidedPreemptees, preemptee)
|
|
}
|
|
|
|
preemptees = undecidedPreemptees
|
|
}
|
|
|
|
latt := drf.jobAttrs[preemptor.Job]
|
|
lalloc := latt.allocated.Clone().Add(preemptor.Resreq)
|
|
_, ls := drf.calculateShare(lalloc, drf.totalResource)
|
|
|
|
allocations := map[api.JobID]*api.Resource{}
|
|
|
|
for _, preemptee := range preemptees {
|
|
if _, found := allocations[preemptee.Job]; !found {
|
|
ratt := drf.jobAttrs[preemptee.Job]
|
|
allocations[preemptee.Job] = ratt.allocated.Clone()
|
|
}
|
|
ralloc := allocations[preemptee.Job].Sub(preemptee.Resreq)
|
|
_, rs := drf.calculateShare(ralloc, drf.totalResource)
|
|
|
|
if ls < rs || math.Abs(ls-rs) <= shareDelta {
|
|
addVictim(preemptee)
|
|
}
|
|
}
|
|
|
|
klog.V(4).Infof("Victims from DRF plugins are %+v", victims)
|
|
|
|
return victims
|
|
}
|
|
|
|
ssn.AddPreemptableFn(drf.Name(), preemptableFn)
|
|
|
|
jobOrderFn := func(l interface{}, r interface{}) int {
|
|
lv := l.(*api.JobInfo)
|
|
rv := r.(*api.JobInfo)
|
|
|
|
klog.V(4).Infof("DRF JobOrderFn: <%v/%v> share state: %v, <%v/%v> share state: %v",
|
|
lv.Namespace, lv.Name, drf.jobAttrs[lv.UID].share, rv.Namespace, rv.Name, drf.jobAttrs[rv.UID].share)
|
|
|
|
if drf.jobAttrs[lv.UID].share == drf.jobAttrs[rv.UID].share {
|
|
return 0
|
|
}
|
|
|
|
if drf.jobAttrs[lv.UID].share < drf.jobAttrs[rv.UID].share {
|
|
return -1
|
|
}
|
|
|
|
return 1
|
|
}
|
|
|
|
ssn.AddJobOrderFn(drf.Name(), jobOrderFn)
|
|
|
|
namespaceOrderFn := func(l interface{}, r interface{}) int {
|
|
lv := l.(api.NamespaceName)
|
|
rv := r.(api.NamespaceName)
|
|
|
|
lOpt := drf.namespaceOpts[string(lv)]
|
|
rOpt := drf.namespaceOpts[string(rv)]
|
|
|
|
lWeight := ssn.NamespaceInfo[lv].GetWeight()
|
|
rWeight := ssn.NamespaceInfo[rv].GetWeight()
|
|
|
|
klog.V(4).Infof("DRF NamespaceOrderFn: <%v> share state: %f, weight %v, <%v> share state: %f, weight %v",
|
|
lv, lOpt.share, lWeight, rv, rOpt.share, rWeight)
|
|
|
|
lWeightedShare := lOpt.share / float64(lWeight)
|
|
rWeightedShare := rOpt.share / float64(rWeight)
|
|
|
|
metrics.UpdateNamespaceWeight(string(lv), lWeight)
|
|
metrics.UpdateNamespaceWeight(string(rv), rWeight)
|
|
metrics.UpdateNamespaceWeightedShare(string(lv), lWeightedShare)
|
|
metrics.UpdateNamespaceWeightedShare(string(rv), rWeightedShare)
|
|
|
|
if lWeightedShare == rWeightedShare {
|
|
return 0
|
|
}
|
|
|
|
if lWeightedShare < rWeightedShare {
|
|
return -1
|
|
}
|
|
|
|
return 1
|
|
}
|
|
|
|
if namespaceOrderEnabled {
|
|
ssn.AddNamespaceOrderFn(drf.Name(), namespaceOrderFn)
|
|
}
|
|
|
|
// Register event handlers.
|
|
ssn.AddEventHandler(&framework.EventHandler{
|
|
AllocateFunc: func(event *framework.Event) {
|
|
attr := drf.jobAttrs[event.Task.Job]
|
|
attr.allocated.Add(event.Task.Resreq)
|
|
|
|
job := ssn.Jobs[event.Task.Job]
|
|
drf.updateJobShare(job.Namespace, job.Name, attr)
|
|
|
|
nsShare := -1.0
|
|
if namespaceOrderEnabled {
|
|
nsOpt := drf.namespaceOpts[event.Task.Namespace]
|
|
nsOpt.allocated.Add(event.Task.Resreq)
|
|
|
|
drf.updateNamespaceShare(event.Task.Namespace, nsOpt)
|
|
nsShare = nsOpt.share
|
|
}
|
|
|
|
klog.V(4).Infof("DRF AllocateFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>",
|
|
event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)
|
|
},
|
|
DeallocateFunc: func(event *framework.Event) {
|
|
attr := drf.jobAttrs[event.Task.Job]
|
|
attr.allocated.Sub(event.Task.Resreq)
|
|
|
|
job := ssn.Jobs[event.Task.Job]
|
|
drf.updateJobShare(job.Namespace, job.Name, attr)
|
|
|
|
nsShare := -1.0
|
|
if namespaceOrderEnabled {
|
|
nsOpt := drf.namespaceOpts[event.Task.Namespace]
|
|
nsOpt.allocated.Sub(event.Task.Resreq)
|
|
|
|
drf.updateNamespaceShare(event.Task.Namespace, nsOpt)
|
|
nsShare = nsOpt.share
|
|
}
|
|
|
|
klog.V(4).Infof("DRF EvictFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>",
|
|
event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)
|
|
},
|
|
})
|
|
}
|
|
|
|
func (drf *drfPlugin) updateNamespaceShare(namespaceName string, attr *drfAttr) {
|
|
drf.updateShare(attr)
|
|
metrics.UpdateNamespaceShare(namespaceName, attr.share)
|
|
}
|
|
|
|
func (drf *drfPlugin) updateJobShare(jobNs, jobName string, attr *drfAttr) {
|
|
drf.updateShare(attr)
|
|
metrics.UpdateJobShare(jobNs, jobName, attr.share)
|
|
}
|
|
|
|
func (drf *drfPlugin) updateShare(attr *drfAttr) {
|
|
attr.dominantResource, attr.share = drf.calculateShare(attr.allocated, drf.totalResource)
|
|
}
|
|
|
|
func (drf *drfPlugin) calculateShare(allocated, totalResource *api.Resource) (string, float64) {
|
|
res := float64(0)
|
|
dominantResource := ""
|
|
for _, rn := range totalResource.ResourceNames() {
|
|
share := helpers.Share(allocated.Get(rn), totalResource.Get(rn))
|
|
if share > res {
|
|
res = share
|
|
dominantResource = string(rn)
|
|
}
|
|
}
|
|
|
|
return dominantResource, res
|
|
}
|
|
|
|
func (drf *drfPlugin) OnSessionClose(session *framework.Session) {
|
|
// Clean schedule data.
|
|
drf.totalResource = api.EmptyResource()
|
|
drf.jobAttrs = map[api.JobID]*drfAttr{}
|
|
}
|