volcano/pkg/scheduler/plugins/drf/drf.go

336 lines
9.5 KiB
Go

/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package drf
import (
"math"
"k8s.io/klog"
"volcano.sh/volcano/pkg/scheduler/api"
"volcano.sh/volcano/pkg/scheduler/api/helpers"
"volcano.sh/volcano/pkg/scheduler/framework"
"volcano.sh/volcano/pkg/scheduler/metrics"
)
// PluginName is the name under which the DRF plugin is registered; it is
// returned by Name and matched against tier plugin options in
// NamespaceOrderEnabled.
const PluginName = "drf"
// shareDelta is the tolerance used when comparing two shares during
// preemption: shares whose difference does not exceed it are treated as equal.
var shareDelta = 0.000001
// drfAttr holds the DRF bookkeeping for one entity (a job or a namespace).
type drfAttr struct {
// share is the largest per-resource share of allocated relative to the
// plugin's total resource, as computed by calculateShare.
share float64
// dominantResource is the name of the resource that produced share; it is
// empty when no resource has a share above zero.
dominantResource string
// allocated is the running sum of resource requests currently counted
// against this entity.
allocated *api.Resource
}
// drfPlugin is the scheduler plugin state for DRF. Its fields are populated
// in OnSessionOpen and reset in OnSessionClose.
type drfPlugin struct {
// totalResource is the sum of the allocatable resources of all nodes in
// the session.
totalResource *api.Resource
// Key is Job ID
jobAttrs map[api.JobID]*drfAttr
// map[namespaceName]->attr
// Only populated when namespace ordering is enabled for this plugin.
namespaceOpts map[string]*drfAttr
// Arguments given for the plugin
pluginArguments framework.Arguments
}
// New returns a DRF plugin configured with the given arguments. The plugin
// starts with an empty total resource and empty job and namespace tables.
func New(arguments framework.Arguments) framework.Plugin {
	plugin := &drfPlugin{pluginArguments: arguments}
	plugin.totalResource = api.EmptyResource()
	plugin.jobAttrs = make(map[api.JobID]*drfAttr)
	plugin.namespaceOpts = make(map[string]*drfAttr)
	return plugin
}
// Name returns the registered name of the plugin, PluginName.
func (drf *drfPlugin) Name() string { return PluginName }
// NamespaceOrderEnabled reports whether namespace ordering is enabled for
// this plugin in the given session.
//
// The session tiers are scanned in order; the first plugin option whose name
// equals PluginName decides the result. A nil EnabledNamespaceOrder counts as
// disabled, and so does the absence of any matching plugin option.
func (drf *drfPlugin) NamespaceOrderEnabled(ssn *framework.Session) bool {
	for _, tier := range ssn.Tiers {
		for _, candidate := range tier.Plugins {
			if candidate.Name == PluginName {
				flag := candidate.EnabledNamespaceOrder
				if flag == nil {
					return false
				}
				return *flag
			}
		}
	}
	return false
}
// OnSessionOpen builds the per-session DRF state and registers the plugin's
// callbacks on ssn.
//
// It sums the allocatable resources of every node into drf.totalResource,
// computes the initial allocation and share of every job (and, when namespace
// ordering is enabled, of every namespace that owns a job), and then registers:
//   - a preemptable function that selects victims by comparing shares,
//   - a job order function that orders jobs by ascending share,
//   - a namespace order function (only when namespace ordering is enabled)
//     that orders namespaces by ascending weighted share,
//   - allocate/deallocate event handlers that keep the shares up to date.
func (drf *drfPlugin) OnSessionOpen(ssn *framework.Session) {
	// Prepare scheduling data for this session.
	for _, n := range ssn.Nodes {
		drf.totalResource.Add(n.Allocatable)
	}

	namespaceOrderEnabled := drf.NamespaceOrderEnabled(ssn)

	for _, job := range ssn.Jobs {
		attr := &drfAttr{
			allocated: api.EmptyResource(),
		}

		// Only tasks whose status counts as allocated contribute to the
		// job's current allocation.
		for status, tasks := range job.TaskStatusIndex {
			if api.AllocatedStatus(status) {
				for _, t := range tasks {
					attr.allocated.Add(t.Resreq)
				}
			}
		}

		// Calculate the init share of Job
		drf.updateJobShare(job.Namespace, job.Name, attr)

		drf.jobAttrs[job.UID] = attr

		if namespaceOrderEnabled {
			nsOpts, found := drf.namespaceOpts[job.Namespace]
			if !found {
				nsOpts = &drfAttr{
					allocated: api.EmptyResource(),
				}
				drf.namespaceOpts[job.Namespace] = nsOpts
			}
			// all task in job should have the same namespace with job
			nsOpts.allocated.Add(attr.allocated)
			drf.updateNamespaceShare(job.Namespace, nsOpts)
		}
	}

	// preemptableFn returns the subset of preemptees that preemptor may evict.
	// When namespace ordering is enabled, a namespace-level policy runs first;
	// preemptees it cannot decide on are then judged by the job-level policy.
	preemptableFn := func(preemptor *api.TaskInfo, preemptees []*api.TaskInfo) []*api.TaskInfo {
		var victims []*api.TaskInfo

		addVictim := func(candidate *api.TaskInfo) {
			victims = append(victims, candidate)
		}

		if namespaceOrderEnabled {
			// apply the namespace share policy on preemptee firstly

			// Weighted share of the preemptor's namespace as it would be
			// after the preemptor's request is added.
			lWeight := ssn.NamespaceInfo[api.NamespaceName(preemptor.Namespace)].GetWeight()
			lNsAtt := drf.namespaceOpts[preemptor.Namespace]
			lNsAlloc := lNsAtt.allocated.Clone().Add(preemptor.Resreq)
			_, lNsShare := drf.calculateShare(lNsAlloc, drf.totalResource)
			lNsShareWeighted := lNsShare / float64(lWeight)

			// Working copies of namespace allocations. Each considered
			// preemptee is subtracted from its namespace's copy, so the
			// projection accumulates across preemptees of one namespace.
			namespaceAllocation := map[string]*api.Resource{}

			// undecidedPreemptees means this policy could not judge preemptee is preemptable or not
			// and left it to next policy
			undecidedPreemptees := []*api.TaskInfo{}

			for _, preemptee := range preemptees {
				if preemptor.Namespace == preemptee.Namespace {
					// policy is disabled when they are in the same namespace
					undecidedPreemptees = append(undecidedPreemptees, preemptee)
					continue
				}

				// compute the preemptee namespace weighted share after preemption
				nsAllocation, found := namespaceAllocation[preemptee.Namespace]
				if !found {
					rNsAtt := drf.namespaceOpts[preemptee.Namespace]
					nsAllocation = rNsAtt.allocated.Clone()
					namespaceAllocation[preemptee.Namespace] = nsAllocation
				}
				rWeight := ssn.NamespaceInfo[api.NamespaceName(preemptee.Namespace)].GetWeight()
				rNsAlloc := nsAllocation.Sub(preemptee.Resreq)
				_, rNsShare := drf.calculateShare(rNsAlloc, drf.totalResource)
				rNsShareWeighted := rNsShare / float64(rWeight)

				// to avoid ping pong actions, the preemptee namespace should
				// have the higher weighted share after preemption.
				if lNsShareWeighted < rNsShareWeighted {
					addVictim(preemptee)
					// The namespace policy has decided this preemptee. It must
					// not also be handed to the job-level policy, otherwise the
					// same task could be appended to victims a second time.
					continue
				}
				// The preemptor's namespace would end up clearly above the
				// preemptee's: not preemptable under this policy.
				if lNsShareWeighted-rNsShareWeighted > shareDelta {
					continue
				}

				// equal namespace order leads to judgement of jobOrder
				undecidedPreemptees = append(undecidedPreemptees, preemptee)
			}

			preemptees = undecidedPreemptees
		}

		// Job-level policy: share of the preemptor's job after adding the
		// preemptor's request.
		latt := drf.jobAttrs[preemptor.Job]
		lalloc := latt.allocated.Clone().Add(preemptor.Resreq)
		_, ls := drf.calculateShare(lalloc, drf.totalResource)

		// Working copies of job allocations, reduced cumulatively as
		// preemptees of the same job are considered.
		allocations := map[api.JobID]*api.Resource{}

		for _, preemptee := range preemptees {
			if _, found := allocations[preemptee.Job]; !found {
				ratt := drf.jobAttrs[preemptee.Job]
				allocations[preemptee.Job] = ratt.allocated.Clone()
			}
			ralloc := allocations[preemptee.Job].Sub(preemptee.Resreq)
			_, rs := drf.calculateShare(ralloc, drf.totalResource)

			// Victim when the preemptee's job would still hold at least the
			// preemptor job's share (within shareDelta) after preemption.
			if ls < rs || math.Abs(ls-rs) <= shareDelta {
				addVictim(preemptee)
			}
		}

		klog.V(4).Infof("Victims from DRF plugins are %+v", victims)

		return victims
	}

	ssn.AddPreemptableFn(drf.Name(), preemptableFn)

	// jobOrderFn orders jobs by ascending share: -1 when l has the smaller
	// share, 1 when it has the larger one, 0 when the shares are equal.
	jobOrderFn := func(l interface{}, r interface{}) int {
		lv := l.(*api.JobInfo)
		rv := r.(*api.JobInfo)

		klog.V(4).Infof("DRF JobOrderFn: <%v/%v> share state: %v, <%v/%v> share state: %v",
			lv.Namespace, lv.Name, drf.jobAttrs[lv.UID].share, rv.Namespace, rv.Name, drf.jobAttrs[rv.UID].share)

		if drf.jobAttrs[lv.UID].share == drf.jobAttrs[rv.UID].share {
			return 0
		}

		if drf.jobAttrs[lv.UID].share < drf.jobAttrs[rv.UID].share {
			return -1
		}

		return 1
	}

	ssn.AddJobOrderFn(drf.Name(), jobOrderFn)

	// namespaceOrderFn orders namespaces by ascending weighted share
	// (share divided by namespace weight) and publishes both namespaces'
	// weight and weighted share as metrics on every comparison.
	namespaceOrderFn := func(l interface{}, r interface{}) int {
		lv := l.(api.NamespaceName)
		rv := r.(api.NamespaceName)

		lOpt := drf.namespaceOpts[string(lv)]
		rOpt := drf.namespaceOpts[string(rv)]

		lWeight := ssn.NamespaceInfo[lv].GetWeight()
		rWeight := ssn.NamespaceInfo[rv].GetWeight()

		klog.V(4).Infof("DRF NamespaceOrderFn: <%v> share state: %f, weight %v, <%v> share state: %f, weight %v",
			lv, lOpt.share, lWeight, rv, rOpt.share, rWeight)

		lWeightedShare := lOpt.share / float64(lWeight)
		rWeightedShare := rOpt.share / float64(rWeight)

		metrics.UpdateNamespaceWeight(string(lv), lWeight)
		metrics.UpdateNamespaceWeight(string(rv), rWeight)
		metrics.UpdateNamespaceWeightedShare(string(lv), lWeightedShare)
		metrics.UpdateNamespaceWeightedShare(string(rv), rWeightedShare)

		if lWeightedShare == rWeightedShare {
			return 0
		}

		if lWeightedShare < rWeightedShare {
			return -1
		}

		return 1
	}

	if namespaceOrderEnabled {
		ssn.AddNamespaceOrderFn(drf.Name(), namespaceOrderFn)
	}

	// Register event handlers.
	ssn.AddEventHandler(&framework.EventHandler{
		// AllocateFunc adds the task's request to its job (and namespace,
		// when enabled) and recomputes the affected shares.
		AllocateFunc: func(event *framework.Event) {
			attr := drf.jobAttrs[event.Task.Job]
			attr.allocated.Add(event.Task.Resreq)

			job := ssn.Jobs[event.Task.Job]
			drf.updateJobShare(job.Namespace, job.Name, attr)

			// -1 is logged when namespace ordering is disabled.
			nsShare := -1.0
			if namespaceOrderEnabled {
				nsOpt := drf.namespaceOpts[event.Task.Namespace]
				nsOpt.allocated.Add(event.Task.Resreq)

				drf.updateNamespaceShare(event.Task.Namespace, nsOpt)
				nsShare = nsOpt.share
			}

			klog.V(4).Infof("DRF AllocateFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>",
				event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)
		},
		// DeallocateFunc is the inverse of AllocateFunc: it subtracts the
		// task's request and recomputes the affected shares.
		DeallocateFunc: func(event *framework.Event) {
			attr := drf.jobAttrs[event.Task.Job]
			attr.allocated.Sub(event.Task.Resreq)

			job := ssn.Jobs[event.Task.Job]
			drf.updateJobShare(job.Namespace, job.Name, attr)

			// -1 is logged when namespace ordering is disabled.
			nsShare := -1.0
			if namespaceOrderEnabled {
				nsOpt := drf.namespaceOpts[event.Task.Namespace]
				nsOpt.allocated.Sub(event.Task.Resreq)

				drf.updateNamespaceShare(event.Task.Namespace, nsOpt)
				nsShare = nsOpt.share
			}

			// NOTE(review): the message says "EvictFunc" although this is the
			// deallocate handler; the text is kept unchanged so existing log
			// searches keep working.
			klog.V(4).Infof("DRF EvictFunc: task <%v/%v>, resreq <%v>, share <%v>, namespace share <%v>",
				event.Task.Namespace, event.Task.Name, event.Task.Resreq, attr.share, nsShare)
		},
	})
}
// updateNamespaceShare recomputes the dominant resource and share stored in
// attr from its current allocation, then publishes the new share as the
// namespace-share metric for namespaceName.
func (drf *drfPlugin) updateNamespaceShare(namespaceName string, attr *drfAttr) {
	attr.dominantResource, attr.share = drf.calculateShare(attr.allocated, drf.totalResource)
	metrics.UpdateNamespaceShare(namespaceName, attr.share)
}
// updateJobShare recomputes the dominant resource and share stored in attr
// from its current allocation, then publishes the new share as the job-share
// metric for the job identified by jobNs and jobName.
func (drf *drfPlugin) updateJobShare(jobNs, jobName string, attr *drfAttr) {
	attr.dominantResource, attr.share = drf.calculateShare(attr.allocated, drf.totalResource)
	metrics.UpdateJobShare(jobNs, jobName, attr.share)
}
// updateShare refreshes attr.dominantResource and attr.share from
// attr.allocated, measured against the plugin's total resource.
func (drf *drfPlugin) updateShare(attr *drfAttr) {
	dominant, share := drf.calculateShare(attr.allocated, drf.totalResource)
	attr.dominantResource = dominant
	attr.share = share
}
// calculateShare returns the dominant resource of allocated relative to
// totalResource, together with that resource's share.
//
// For every resource name reported by totalResource, the share is obtained
// from helpers.Share(allocated, total). The largest share wins; on a tie the
// resource encountered first is kept. If no share is greater than zero the
// result is ("", 0).
func (drf *drfPlugin) calculateShare(allocated, totalResource *api.Resource) (string, float64) {
	var (
		maxShare float64
		dominant string
	)
	for _, name := range totalResource.ResourceNames() {
		if s := helpers.Share(allocated.Get(name), totalResource.Get(name)); s > maxShare {
			maxShare, dominant = s, string(name)
		}
	}
	return dominant, maxShare
}
// OnSessionClose discards all per-session scheduling data held by the plugin.
//
// Besides the total resource and the job table, the namespace table is reset
// as well: OnSessionOpen adds job allocations onto existing namespace entries,
// so entries left over from a previous session would be counted twice if the
// same plugin instance were opened again.
func (drf *drfPlugin) OnSessionClose(session *framework.Session) {
	// Clean schedule data.
	drf.totalResource = api.EmptyResource()
	drf.jobAttrs = map[api.JobID]*drfAttr{}
	drf.namespaceOpts = map[string]*drfAttr{}
}