Set different hostname label for upcoming nodes
The function copying the template node used for upcoming nodes was not changing the hostname label, meaning that features relying on this label (e.g. pod anti-affinity on hostname topology) would treat all upcoming nodes as a single node. This resulted in triggering too many scale-ups for pods using such features. The analogous function in binpacking didn't have the same bug (but it didn't set a unique UID or unique pod names). I extracted the functionality into a util function used in both places to avoid the two functions getting out of sync again.
parent 962bbd0cdd
commit 9831623810
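As an illustration of the class of workloads this affects, here is a minimal, hypothetical pod spec (Go, using the same k8s.io/api types as the autoscaler; all names are made up) requesting required pod anti-affinity on the hostname topology. The scheduler spreads such pods across distinct values of the kubernetes.io/hostname label, so upcoming node copies that all carry the template's label value count as a single node and the autoscaler keeps asking for more:

// Hypothetical sketch, not part of this commit: a pod using required
// anti-affinity keyed on kubernetes.io/hostname.
package main

import (
	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func antiAffinityPod() *apiv1.Pod {
	return &apiv1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:   "spread-me-0",
			Labels: map[string]string{"app": "spread-me"},
		},
		Spec: apiv1.PodSpec{
			Affinity: &apiv1.Affinity{
				PodAntiAffinity: &apiv1.PodAntiAffinity{
					RequiredDuringSchedulingIgnoredDuringExecution: []apiv1.PodAffinityTerm{{
						LabelSelector: &metav1.LabelSelector{
							MatchLabels: map[string]string{"app": "spread-me"},
						},
						// Only one such pod can land per distinct hostname label value.
						TopologyKey: "kubernetes.io/hostname",
					}},
				},
			},
		},
	}
}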
@@ -23,7 +23,6 @@ import (
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/apimachinery/pkg/util/uuid"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@@ -42,6 +41,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
+	scheduler_utils "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/tpu"
@@ -775,21 +775,6 @@ func allPodsAreNew(pods []*apiv1.Pod, currentTime time.Time) bool {
 	return found && oldest.Add(unschedulablePodWithGpuTimeBuffer).After(currentTime)
 }
 
-func deepCopyNodeInfo(nodeTemplate *schedulerframework.NodeInfo, index int) *schedulerframework.NodeInfo {
-	node := nodeTemplate.Node().DeepCopy()
-	node.Name = fmt.Sprintf("%s-%d", node.Name, index)
-	node.UID = uuid.NewUUID()
-	nodeInfo := schedulerframework.NewNodeInfo()
-	nodeInfo.SetNode(node)
-	for _, podInfo := range nodeTemplate.Pods {
-		pod := podInfo.Pod.DeepCopy()
-		pod.Name = fmt.Sprintf("%s-%d", podInfo.Pod.Name, index)
-		pod.UID = uuid.NewUUID()
-		nodeInfo.AddPod(pod)
-	}
-	return nodeInfo
-}
-
 func getUpcomingNodeInfos(registry *clusterstate.ClusterStateRegistry, nodeInfos map[string]*schedulerframework.NodeInfo) []*schedulerframework.NodeInfo {
 	upcomingNodes := make([]*schedulerframework.NodeInfo, 0)
 	for nodeGroup, numberOfNodes := range registry.GetUpcomingNodes() {
@@ -808,7 +793,7 @@ func getUpcomingNodeInfos(registry *clusterstate.ClusterStateRegistry, nodeInfos
 			// Ensure new nodes have different names because nodeName
 			// will be used as a map key. Also deep copy pods (daemonsets &
 			// any pods added by cloud provider on template).
-			upcomingNodes = append(upcomingNodes, deepCopyNodeInfo(nodeTemplate, i))
+			upcomingNodes = append(upcomingNodes, scheduler_utils.DeepCopyTemplateNode(nodeTemplate, i))
 		}
 	}
 	return upcomingNodes
@@ -17,13 +17,12 @@ limitations under the License.
 package estimator
 
 import (
-	"fmt"
 	"sort"
-	"time"
 
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
 	klog "k8s.io/klog/v2"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -75,7 +74,6 @@ func (estimator *BinpackingNodeEstimator) Estimate(
 		}
 	}()
 
-	newNodeNameTimestamp := time.Now()
 	newNodeNameIndex := 0
 
 	for _, podInfo := range podInfos {
@@ -94,7 +92,7 @@ func (estimator *BinpackingNodeEstimator) Estimate(
 
 		if !found {
 			// Add new node
-			newNodeName, err := estimator.addNewNodeToSnapshot(nodeTemplate, newNodeNameTimestamp, newNodeNameIndex)
+			newNodeName, err := estimator.addNewNodeToSnapshot(nodeTemplate, newNodeNameIndex)
 			if err != nil {
 				klog.Errorf("Error while adding new node for template to ClusterSnapshot; %v", err)
 				return 0
@@ -113,23 +111,17 @@
 
 func (estimator *BinpackingNodeEstimator) addNewNodeToSnapshot(
 	template *schedulerframework.NodeInfo,
-	nameTimestamp time.Time,
 	nameIndex int) (string, error) {
 
-	newNode := template.Node().DeepCopy()
-	newNode.Name = fmt.Sprintf("%s-%d-%d", newNode.Name, nameTimestamp.Unix(), nameIndex)
-	if newNode.Labels == nil {
-		newNode.Labels = make(map[string]string)
-	}
-	newNode.Labels["kubernetes.io/hostname"] = newNode.Name
+	newNodeInfo := scheduler.DeepCopyTemplateNode(template, nameIndex)
 	var pods []*apiv1.Pod
-	for _, podInfo := range template.Pods {
+	for _, podInfo := range newNodeInfo.Pods {
 		pods = append(pods, podInfo.Pod)
 	}
-	if err := estimator.clusterSnapshot.AddNodeWithPods(newNode, pods); err != nil {
+	if err := estimator.clusterSnapshot.AddNodeWithPods(newNodeInfo.Node(), pods); err != nil {
 		return "", err
 	}
-	return newNode.Name, nil
+	return newNodeInfo.Node().Name, nil
 }
 
 // Calculates score for all pods and returns podInfo structure.
@@ -17,7 +17,10 @@ limitations under the License.
 package scheduler
 
 import (
+	"fmt"
+
 	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/uuid"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -57,3 +60,25 @@ func CreateNodeNameToInfoMap(pods []*apiv1.Pod, nodes []*apiv1.Node) map[string]
 
 	return nodeNameToNodeInfo
 }
+
+// DeepCopyTemplateNode copies a NodeInfo object used as a template. It changes
+// the names and UIDs of both the node and the pods running on it, so that the
+// copies can be used to represent multiple nodes.
+func DeepCopyTemplateNode(nodeTemplate *schedulerframework.NodeInfo, index int) *schedulerframework.NodeInfo {
+	node := nodeTemplate.Node().DeepCopy()
+	node.Name = fmt.Sprintf("%s-%d", node.Name, index)
+	node.UID = uuid.NewUUID()
+	if node.Labels == nil {
+		node.Labels = make(map[string]string)
+	}
+	node.Labels["kubernetes.io/hostname"] = node.Name
+	nodeInfo := schedulerframework.NewNodeInfo()
+	nodeInfo.SetNode(node)
+	for _, podInfo := range nodeTemplate.Pods {
+		pod := podInfo.Pod.DeepCopy()
+		pod.Name = fmt.Sprintf("%s-%d", podInfo.Pod.Name, index)
+		pod.UID = uuid.NewUUID()
+		nodeInfo.AddPod(pod)
+	}
+	return nodeInfo
+}
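A quick sketch (not part of the commit) of how the new helper behaves when expanding one template into several upcoming nodes, mirroring the loop in getUpcomingNodeInfos; nodeTemplate and numberOfNodes stand in for the values used there:

// Each copy gets a unique name and UID, and its kubernetes.io/hostname
// label is rewritten to match the copy's own name, so hostname-keyed
// features see N distinct nodes instead of one.
upcomingNodes := make([]*schedulerframework.NodeInfo, 0, numberOfNodes)
for i := 0; i < numberOfNodes; i++ {
	nodeInfo := scheduler_utils.DeepCopyTemplateNode(nodeTemplate, i)
	// e.g. a template named "ng-1-template" yields copies "ng-1-template-0",
	// "ng-1-template-1", ..., each labeled with its own hostname.
	upcomingNodes = append(upcomingNodes, nodeInfo)
}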