Set different hostname label for upcoming nodes

The function that copies the template node for upcoming nodes was
not changing the hostname label, so features relying on this label
(e.g. pod anti-affinity on hostname topology) would treat all
upcoming nodes as a single node. This resulted in triggering too
many scale-ups for pods using such features. The analogous function
in binpacking didn't have this bug (but it didn't set a unique UID
or unique pod names). I extracted the functionality into a util
function used in both places, to avoid the two functions getting
out of sync again.
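
A minimal sketch of the failure mode, with illustrative names that are not from this repo: if every copy of the template keeps the template's kubernetes.io/hostname label, then grouping nodes by that topology key (which is what hostname pod anti-affinity does) collapses all upcoming nodes into one domain:

package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// A template node, as a cloud provider might return it.
	// Name and label values here are hypothetical.
	template := &apiv1.Node{ObjectMeta: metav1.ObjectMeta{
		Name:   "template-node",
		Labels: map[string]string{"kubernetes.io/hostname": "template-node"},
	}}

	// Simulate the old copying logic: names become unique, but the
	// hostname label is carried over unchanged from the template.
	copies := []*apiv1.Node{template.DeepCopy(), template.DeepCopy()}
	for i, n := range copies {
		n.Name = fmt.Sprintf("%s-%d", template.Name, i)
	}

	// Group by the anti-affinity topology key, as the scheduler would.
	domains := map[string][]string{}
	for _, n := range copies {
		key := n.Labels["kubernetes.io/hostname"]
		domains[key] = append(domains[key], n.Name)
	}
	fmt.Println(domains)
	// map[template-node:[template-node-0 template-node-1]] -- both copies
	// land in one topology domain, so hostname anti-affinity admits only
	// one pod across all upcoming nodes, triggering repeated scale-ups.
}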
commit 9831623810
parent 962bbd0cdd
Author: Maciek Pytel
Date:   2021-02-12 19:37:30 +01:00

3 changed files with 33 additions and 31 deletions

cluster-autoscaler/core/static_autoscaler.go

@@ -23,7 +23,6 @@ import (
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/apimachinery/pkg/util/uuid"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@@ -42,6 +41,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
+	scheduler_utils "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/tpu"
@@ -775,21 +775,6 @@ func allPodsAreNew(pods []*apiv1.Pod, currentTime time.Time) bool {
 	return found && oldest.Add(unschedulablePodWithGpuTimeBuffer).After(currentTime)
 }
-
-func deepCopyNodeInfo(nodeTemplate *schedulerframework.NodeInfo, index int) *schedulerframework.NodeInfo {
-	node := nodeTemplate.Node().DeepCopy()
-	node.Name = fmt.Sprintf("%s-%d", node.Name, index)
-	node.UID = uuid.NewUUID()
-	nodeInfo := schedulerframework.NewNodeInfo()
-	nodeInfo.SetNode(node)
-	for _, podInfo := range nodeTemplate.Pods {
-		pod := podInfo.Pod.DeepCopy()
-		pod.Name = fmt.Sprintf("%s-%d", podInfo.Pod.Name, index)
-		pod.UID = uuid.NewUUID()
-		nodeInfo.AddPod(pod)
-	}
-	return nodeInfo
-}
 
 func getUpcomingNodeInfos(registry *clusterstate.ClusterStateRegistry, nodeInfos map[string]*schedulerframework.NodeInfo) []*schedulerframework.NodeInfo {
 	upcomingNodes := make([]*schedulerframework.NodeInfo, 0)
 	for nodeGroup, numberOfNodes := range registry.GetUpcomingNodes() {
@@ -808,7 +793,7 @@ func getUpcomingNodeInfos(registry *clusterstate.ClusterStateRegistry, nodeInfos
 			// Ensure new nodes have different names because nodeName
 			// will be used as a map key. Also deep copy pods (daemonsets &
 			// any pods added by cloud provider on template).
-			upcomingNodes = append(upcomingNodes, deepCopyNodeInfo(nodeTemplate, i))
+			upcomingNodes = append(upcomingNodes, scheduler_utils.DeepCopyTemplateNode(nodeTemplate, i))
 		}
 	}
 	return upcomingNodes

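The comment in the hunk above notes that nodeName will be used as a map key; that is why the copies need distinct names at all. A toy sketch (made-up values) of how duplicate names would silently collapse in a name-keyed map such as the one built by CreateNodeNameToInfoMap in the third file:

package main

import "fmt"

func main() {
	// Hypothetical node infos keyed by node name. Two copies sharing
	// the template's name overwrite each other; only one survives.
	infos := map[string]string{}
	for _, name := range []string{"template-node", "template-node", "real-node"} {
		infos[name] = "NodeInfo for " + name
	}
	fmt.Println(len(infos)) // 2, not 3: the duplicate replaced its twin
}
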
cluster-autoscaler/estimator/binpacking_estimator.go

@@ -17,13 +17,12 @@ limitations under the License.
 package estimator
 
 import (
-	"fmt"
 	"sort"
-	"time"
 
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
 	klog "k8s.io/klog/v2"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -75,7 +74,6 @@ func (estimator *BinpackingNodeEstimator) Estimate(
 		}
 	}()
 
-	newNodeNameTimestamp := time.Now()
 	newNodeNameIndex := 0
 
 	for _, podInfo := range podInfos {
@@ -94,7 +92,7 @@ func (estimator *BinpackingNodeEstimator) Estimate(
 		if !found {
 			// Add new node
-			newNodeName, err := estimator.addNewNodeToSnapshot(nodeTemplate, newNodeNameTimestamp, newNodeNameIndex)
+			newNodeName, err := estimator.addNewNodeToSnapshot(nodeTemplate, newNodeNameIndex)
 			if err != nil {
 				klog.Errorf("Error while adding new node for template to ClusterSnapshot; %v", err)
 				return 0
@@ -113,23 +111,17 @@
 func (estimator *BinpackingNodeEstimator) addNewNodeToSnapshot(
 	template *schedulerframework.NodeInfo,
-	nameTimestamp time.Time,
 	nameIndex int) (string, error) {
-	newNode := template.Node().DeepCopy()
-	newNode.Name = fmt.Sprintf("%s-%d-%d", newNode.Name, nameTimestamp.Unix(), nameIndex)
-	if newNode.Labels == nil {
-		newNode.Labels = make(map[string]string)
-	}
-	newNode.Labels["kubernetes.io/hostname"] = newNode.Name
+	newNodeInfo := scheduler.DeepCopyTemplateNode(template, nameIndex)
 	var pods []*apiv1.Pod
-	for _, podInfo := range template.Pods {
+	for _, podInfo := range newNodeInfo.Pods {
 		pods = append(pods, podInfo.Pod)
 	}
-	if err := estimator.clusterSnapshot.AddNodeWithPods(newNode, pods); err != nil {
+	if err := estimator.clusterSnapshot.AddNodeWithPods(newNodeInfo.Node(), pods); err != nil {
 		return "", err
 	}
-	return newNode.Name, nil
+	return newNodeInfo.Node().Name, nil
 }
 
 // Calculates score for all pods and returns podInfo structure.

cluster-autoscaler/utils/scheduler/scheduler.go

@@ -17,7 +17,10 @@ limitations under the License.
 package scheduler
 
 import (
+	"fmt"
+
 	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/uuid"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -57,3 +60,25 @@ func CreateNodeNameToInfoMap(pods []*apiv1.Pod, nodes []*apiv1.Node) map[string]
 	return nodeNameToNodeInfo
 }
+
+// DeepCopyTemplateNode copies NodeInfo object used as a template. It changes
+// names and UIDs of both the node and the pods running on it, so that copies
+// can be used to represent multiple nodes.
+func DeepCopyTemplateNode(nodeTemplate *schedulerframework.NodeInfo, index int) *schedulerframework.NodeInfo {
+	node := nodeTemplate.Node().DeepCopy()
+	node.Name = fmt.Sprintf("%s-%d", node.Name, index)
+	node.UID = uuid.NewUUID()
+	if node.Labels == nil {
+		node.Labels = make(map[string]string)
+	}
+	node.Labels["kubernetes.io/hostname"] = node.Name
+	nodeInfo := schedulerframework.NewNodeInfo()
+	nodeInfo.SetNode(node)
+	for _, podInfo := range nodeTemplate.Pods {
+		pod := podInfo.Pod.DeepCopy()
+		pod.Name = fmt.Sprintf("%s-%d", podInfo.Pod.Name, index)
+		pod.UID = uuid.NewUUID()
+		nodeInfo.AddPod(pod)
+	}
+	return nodeInfo
+}
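
For reference, a hedged usage sketch of the new helper; the template below is hypothetical (real callers pass templates derived from node groups, possibly carrying daemonset pods):

package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"

	"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
)

func main() {
	// Build a bare template NodeInfo with a made-up name.
	template := schedulerframework.NewNodeInfo()
	template.SetNode(&apiv1.Node{ObjectMeta: metav1.ObjectMeta{Name: "template-node"}})

	// Stamp out three distinct upcoming-node copies.
	for i := 0; i < 3; i++ {
		ni := scheduler.DeepCopyTemplateNode(template, i)
		n := ni.Node()
		fmt.Println(n.Name, n.Labels["kubernetes.io/hostname"], n.UID)
	}
	// Each copy gets a distinct name ("template-node-0", ...), a matching
	// hostname label, and a fresh UID, so anti-affinity on
	// kubernetes.io/hostname now sees three separate topology domains.
}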