From 622a838c2ccd57f5dbd6daeda376a3170a80233a Mon Sep 17 00:00:00 2001 From: t-qini Date: Sun, 7 Jul 2019 09:41:46 +0800 Subject: [PATCH] Modify nodal similarity rules. --- .../alicloud/alicloud_cloud_provider.go | 11 +++- .../cloudprovider/aws/aws_cloud_provider.go | 7 +++ .../azure/azure_cloud_provider.go | 26 ++++++++- .../cloudprovider/azure/azure_util.go | 6 ++- .../baiducloud/baiducloud_cloud_provider.go | 7 +++ .../cloudprovider/cloud_provider.go | 3 ++ .../cloudprovider/gce/gce_cloud_provider.go | 7 +++ .../magnum/magnum_cloud_provider.go | 8 +++ cluster-autoscaler/core/autoscaler.go | 2 + cluster-autoscaler/core/static_autoscaler.go | 11 +++- cluster-autoscaler/main.go | 22 ++++++-- .../nodegroupset/compare_nodegroups.go | 54 ++++++++++++------- 12 files changed, 133 insertions(+), 31 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/alicloud/alicloud_cloud_provider.go b/cluster-autoscaler/cloudprovider/alicloud/alicloud_cloud_provider.go index 232cce387f..892c085acf 100644 --- a/cluster-autoscaler/cloudprovider/alicloud/alicloud_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/alicloud/alicloud_cloud_provider.go @@ -18,17 +18,18 @@ package alicloud import ( "fmt" - "strings" - "os" + "strings" apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/klog" + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" ) const ( @@ -230,3 +231,9 @@ func BuildAlicloud(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDis } return cloudProvider } + +// IsNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. +func (ali *aliCloudProvider) IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + return nodegroupset.IsNodeInfoSimilar(n1, n2) +} diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go index f848896697..4d95310cea 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go @@ -27,6 +27,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/klog" schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" @@ -357,3 +358,9 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover } return provider } + +// IsNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. +func (aws *awsCloudProvider) IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + return nodegroupset.IsNodeInfoSimilar(n1, n2) +} diff --git a/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go b/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go index c3e169c4ad..4407eaf9e2 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go @@ -20,13 +20,14 @@ import ( "io" "os" - "k8s.io/klog" - apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/klog" + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" ) const ( @@ -170,3 +171,24 @@ func BuildAzure(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscov } return provider } + +func nodesFromSameAzureNodePool(n1, n2 *schedulernodeinfo.NodeInfo) bool { + n1AzureNodePool := n1.Node().Labels[AzureNodepoolLabel] + n2AzureNodePool := n2.Node().Labels[AzureNodepoolLabel] + return n1AzureNodePool != "" && n1AzureNodePool == n2AzureNodePool +} + +// IsNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. This is true if they either belong to the same Azure agentpool +// or match usual conditions checked by IsNodeInfoSimilar, even if they have different agentpool labels. +func (azure *AzureCloudProvider) IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + if nodesFromSameAzureNodePool(n1, n2) { + return true + } + azureIgnoredLabels := make(map[string]bool) + for k, v := range nodegroupset.IgnoredLabels { + azureIgnoredLabels[k] = v + } + azureIgnoredLabels[AzureNodepoolLabel] = true + return nodegroupset.IsNodeInfoSimilarExceptIgnoredLabels(n1, n2, azureIgnoredLabels) +} diff --git a/cluster-autoscaler/cloudprovider/azure/azure_util.go b/cluster-autoscaler/cloudprovider/azure/azure_util.go index 5272acec2c..6fee19b80c 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_util.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_util.go @@ -34,11 +34,12 @@ import ( azStorage "github.com/Azure/azure-sdk-for-go/storage" "github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest/to" + "golang.org/x/crypto/pkcs12" - "k8s.io/klog" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/client-go/pkg/version" + "k8s.io/klog" ) const ( @@ -75,6 +76,9 @@ const ( k8sWindowsVMAgentPoolPrefixIndex = 1 k8sWindowsVMAgentOrchestratorNameIndex = 2 k8sWindowsVMAgentPoolInfoIndex = 3 + + // AzureNodepoolLabel is a label specifying which Azure node pool a particular node belongs to. + AzureNodepoolLabel = "agentpool" ) var ( diff --git a/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go b/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go index c06bba3338..9f121ccf7d 100644 --- a/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go @@ -27,6 +27,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/klog" schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" @@ -368,3 +369,9 @@ func (asg *Asg) Delete() error { func (asg *Asg) Autoprovisioned() bool { return false } + +// IsNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. +func (baiducloud *baiducloudCloudProvider) IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + return nodegroupset.IsNodeInfoSimilar(n1, n2) +} diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index ad10ccd6e2..ce33c41ffa 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -69,6 +69,9 @@ type CloudProvider interface { // Refresh is called before every main loop and can be used to dynamically update cloud provider state. // In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh(). Refresh() error + + // IsNodeInfoSimilar compare if two nodes are similar + IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool } // ErrNotImplemented is returned if a method is not implemented. diff --git a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go index b4c314804f..9ebe41f88b 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/klog" schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" @@ -366,3 +367,9 @@ func BuildGCE(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover RegisterMetrics() return provider } + +// IsNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. +func (gce *GceCloudProvider) IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + return nodegroupset.IsNodeInfoSimilar(n1, n2) +} diff --git a/cluster-autoscaler/cloudprovider/magnum/magnum_cloud_provider.go b/cluster-autoscaler/cloudprovider/magnum/magnum_cloud_provider.go index 44f5b2e9b5..2a52fa6e25 100644 --- a/cluster-autoscaler/cloudprovider/magnum/magnum_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/magnum/magnum_cloud_provider.go @@ -26,8 +26,10 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/klog" + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" ) const ( @@ -207,3 +209,9 @@ func BuildMagnum(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDisco return provider } + +// IsNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. +func (mcp *magnumCloudProvider) IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + return nodegroupset.IsNodeInfoSimilar(n1, n2) +} diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 4bf0d61021..c04161fc5c 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -57,6 +57,8 @@ type Autoscaler interface { RunOnce(currentTime time.Time) errors.AutoscalerError // ExitCleanUp is a clean-up performed just before process termination. ExitCleanUp() + + GetCloudProvider() cloudprovider.CloudProvider } // NewAutoscaler creates an autoscaler of an appropriate type according to the parameters diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 8369266bac..9d87b9fdf8 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -74,6 +74,11 @@ type StaticAutoscaler struct { ignoredTaints taintKeySet } +// GetCloudProvider returns the CloudProvider instance in staticAutoscaler +func (a *StaticAutoscaler) GetCloudProvider() cloudprovider.CloudProvider { + return a.CloudProvider +} + type staticAutoscalerProcessorCallbacks struct { disableScaleDownForLoop bool extraValues map[string]interface{} @@ -165,10 +170,12 @@ func (a *StaticAutoscaler) cleanUpIfRequired() { if readyNodes, err := a.ReadyNodeLister().List(); err != nil { klog.Errorf("Failed to list ready nodes, not cleaning up taints: %v", err) } else { - deletetaint.CleanAllToBeDeleted(readyNodes, a.AutoscalingContext.ClientSet, a.Recorder) + deletetaint.CleanAllToBeDeleted(readyNodes, + a.AutoscalingContext.ClientSet, a.Recorder) if a.AutoscalingContext.AutoscalingOptions.MaxBulkSoftTaintCount == 0 { // Clean old taints if soft taints handling is disabled - deletetaint.CleanAllDeletionCandidates(readyNodes, a.AutoscalingContext.ClientSet, a.Recorder) + deletetaint.CleanAllDeletionCandidates(readyNodes, + a.AutoscalingContext.ClientSet, a.Recorder) } } a.initialized = true diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index cb8034b073..a15c0da0ea 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -29,6 +29,9 @@ import ( "syscall" "time" + "github.com/prometheus/client_golang/prometheus" + "github.com/spf13/pflag" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" cloudBuilder "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/builder" "k8s.io/autoscaler/cluster-autoscaler/config" @@ -37,6 +40,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/expander" "k8s.io/autoscaler/cluster-autoscaler/metrics" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/units" @@ -48,11 +52,8 @@ import ( "k8s.io/client-go/tools/leaderelection/resourcelock" kube_flag "k8s.io/component-base/cli/flag" componentbaseconfig "k8s.io/component-base/config" - "k8s.io/kubernetes/pkg/client/leaderelectionconfig" - - "github.com/prometheus/client_golang/prometheus" - "github.com/spf13/pflag" "k8s.io/klog" + "k8s.io/kubernetes/pkg/client/leaderelectionconfig" ) // MultiStringFlag is a flag for passing multiple parameters using same flag @@ -298,7 +299,18 @@ func buildAutoscaler() (core.Autoscaler, error) { metrics.UpdateNapEnabled(autoscalingOptions.NodeAutoprovisioningEnabled) // Create autoscaler. - return core.NewAutoscaler(opts) + ca, err := core.NewAutoscaler(opts) + if ca == nil || err != nil { + return ca, err + } + + // Modify the NodeGroupSetProcessor.Comparator in autoscaler + cp := ca.GetCloudProvider() + processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{ + Comparator: cp.IsNodeInfoSimilar, + } + + return ca, err } func run(healthCheck *metrics.HealthCheck) { diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go index cf6c662ce9..194a57e203 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go @@ -33,6 +33,15 @@ const ( MaxFreeDifferenceRatio = 0.05 ) +// IgnoredLabels define a set of basic labels that should be ignored when comparing the similarity +// of two nodes +var IgnoredLabels = map[string]bool{ + apiv1.LabelHostname: true, + apiv1.LabelZoneFailureDomain: true, + apiv1.LabelZoneRegion: true, + "beta.kubernetes.io/fluentd-ds-ready": true, // this is internal label used for determining if fluentd should be installed as deamon set. Used for migration 1.8 to 1.9. +} + // NodeInfoComparator is a function that tells if two nodes are from NodeGroups // similar enough to be considered a part of a single NodeGroupSet. type NodeInfoComparator func(n1, n2 *schedulernodeinfo.NodeInfo) bool @@ -52,12 +61,36 @@ func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resourc return true } +func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool { + labels := make(map[string][]string) + for _, node := range nodes { + for label, value := range node.Node().ObjectMeta.Labels { + ignore, _ := ignoredLabels[label] + if !ignore { + labels[label] = append(labels[label], value) + } + } + } + for _, labelValues := range labels { + if len(labelValues) != 2 || labelValues[0] != labelValues[1] { + return false + } + } + return true +} + // IsNodeInfoSimilar returns true if two NodeInfos are similar enough to consider // that the NodeGroups they come from are part of the same NodeGroupSet. The criteria are // somewhat arbitrary, but generally we check if resources provided by both nodes // are similar enough to likely be the same type of machine and if the set of labels // is the same (except for a pre-defined set of labels like hostname or zone). func IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { + return IsNodeInfoSimilarExceptIgnoredLabels(n1, n2, IgnoredLabels) +} + +// IsNodeInfoSimilarExceptIgnoredLabels returns true if two NodeInfos are similar while +// ignoring the set of labels provided +func IsNodeInfoSimilarExceptIgnoredLabels(n1, n2 *schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool { capacity := make(map[apiv1.ResourceName][]resource.Quantity) allocatable := make(map[apiv1.ResourceName][]resource.Quantity) free := make(map[apiv1.ResourceName][]resource.Quantity) @@ -92,26 +125,9 @@ func IsNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo) bool { return false } - ignoredLabels := map[string]bool{ - apiv1.LabelHostname: true, - apiv1.LabelZoneFailureDomain: true, - apiv1.LabelZoneRegion: true, - "beta.kubernetes.io/fluentd-ds-ready": true, // this is internal label used for determining if fluentd should be installed as deamon set. Used for migration 1.8 to 1.9. + if !compareLabels(nodes, ignoredLabels) { + return false } - labels := make(map[string][]string) - for _, node := range nodes { - for label, value := range node.Node().ObjectMeta.Labels { - ignore, _ := ignoredLabels[label] - if !ignore { - labels[label] = append(labels[label], value) - } - } - } - for _, labelValues := range labels { - if len(labelValues) != 2 || labelValues[0] != labelValues[1] { - return false - } - } return true }