cluster-autoscaler: Fix node group auto discovery for AWS not to mix up ASGs from different k8s clusters

This commit is contained in:
Yusuke Kuoka 2017-06-22 13:39:58 +09:00
parent 7697d5345a
commit 3e8cc02243
4 changed files with 47 additions and 20 deletions

View File

@ -159,8 +159,11 @@ spec:
As of version v0.5.1, Docker images that include support for `--node-group-auto-discovery` are not yet published to the official repository.
Please check out the latest source of this project locally and run `REGISTRY=<your docker repo> make release` to build and push an image yourself.
Then, a manifest like below would run a cluster-autoscaler which auto-discovers ASGs tagged with `k8s.io/cluster-autoscaler/enabled` to be node groups.
Please notice that there are no `--nodes` flags passed to cluster-autoscaler in this setup.
Then, a manifest like below would run a cluster-autoscaler which auto-discovers ASGs tagged with `k8s.io/cluster-autoscaler/enabled` and `kubernetes.io/cluster/<YOUR CLUSTER NAME>` to be node groups.
Note that:
* `kubernetes.io/cluster/<YOUR CLUSTER NAME>` is required when `k8s.io/cluster-autoscaler/enabled` is used across many clusters, to prevent ASGs from different clusters from being recognized as node groups of this cluster
* There are no `--nodes` flags passed to cluster-autoscaler because the node groups are automatically discovered by tags
```yaml
---
@ -198,7 +201,7 @@ spec:
- --cloud-provider=aws
- --skip-nodes-with-local-storage=false
- --expander=least-waste
- --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled
- --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,kubernetes.io/cluster/<YOUR CLUSTER NAME>
env:
- name: AWS_REGION
value: us-east-1

View File

@ -67,7 +67,11 @@ func buildAutoDiscoveringProvider(awsManager *AwsManager, spec string) (*awsClou
if tag == "" {
return nil, fmt.Errorf("Invalid ASG tag for auto discovery specified: ASG tag must not be empty")
}
asgs, err := awsManager.getAutoscalingGroupsByTag(tag)
// Use the k8s cluster name tag to only discover asgs of the cluster denoted by clusterName
// See https://github.com/kubernetes/kubernetes/blob/9ef85a7/pkg/cloudprovider/providers/aws/tags.go#L30-L34
// for more information about the tag
tags := strings.Split(tag, ",")
asgs, err := awsManager.getAutoscalingGroupsByTags(tags)
if err != nil {
return nil, fmt.Errorf("Failed to get ASGs: %v", err)
}

View File

@ -257,27 +257,33 @@ func (m *AwsManager) getAutoscalingGroupsByNames(names []string) ([]*autoscaling
return asgs, nil
}
func (m *AwsManager) getAutoscalingGroupsByTag(key string) ([]*autoscaling.Group, error) {
glog.V(6).Infof("Starting getAutoscalingGroupsByTag with key=%v", key)
func (m *AwsManager) getAutoscalingGroupsByTags(keys []string) ([]*autoscaling.Group, error) {
glog.V(6).Infof("Starting getAutoscalingGroupsByTag with keys=%v", keys)
tags := []*autoscaling.TagDescription{}
numKeys := len(keys)
// DescribeTags does an OR query when multiple filters on different tags are specified.
// In other words, DescribeTags returns [asg1, asg1] for keys [t1, t2] when there's only one asg tagged with both t1 and t2.
filters := []*autoscaling.Filter{}
for _, key := range keys {
filter := &autoscaling.Filter{
Name: aws.String("key"),
Values: []*string{aws.String(key)},
}
filters = append(filters, filter)
}
description, err := m.service.DescribeTags(&autoscaling.DescribeTagsInput{
Filters: []*autoscaling.Filter{
{
Name: aws.String("key"),
Values: []*string{aws.String(key)},
},
},
Filters: filters,
MaxRecords: aws.Int64(maxRecordsReturnedByAPI),
})
if err != nil {
glog.V(4).Infof("Failed to describe ASG tags for key %s : %v", key, err)
glog.V(4).Infof("Failed to describe ASG tags for keys %v : %v", keys, err)
return nil, err
}
if len(description.Tags) < 1 {
return nil, fmt.Errorf("Unable to find ASGs for tag key %s", key)
return nil, fmt.Errorf("Unable to find ASGs for tag keys %v", keys)
}
tags := []*autoscaling.TagDescription{}
tags = append(tags, description.Tags...)
for description.NextToken != nil {
@ -286,16 +292,30 @@ func (m *AwsManager) getAutoscalingGroupsByTag(key string) ([]*autoscaling.Group
MaxRecords: aws.Int64(maxRecordsReturnedByAPI),
})
if err != nil {
glog.V(4).Infof("Failed to describe ASG tags for key %s: %v", key, err)
glog.V(4).Infof("Failed to describe ASG tags for key %v: %v", keys, err)
return nil, err
}
tags = append(tags, description.Tags...)
}
asgNames := []string{}
// De-duplicate asg names
asgNameOccurrences := map[string]int{}
for _, t := range tags {
asgName := t.ResourceId
asgNames = append(asgNames, *asgName)
asgName := *(t.ResourceId)
if n, ok := asgNameOccurrences[asgName]; ok {
asgNameOccurrences[asgName] = n + 1
} else {
asgNameOccurrences[asgName] = 1
}
}
// Because of how the DescribeTags API works, the result also contains ASGs that carry only a subset of the tags rather than all of them.
// Explicitly select ASGs to which all the tags are associated so that we won't end up calling DescribeAutoScalingGroups API
// multiple times on an ASG
asgNames := []string{}
for asgName, n := range asgNameOccurrences {
if n == numKeys {
asgNames = append(asgNames, asgName)
}
}
asgs, err := m.getAutoscalingGroupsByNames(asgNames)

View File

@ -67,7 +67,7 @@ var (
cloudConfig = flag.String("cloud-config", "", "The path to the cloud provider configuration file. Empty string for no configuration file.")
configMapName = flag.String("configmap", "", "The name of the ConfigMap containing settings used for dynamic reconfiguration. Empty string for no ConfigMap.")
namespace = flag.String("namespace", "kube-system", "Namespace in which cluster-autoscaler run. If a --configmap flag is also provided, ensure that the configmap exists in this namespace before CA runs.")
nodeGroupAutoDiscovery = flag.String("node-group-auto-discovery", "", "One or more definition(s) of node group auto-discovery. A definition is expressed `<name of discoverer per cloud provider>:[<key>[=<value>]]`. Only the `aws` cloud provider is currently supported. The only valid discoverer for it is `asg` and the valid key is `tag`. For example, specifying `--cloud-provider aws` and `--node-group-auto-discovery asg:tag=cluster-autoscaler/auto-discovery/enabled` resuls in ASGs tagged with `cluster-autoscaler/auto-discovery/enabled` to be considered as target node groups")
nodeGroupAutoDiscovery = flag.String("node-group-auto-discovery", "", "One or more definition(s) of node group auto-discovery. A definition is expressed `<name of discoverer per cloud provider>:[<key>[=<value>]]`. Only the `aws` cloud provider is currently supported. The only valid discoverer for it is `asg` and the valid key is `tag`. For example, specifying `--cloud-provider aws` and `--node-group-auto-discovery asg:tag=cluster-autoscaler/auto-discovery/enabled,kubernetes.io/cluster/<YOUR CLUSTER NAME>` results in ASGs tagged with `cluster-autoscaler/auto-discovery/enabled` and `kubernetes.io/cluster/<YOUR CLUSTER NAME>` to be considered as target node groups")
verifyUnschedulablePods = flag.Bool("verify-unschedulable-pods", true,
"If enabled CA will ensure that each pod marked by Scheduler as unschedulable actually can't be scheduled on any node."+
"This prevents from adding unnecessary nodes in situation when CA and Scheduler have different configuration.")