cluster-autoscaler: Fix node group auto discovery for AWS not to mix up ASGs from different k8s clusters
This commit is contained in:
parent
7697d5345a
commit
3e8cc02243
|
|
@ -159,8 +159,11 @@ spec:
|
|||
|
||||
As of version v0.5.1, docker images including support for `--node-group-auto-discovery` are not yet published to the official repository.
|
||||
Please check out the latest source of this project locally and run `REGISTRY=<your docker repo> make release` to build and push an image yourself.
|
||||
Then, a manifest like below would run a cluster-autoscaler which auto-discovers ASGs tagged with `k8s.io/cluster-autoscaler/enabled` to be node groups.
|
||||
Please notice that there are no `--nodes` flags passed to cluster-autoscaler in this setup.
|
||||
Then, a manifest like below would run a cluster-autoscaler which auto-discovers ASGs tagged with `k8s.io/cluster-autoscaler/enabled` and `kubernetes.io/cluster/<YOUR CLUSTER NAME>` to be node groups.
|
||||
Note that:
|
||||
|
||||
* `kubernetes.io/cluster/<YOUR CLUSTER NAME>` is required when `k8s.io/cluster-autoscaler/enabled` is used across many clusters, to prevent ASGs from different clusters from being recognized as node groups
|
||||
* There are no `--nodes` flags passed to cluster-autoscaler because the node groups are automatically discovered by tags
|
||||
|
||||
```yaml
|
||||
---
|
||||
|
|
@ -198,7 +201,7 @@ spec:
|
|||
- --cloud-provider=aws
|
||||
- --skip-nodes-with-local-storage=false
|
||||
- --expander=least-waste
|
||||
- --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled
|
||||
- --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,kubernetes.io/cluster/<YOUR CLUSTER NAME>
|
||||
env:
|
||||
- name: AWS_REGION
|
||||
value: us-east-1
|
||||
|
|
|
|||
|
|
@ -67,7 +67,11 @@ func buildAutoDiscoveringProvider(awsManager *AwsManager, spec string) (*awsClou
|
|||
if tag == "" {
|
||||
return nil, fmt.Errorf("Invalid ASG tag for auto discovery specified: ASG tag must not be empty")
|
||||
}
|
||||
asgs, err := awsManager.getAutoscalingGroupsByTag(tag)
|
||||
// Use the k8s cluster name tag to only discover asgs of the cluster denoted by clusterName
|
||||
// See https://github.com/kubernetes/kubernetes/blob/9ef85a7/pkg/cloudprovider/providers/aws/tags.go#L30-L34
|
||||
// for more information about the tag
|
||||
tags := strings.Split(tag, ",")
|
||||
asgs, err := awsManager.getAutoscalingGroupsByTags(tags)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to get ASGs: %v", err)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -257,27 +257,33 @@ func (m *AwsManager) getAutoscalingGroupsByNames(names []string) ([]*autoscaling
|
|||
return asgs, nil
|
||||
}
|
||||
|
||||
func (m *AwsManager) getAutoscalingGroupsByTag(key string) ([]*autoscaling.Group, error) {
|
||||
glog.V(6).Infof("Starting getAutoscalingGroupsByTag with key=%v", key)
|
||||
func (m *AwsManager) getAutoscalingGroupsByTags(keys []string) ([]*autoscaling.Group, error) {
|
||||
glog.V(6).Infof("Starting getAutoscalingGroupsByTag with keys=%v", keys)
|
||||
|
||||
tags := []*autoscaling.TagDescription{}
|
||||
numKeys := len(keys)
|
||||
|
||||
// DescribeTags does an OR query when multiple filters on different tags are specified.
|
||||
// In other words, DescribeTags returns [asg1, asg1] for keys [t1, t2] when there's only one asg tagged with both t1 and t2.
|
||||
filters := []*autoscaling.Filter{}
|
||||
for _, key := range keys {
|
||||
filter := &autoscaling.Filter{
|
||||
Name: aws.String("key"),
|
||||
Values: []*string{aws.String(key)},
|
||||
}
|
||||
filters = append(filters, filter)
|
||||
}
|
||||
description, err := m.service.DescribeTags(&autoscaling.DescribeTagsInput{
|
||||
Filters: []*autoscaling.Filter{
|
||||
{
|
||||
Name: aws.String("key"),
|
||||
Values: []*string{aws.String(key)},
|
||||
},
|
||||
},
|
||||
Filters: filters,
|
||||
MaxRecords: aws.Int64(maxRecordsReturnedByAPI),
|
||||
})
|
||||
if err != nil {
|
||||
glog.V(4).Infof("Failed to describe ASG tags for key %s : %v", key, err)
|
||||
glog.V(4).Infof("Failed to describe ASG tags for keys %v : %v", keys, err)
|
||||
return nil, err
|
||||
}
|
||||
if len(description.Tags) < 1 {
|
||||
return nil, fmt.Errorf("Unable to find ASGs for tag key %s", key)
|
||||
return nil, fmt.Errorf("Unable to find ASGs for tag keys %v", keys)
|
||||
}
|
||||
tags := []*autoscaling.TagDescription{}
|
||||
tags = append(tags, description.Tags...)
|
||||
|
||||
for description.NextToken != nil {
|
||||
|
|
@ -286,16 +292,30 @@ func (m *AwsManager) getAutoscalingGroupsByTag(key string) ([]*autoscaling.Group
|
|||
MaxRecords: aws.Int64(maxRecordsReturnedByAPI),
|
||||
})
|
||||
if err != nil {
|
||||
glog.V(4).Infof("Failed to describe ASG tags for key %s: %v", key, err)
|
||||
glog.V(4).Infof("Failed to describe ASG tags for key %v: %v", keys, err)
|
||||
return nil, err
|
||||
}
|
||||
tags = append(tags, description.Tags...)
|
||||
}
|
||||
|
||||
asgNames := []string{}
|
||||
// De-duplicate asg names
|
||||
asgNameOccurrences := map[string]int{}
|
||||
for _, t := range tags {
|
||||
asgName := t.ResourceId
|
||||
asgNames = append(asgNames, *asgName)
|
||||
asgName := *(t.ResourceId)
|
||||
if n, ok := asgNameOccurrences[asgName]; ok {
|
||||
asgNameOccurrences[asgName] = n + 1
|
||||
} else {
|
||||
asgNameOccurrences[asgName] = 1
|
||||
}
|
||||
}
|
||||
// Because of how the DescribeTags API works, the result also contains ASGs to which only a subset of the tags, not all of them, are associated.
|
||||
// Explicitly select ASGs to which all the tags are associated so that we won't end up calling DescribeAutoScalingGroups API
|
||||
// multiple times on an ASG
|
||||
asgNames := []string{}
|
||||
for asgName, n := range asgNameOccurrences {
|
||||
if n == numKeys {
|
||||
asgNames = append(asgNames, asgName)
|
||||
}
|
||||
}
|
||||
|
||||
asgs, err := m.getAutoscalingGroupsByNames(asgNames)
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ var (
|
|||
cloudConfig = flag.String("cloud-config", "", "The path to the cloud provider configuration file. Empty string for no configuration file.")
|
||||
configMapName = flag.String("configmap", "", "The name of the ConfigMap containing settings used for dynamic reconfiguration. Empty string for no ConfigMap.")
|
||||
namespace = flag.String("namespace", "kube-system", "Namespace in which cluster-autoscaler run. If a --configmap flag is also provided, ensure that the configmap exists in this namespace before CA runs.")
|
||||
nodeGroupAutoDiscovery = flag.String("node-group-auto-discovery", "", "One or more definition(s) of node group auto-discovery. A definition is expressed `<name of discoverer per cloud provider>:[<key>[=<value>]]`. Only the `aws` cloud provider is currently supported. The only valid discoverer for it is `asg` and the valid key is `tag`. For example, specifying `--cloud-provider aws` and `--node-group-auto-discovery asg:tag=cluster-autoscaler/auto-discovery/enabled` resuls in ASGs tagged with `cluster-autoscaler/auto-discovery/enabled` to be considered as target node groups")
|
||||
nodeGroupAutoDiscovery = flag.String("node-group-auto-discovery", "", "One or more definition(s) of node group auto-discovery. A definition is expressed `<name of discoverer per cloud provider>:[<key>[=<value>]]`. Only the `aws` cloud provider is currently supported. The only valid discoverer for it is `asg` and the valid key is `tag`. For example, specifying `--cloud-provider aws` and `--node-group-auto-discovery asg:tag=cluster-autoscaler/auto-discovery/enabled,kubernetes.io/cluster/<YOUR CLUSTER NAME>` results in ASGs tagged with `cluster-autoscaler/auto-discovery/enabled` and `kubernetes.io/cluster/<YOUR CLUSTER NAME>` to be considered as target node groups")
|
||||
verifyUnschedulablePods = flag.Bool("verify-unschedulable-pods", true,
|
||||
"If enabled CA will ensure that each pod marked by Scheduler as unschedulable actually can't be scheduled on any node."+
|
||||
"This prevents from adding unnecessary nodes in situation when CA and Scheduler have different configuration.")
|
||||
|
|
|
|||
Loading…
Reference in New Issue