diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md index 2203f95b24..8977f7d63b 100644 --- a/cluster-autoscaler/cloudprovider/aws/README.md +++ b/cluster-autoscaler/cloudprovider/aws/README.md @@ -5,7 +5,9 @@ The cluster autoscaler on AWS scales worker nodes within any specified autoscali Cluster autoscaler must run on v1.3.0 or greater. ## Permissions -The worker running the cluster autoscaler will need access to certain resources and actions: +The worker running the cluster autoscaler will need access to certain resources and actions. + +A minimum IAM policy would look like: ```json { "Version": "2012-10-17", @@ -23,6 +25,28 @@ The worker running the cluster autoscaler will need access to certain resources ] } ``` + +If you'd like to auto-discover node groups by specifing the `--node-group-auto-discover` flag, a `DescribeTags` permission is also required: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "autoscaling:DescribeAutoScalingGroups", + "autoscaling:DescribeAutoScalingInstances", + "autoscaling:DescribeAutoScalingTags", + "autoscaling:SetDesiredCapacity", + "autoscaling:TerminateInstanceInAutoScalingGroup" + ], + "Resource": "*" + } + ] +} +``` + Unfortunately AWS does not support ARNs for autoscaling groups yet so you must use "*" as the resource. More information [here](http://docs.aws.amazon.com/autoscaling/latest/userguide/IAM.html#UsingWithAutoScaling_Actions). ## Deployment Specification @@ -131,6 +155,64 @@ spec: path: "/etc/ssl/certs/ca-certificates.crt" ``` +### Auto-Discovery Setup + +As of version v0.5.1, docker images including the support for `--node-group-auto-discovery` is not yet published to official repository. +Please checkout the latest source of this project locally and run `REGISTRY= make release` to build and push an image yourself. +Then, a manifest like below would run a cluster-autoscaler which auto-discovers ASGs tagged with `k8s.io/cluster-autoscaler/enabled` to be node groups. +Please notice that there are no `--nodes` flags passed to cluster-autoscaler in this setup. + +```yaml +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + spec: + containers: + - image: /cluster-autoscaler:dev + name: cluster-autoscaler + resources: + limits: + cpu: 100m + memory: 300Mi + requests: + cpu: 100m + memory: 300Mi + command: + - ./cluster-autoscaler + - --v=4 + - --stderrthreshold=info + - --cloud-provider=aws + - --skip-nodes-with-local-storage=false + - --expander=least-waste + - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled + env: + - name: AWS_REGION + value: us-east-1 + volumeMounts: + - name: ssl-certs + mountPath: /etc/ssl/certs/ca-certificates.crt + readOnly: true + imagePullPolicy: "Always" + volumes: + - name: ssl-certs + hostPath: + path: "/etc/ssl/certs/ca-certificates.crt" +``` + Common Notes and Gotchas: - The `/etc/ssl/certs/ca-certificates.crt` should exist by default on your ec2 instance. - Cluster autoscaler is not zone aware (for now), so if you wish to span multiple availability zones in your autoscaling groups beware that cluster autoscaler will not evenly distribute them. For more information, see https://github.com/kubernetes/contrib/pull/1552#discussion_r75533090. diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go index 28fc1b0441..19839843b9 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go @@ -17,6 +17,7 @@ limitations under the License. package aws import ( + "errors" "fmt" "regexp" "strings" @@ -27,15 +28,62 @@ import ( "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) -// AwsCloudProvider implements CloudProvider interface. -type AwsCloudProvider struct { +// awsCloudProvider implements CloudProvider interface. +type awsCloudProvider struct { awsManager *AwsManager asgs []*Asg } // BuildAwsCloudProvider builds CloudProvider implementation for AWS. -func BuildAwsCloudProvider(awsManager *AwsManager, specs []string) (*AwsCloudProvider, error) { - aws := &AwsCloudProvider{ +func BuildAwsCloudProvider(awsManager *AwsManager, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions) (cloudprovider.CloudProvider, error) { + if err := discoveryOpts.Validate(); err != nil { + return nil, fmt.Errorf("Failed to build an aws cloud provider: %v", err) + } + if discoveryOpts.StaticDiscoverySpecified() { + return buildStaticallyDiscoveringProvider(awsManager, discoveryOpts.NodeGroupSpecs) + } + if discoveryOpts.AutoDiscoverySpecified() { + return buildAutoDiscoveringProvider(awsManager, discoveryOpts.NodeGroupAutoDiscoverySpec) + } + return nil, errors.New("Failed to build an aws cloud provider: Either node group specs or node group auto discovery spec must be specified") +} + +func buildAutoDiscoveringProvider(awsManager *AwsManager, spec string) (*awsCloudProvider, error) { + tokens := strings.Split(spec, ":") + if len(tokens) != 2 { + return nil, fmt.Errorf("Invalid node group auto discovery spec specified via --node-group-auto-discovery: %s", spec) + } + discoverer := tokens[0] + if discoverer != "asg" { + return nil, fmt.Errorf("Unsupported discoverer specified: %s", discoverer) + } + param := tokens[1] + paramTokens := strings.Split(param, "=") + parameterKey := paramTokens[0] + if parameterKey != "tag" { + return nil, fmt.Errorf("Unsupported parameter key \"%s\" is specified for discoverer \"%s\". The only supported key is \"tag\"", parameterKey, discoverer) + } + tag := paramTokens[1] + if tag == "" { + return nil, errors.New("Invalid ASG tag for auto discovery specified: ASG tag must not be empty") + } + asgs, err := awsManager.getAutoscalingGroupsByTag(tag) + if err != nil { + return nil, fmt.Errorf("Failed to get ASGs: %v", err) + } + + aws := &awsCloudProvider{ + awsManager: awsManager, + asgs: make([]*Asg, 0), + } + for _, asg := range asgs { + aws.addAsg(buildAsg(aws.awsManager, int(*asg.MinSize), int(*asg.MaxSize), *asg.AutoScalingGroupName)) + } + return aws, nil +} + +func buildStaticallyDiscoveringProvider(awsManager *AwsManager, specs []string) (*awsCloudProvider, error) { + aws := &awsCloudProvider{ awsManager: awsManager, asgs: make([]*Asg, 0), } @@ -49,23 +97,28 @@ func BuildAwsCloudProvider(awsManager *AwsManager, specs []string) (*AwsCloudPro // addNodeGroup adds node group defined in string spec. Format: // minNodes:maxNodes:asgName -func (aws *AwsCloudProvider) addNodeGroup(spec string) error { - asg, err := buildAsg(spec, aws.awsManager) +func (aws *awsCloudProvider) addNodeGroup(spec string) error { + asg, err := buildAsgFromSpec(spec, aws.awsManager) if err != nil { return err } - aws.asgs = append(aws.asgs, asg) - aws.awsManager.RegisterAsg(asg) + aws.addAsg(asg) return nil } +// addAsg adds and registers an asg to this cloud provider +func (aws *awsCloudProvider) addAsg(asg *Asg) { + aws.asgs = append(aws.asgs, asg) + aws.awsManager.RegisterAsg(asg) +} + // Name returns name of the cloud provider. -func (aws *AwsCloudProvider) Name() string { +func (aws *awsCloudProvider) Name() string { return "aws" } // NodeGroups returns all node groups configured for this cloud provider. -func (aws *AwsCloudProvider) NodeGroups() []cloudprovider.NodeGroup { +func (aws *awsCloudProvider) NodeGroups() []cloudprovider.NodeGroup { result := make([]cloudprovider.NodeGroup, 0, len(aws.asgs)) for _, asg := range aws.asgs { result = append(result, asg) @@ -74,7 +127,7 @@ func (aws *AwsCloudProvider) NodeGroups() []cloudprovider.NodeGroup { } // NodeGroupForNode returns the node group for the given node. -func (aws *AwsCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { +func (aws *awsCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { ref, err := AwsRefFromProviderId(node.Spec.ProviderID) if err != nil { return nil, err @@ -233,21 +286,25 @@ func (asg *Asg) TemplateNodeInfo() (*schedulercache.NodeInfo, error) { return nil, cloudprovider.ErrNotImplemented } -func buildAsg(value string, awsManager *AwsManager) (*Asg, error) { +func buildAsgFromSpec(value string, awsManager *AwsManager) (*Asg, error) { spec, err := dynamic.SpecFromString(value) if err != nil { return nil, fmt.Errorf("failed to parse node group spec: %v", err) } - asg := Asg{ + asg := buildAsg(awsManager, spec.MinSize, spec.MaxSize, spec.Name) + + return asg, nil +} + +func buildAsg(awsManager *AwsManager, minSize int, maxSize int, name string) *Asg { + return &Asg{ awsManager: awsManager, - minSize: spec.MinSize, - maxSize: spec.MaxSize, + minSize: minSize, + maxSize: maxSize, AwsRef: AwsRef{ - Name: spec.Name, + Name: name, }, } - - return &asg, nil } diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go index 9544116daf..d535fb3977 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go @@ -48,6 +48,20 @@ func (a *AutoScalingMock) DescribeAutoScalingGroups(i *autoscaling.DescribeAutoS }, nil } +func (a *AutoScalingMock) DescribeTags(i *autoscaling.DescribeTagsInput) (*autoscaling.DescribeTagsOutput, error) { + return &autoscaling.DescribeTagsOutput{ + Tags: []*autoscaling.TagDescription{ + { + Key: aws.String("foo"), + Value: aws.String("bar"), + ResourceId: aws.String("asg-123456"), + ResourceType: aws.String("auto-scaling-group"), + PropagateAtLaunch: aws.Bool(false), + }, + }, + }, nil +} + func (a *AutoScalingMock) SetDesiredCapacity(input *autoscaling.SetDesiredCapacityInput) (*autoscaling.SetDesiredCapacityOutput, error) { args := a.Called(input) return args.Get(0).(*autoscaling.SetDesiredCapacityOutput), nil @@ -64,18 +78,18 @@ var testAwsManager = &AwsManager{ asgCache: make(map[AwsRef]*Asg), } -func testProvider(t *testing.T, m *AwsManager) *AwsCloudProvider { - provider, err := BuildAwsCloudProvider(m, nil) +func testProvider(t *testing.T, m *AwsManager) *awsCloudProvider { + provider, err := buildStaticallyDiscoveringProvider(m, nil) assert.NoError(t, err) return provider } func TestBuildAwsCloudProvider(t *testing.T) { m := testAwsManager - _, err := BuildAwsCloudProvider(m, []string{"bad spec"}) + _, err := buildStaticallyDiscoveringProvider(m, []string{"bad spec"}) assert.Error(t, err) - _, err = BuildAwsCloudProvider(m, nil) + _, err = buildStaticallyDiscoveringProvider(m, nil) assert.NoError(t, err) } @@ -261,16 +275,16 @@ func TestDebug(t *testing.T) { } func TestBuildAsg(t *testing.T) { - _, err := buildAsg("a", nil) + _, err := buildAsgFromSpec("a", nil) assert.Error(t, err) - _, err = buildAsg("a:b:c", nil) + _, err = buildAsgFromSpec("a:b:c", nil) assert.Error(t, err) - _, err = buildAsg("1:", nil) + _, err = buildAsgFromSpec("1:", nil) assert.Error(t, err) - _, err = buildAsg("1:2:", nil) + _, err = buildAsgFromSpec("1:2:", nil) assert.Error(t, err) - asg, err := buildAsg("111:222:test-name", nil) + asg, err := buildAsgFromSpec("111:222:test-name", nil) assert.NoError(t, err) assert.Equal(t, 111, asg.MinSize()) assert.Equal(t, 222, asg.MaxSize()) diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go index 29f292e267..2167c3058d 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager.go @@ -17,6 +17,7 @@ limitations under the License. package aws import ( + "errors" "fmt" "io" "sync" @@ -33,8 +34,9 @@ import ( ) const ( - operationWaitTimeout = 5 * time.Second - operationPollInterval = 100 * time.Millisecond + operationWaitTimeout = 5 * time.Second + operationPollInterval = 100 * time.Millisecond + maxRecordsReturnedByAPI = 100 ) type asgInformation struct { @@ -44,6 +46,7 @@ type asgInformation struct { type autoScaling interface { DescribeAutoScalingGroups(input *autoscaling.DescribeAutoScalingGroupsInput) (*autoscaling.DescribeAutoScalingGroupsOutput, error) + DescribeTags(input *autoscaling.DescribeTagsInput) (*autoscaling.DescribeTagsOutput, error) SetDesiredCapacity(input *autoscaling.SetDesiredCapacityInput) (*autoscaling.SetDesiredCapacityOutput, error) TerminateInstanceInAutoScalingGroup(input *autoscaling.TerminateInstanceInAutoScalingGroupInput) (*autoscaling.TerminateInstanceInAutoScalingGroupOutput, error) } @@ -216,6 +219,95 @@ func (m *AwsManager) getAutoscalingGroup(name string) (*autoscaling.Group, error return groups.AutoScalingGroups[0], nil } +func (m *AwsManager) getAutoscalingGroupsByNames(names []string) ([]*autoscaling.Group, error) { + glog.V(6).Infof("Starting getAutoscalingGroupsByNames with names=%v", names) + + nameRefs := []*string{} + for _, n := range names { + nameRefs = append(nameRefs, aws.String(n)) + } + params := &autoscaling.DescribeAutoScalingGroupsInput{ + AutoScalingGroupNames: nameRefs, + MaxRecords: aws.Int64(maxRecordsReturnedByAPI), + } + description, err := m.service.DescribeAutoScalingGroups(params) + if err != nil { + glog.V(4).Infof("Failed to describe ASGs : %v", err) + return nil, err + } + if len(description.AutoScalingGroups) < 1 { + return nil, errors.New("No ASGs found") + } + + asgs := description.AutoScalingGroups + for description.NextToken != nil { + description, err = m.service.DescribeAutoScalingGroups(&autoscaling.DescribeAutoScalingGroupsInput{ + NextToken: description.NextToken, + MaxRecords: aws.Int64(maxRecordsReturnedByAPI), + }) + if err != nil { + glog.V(4).Infof("Failed to describe ASGs : %v", err) + return nil, err + } + asgs = append(asgs, description.AutoScalingGroups...) + } + + glog.V(6).Infof("Finishing getAutoscalingGroupsByNames asgs=%v", asgs) + + return asgs, nil +} + +func (m *AwsManager) getAutoscalingGroupsByTag(key string) ([]*autoscaling.Group, error) { + glog.V(6).Infof("Starting getAutoscalingGroupsByTag with key=%v", key) + + tags := []*autoscaling.TagDescription{} + + description, err := m.service.DescribeTags(&autoscaling.DescribeTagsInput{ + Filters: []*autoscaling.Filter{ + { + Name: aws.String("key"), + Values: []*string{aws.String(key)}, + }, + }, + MaxRecords: aws.Int64(maxRecordsReturnedByAPI), + }) + if err != nil { + glog.V(4).Infof("Failed to describe ASG tags for key %s : %v", key, err) + return nil, err + } + if len(description.Tags) < 1 { + return nil, fmt.Errorf("Unable to find ASGs for tag key %s", key) + } + tags = append(tags, description.Tags...) + + for description.NextToken != nil { + description, err = m.service.DescribeTags(&autoscaling.DescribeTagsInput{ + NextToken: description.NextToken, + MaxRecords: aws.Int64(maxRecordsReturnedByAPI), + }) + if err != nil { + glog.V(4).Infof("Failed to describe ASG tags for key %s: %v", key, err) + return nil, err + } + tags = append(tags, description.Tags...) + } + + asgNames := []string{} + for _, t := range tags { + asgName := t.ResourceId + asgNames = append(asgNames, *asgName) + } + + asgs, err := m.getAutoscalingGroupsByNames(asgNames) + if err != nil { + return nil, err + } + + glog.V(6).Infof("Finishing getAutoscalingGroupsByTag with asgs=%v", asgs) + + return asgs, nil +} + // GetAsgNodes returns Asg nodes. func (m *AwsManager) GetAsgNodes(asg *Asg) ([]string, error) { result := make([]string, 0) diff --git a/cluster-autoscaler/cloudprovider/builder/cloud_provider_builder.go b/cluster-autoscaler/cloudprovider/builder/cloud_provider_builder.go index 5eb030194b..4c60ffe9ae 100644 --- a/cluster-autoscaler/cloudprovider/builder/cloud_provider_builder.go +++ b/cluster-autoscaler/cloudprovider/builder/cloud_provider_builder.go @@ -41,10 +41,12 @@ func NewCloudProviderBuilder(cloudProviderFlag string, cloudConfig string) Cloud } // Build a cloud provider from static settings contained in the builder and dynamic settings passed via args -func (b CloudProviderBuilder) Build(nodeGroupsFlag []string) cloudprovider.CloudProvider { +func (b CloudProviderBuilder) Build(discoveryOpts cloudprovider.NodeGroupDiscoveryOptions) cloudprovider.CloudProvider { var err error var cloudProvider cloudprovider.CloudProvider + nodeGroupsFlag := discoveryOpts.NodeGroupSpecs + if b.cloudProviderFlag == "gce" { // GCE Manager var gceManager *gce.GceManager @@ -84,7 +86,7 @@ func (b CloudProviderBuilder) Build(nodeGroupsFlag []string) cloudprovider.Cloud if awsError != nil { glog.Fatalf("Failed to create AWS Manager: %v", err) } - cloudProvider, err = aws.BuildAwsCloudProvider(awsManager, nodeGroupsFlag) + cloudProvider, err = aws.BuildAwsCloudProvider(awsManager, discoveryOpts) if err != nil { glog.Fatalf("Failed to create AWS cloud provider: %v", err) } diff --git a/cluster-autoscaler/cloudprovider/node_group_discovery_options.go b/cluster-autoscaler/cloudprovider/node_group_discovery_options.go new file mode 100644 index 0000000000..ab98324323 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/node_group_discovery_options.go @@ -0,0 +1,45 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cloudprovider + +import "fmt" + +// NodeGroupDiscoveryOptions contains various options to configure how a cloud provider discovers node groups +type NodeGroupDiscoveryOptions struct { + // NodeGroupSpecs is specified to statically discover node groups listed in it + NodeGroupSpecs []string + // NodeGroupAutoDiscoverySpec is specified for automatically discovering node groups according to the specs + NodeGroupAutoDiscoverySpec string +} + +// StaticDiscoverySpecified returns true only when there are 1 or more --nodes flags are specified +func (o NodeGroupDiscoveryOptions) StaticDiscoverySpecified() bool { + return len(o.NodeGroupSpecs) > 0 +} + +// AutoDiscoverySpecified returns true only when there is --node-group-auto-discovery specified +func (o NodeGroupDiscoveryOptions) AutoDiscoverySpecified() bool { + return o.NodeGroupAutoDiscoverySpec != "" +} + +// Validate returns and error when both --nodes and --node-group-auto-discovery are specified +func (o NodeGroupDiscoveryOptions) Validate() error { + if o.StaticDiscoverySpecified() && o.AutoDiscoverySpecified() { + return fmt.Errorf("Either node group specs(%v) or node group auto discovery spec(%v) can be specified but not both", o.NodeGroupSpecs, o.NodeGroupAutoDiscoverySpec) + } + return nil +} diff --git a/cluster-autoscaler/cloudprovider/node_group_discovery_options_test.go b/cluster-autoscaler/cloudprovider/node_group_discovery_options_test.go new file mode 100644 index 0000000000..ce15635c79 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/node_group_discovery_options_test.go @@ -0,0 +1,38 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cloudprovider + +import ( + "fmt" + "testing" +) + +func TestNodeGroupDiscoveryOptionsValidate(t *testing.T) { + o := NodeGroupDiscoveryOptions{ + NodeGroupAutoDiscoverySpec: "asg:tag=foobar", + NodeGroupSpecs: []string{"myasg:0:10"}, + } + + err := o.Validate() + if err == nil { + t.Errorf("Expected validation error didn't occur with NodeGroupDiscoveryOptions: %+v", o) + t.FailNow() + } + if msg := fmt.Sprintf("%v", err); msg != `Either node group specs([myasg:0:10]) or node group auto discovery spec(asg:tag=foobar) can be specified but not both` { + t.Errorf("Unexpected validation error message: %s", msg) + } +} diff --git a/cluster-autoscaler/core/autoscaler.go b/cluster-autoscaler/core/autoscaler.go index 1a0152f724..966f11147c 100644 --- a/cluster-autoscaler/core/autoscaler.go +++ b/cluster-autoscaler/core/autoscaler.go @@ -19,6 +19,7 @@ package core import ( "time" + "github.com/golang/glog" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" "k8s.io/autoscaler/cluster-autoscaler/simulator" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" @@ -47,8 +48,14 @@ type Autoscaler interface { func NewAutoscaler(opts AutoscalerOptions, predicateChecker *simulator.PredicateChecker, kubeClient kube_client.Interface, kubeEventRecorder kube_record.EventRecorder, listerRegistry kube_util.ListerRegistry) Autoscaler { autoscalerBuilder := NewAutoscalerBuilder(opts.AutoscalingOptions, predicateChecker, kubeClient, kubeEventRecorder, listerRegistry) if opts.ConfigMapName != "" { + if opts.NodeGroupAutoDiscovery != "" { + glog.Warning("Both --configmap and --node-group-auto-discovery were specified but only the former is going to take effect") + } configFetcher := dynamic.NewConfigFetcher(opts.ConfigFetcherOptions, kubeClient, kubeEventRecorder) return NewDynamicAutoscaler(autoscalerBuilder, configFetcher) } + if opts.NodeGroupAutoDiscovery != "" { + return NewPollingAutoscaler(autoscalerBuilder) + } return autoscalerBuilder.Build() } diff --git a/cluster-autoscaler/core/autoscaling_context.go b/cluster-autoscaler/core/autoscaling_context.go index e8013f7cad..349eea953a 100644 --- a/cluster-autoscaler/core/autoscaling_context.go +++ b/cluster-autoscaler/core/autoscaling_context.go @@ -67,6 +67,8 @@ type AutoscalingOptions struct { ScaleDownUnreadyTime time.Duration // MaxNodesTotal sets the maximum number of nodes in the whole cluster MaxNodesTotal int + // NodeGroupAutoDiscovery represents one or more definition(s) of node group auto-discovery + NodeGroupAutoDiscovery string // UnregisteredNodeRemovalTime represents how long CA waits before removing nodes that are not registered in Kubernetes") UnregisteredNodeRemovalTime time.Duration // EstimatorName is the estimator used to estimate the number of needed nodes in scale up. @@ -105,7 +107,10 @@ type AutoscalingOptions struct { func NewAutoscalingContext(options AutoscalingOptions, predicateChecker *simulator.PredicateChecker, kubeClient kube_client.Interface, kubeEventRecorder kube_record.EventRecorder, logEventRecorder *utils.LogEventRecorder) *AutoscalingContext { cloudProviderBuilder := builder.NewCloudProviderBuilder(options.CloudProviderName, options.CloudConfig) - cloudProvider := cloudProviderBuilder.Build(options.NodeGroups) + cloudProvider := cloudProviderBuilder.Build(cloudprovider.NodeGroupDiscoveryOptions{ + NodeGroupSpecs: options.NodeGroups, + NodeGroupAutoDiscoverySpec: options.NodeGroupAutoDiscovery, + }) expanderStrategy := factory.ExpanderStrategyFromString(options.ExpanderName) clusterStateConfig := clusterstate.ClusterStateRegistryConfig{ MaxTotalUnreadyPercentage: options.MaxTotalUnreadyPercentage, diff --git a/cluster-autoscaler/core/polling_autoscaler.go b/cluster-autoscaler/core/polling_autoscaler.go new file mode 100644 index 0000000000..33c25005c7 --- /dev/null +++ b/cluster-autoscaler/core/polling_autoscaler.go @@ -0,0 +1,69 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "time" + + "github.com/golang/glog" + "k8s.io/autoscaler/cluster-autoscaler/metrics" +) + +// PollingAutoscaler is a variant of autoscaler which polls the source-of-truth every time RunOnce is invoked +type PollingAutoscaler struct { + autoscaler Autoscaler + autoscalerBuilder AutoscalerBuilder +} + +// NewPollingAutoscaler builds a PollingAutoscaler from required parameters +func NewPollingAutoscaler(autoscalerBuilder AutoscalerBuilder) *PollingAutoscaler { + return &PollingAutoscaler{ + autoscaler: autoscalerBuilder.Build(), + autoscalerBuilder: autoscalerBuilder, + } +} + +// CleanUp does the work required before all the iterations of a polling autoscaler run +func (a *PollingAutoscaler) CleanUp() { + a.autoscaler.CleanUp() +} + +// ExitCleanUp cleans-up after autoscaler, so no mess remains after process termination. +func (a *PollingAutoscaler) ExitCleanUp() { + a.autoscaler.ExitCleanUp() +} + +// RunOnce represents a single iteration of a polling autoscaler inside the CA's control-loop +func (a *PollingAutoscaler) RunOnce(currentTime time.Time) { + reconfigureStart := time.Now() + metrics.UpdateLastTime("poll") + if err := a.Poll(); err != nil { + glog.Errorf("Failed to poll : %v", err) + } + metrics.UpdateDuration("poll", reconfigureStart) + a.autoscaler.RunOnce(currentTime) +} + +// Poll latest data from cloud provider to recreate this autoscaler +func (a *PollingAutoscaler) Poll() error { + // For safety, any config change should stop and recreate all the stuff running in CA hence recreating all the Autoscaler instance here + // See https://github.com/kubernetes/contrib/pull/2226#discussion_r94126064 + a.autoscaler = a.autoscalerBuilder.Build() + glog.V(4).Infof("Poll finished") + + return nil +} diff --git a/cluster-autoscaler/core/polling_autoscaler_test.go b/cluster-autoscaler/core/polling_autoscaler_test.go new file mode 100644 index 0000000000..35024496a4 --- /dev/null +++ b/cluster-autoscaler/core/polling_autoscaler_test.go @@ -0,0 +1,44 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "testing" + "time" + + "github.com/stretchr/testify/mock" +) + +func TestRunOnce(t *testing.T) { + currentTime := time.Now() + + initialAutoscaler := &AutoscalerMock{} + + newAutoscaler := &AutoscalerMock{} + newAutoscaler.On("RunOnce", currentTime).Once() + + builder := &AutoscalerBuilderMock{} + builder.On("Build").Return(initialAutoscaler).Once() + builder.On("Build").Return(newAutoscaler).Once() + + a := NewPollingAutoscaler(builder) + a.RunOnce(currentTime) + + initialAutoscaler.AssertNotCalled(t, "RunOnce", mock.AnythingOfType("time.Time")) + newAutoscaler.AssertExpectations(t) + builder.AssertExpectations(t) +} diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index cad3a26e4f..7270204428 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -66,6 +66,7 @@ var ( cloudConfig = flag.String("cloud-config", "", "The path to the cloud provider configuration file. Empty string for no configuration file.") configMapName = flag.String("configmap", "", "The name of the ConfigMap containing settings used for dynamic reconfiguration. Empty string for no ConfigMap.") namespace = flag.String("namespace", "kube-system", "Namespace in which cluster-autoscaler run. If a --configmap flag is also provided, ensure that the configmap exists in this namespace before CA runs.") + nodeGroupAutoDiscovery = flag.String("node-group-auto-discovery", "", "One or more definition(s) of node group auto-discovery. A definition is expressed `:[[=]]`. Only the `aws` cloud provider is currently supported. The only valid discoverer for it is `asg` and the valid key is `tag`. For example, specifying `--cloud-provider aws` and `--node-group-auto-discovery asg:tag=cluster-autoscaler/auto-discovery/enabled` resuls in ASGs tagged with `cluster-autoscaler/auto-discovery/enabled` to be considered as target node groups") verifyUnschedulablePods = flag.Bool("verify-unschedulable-pods", true, "If enabled CA will ensure that each pod marked by Scheduler as unschedulable actually can't be scheduled on any node."+ "This prevents from adding unnecessary nodes in situation when CA and Scheduler have different configuration.") @@ -103,6 +104,7 @@ func createAutoscalerOptions() core.AutoscalerOptions { autoscalingOpts := core.AutoscalingOptions{ CloudConfig: *cloudConfig, CloudProviderName: *cloudProviderFlag, + NodeGroupAutoDiscovery: *nodeGroupAutoDiscovery, MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage, OkTotalUnreadyCount: *okTotalUnreadyCount, EstimatorName: *estimatorFlag,