From 81f5d80faead3359703ca793a7a6eb33c8816b88 Mon Sep 17 00:00:00 2001
From: andrewsykim
Date: Tue, 17 Jan 2017 11:22:37 -0500
Subject: [PATCH 1/2] Update AWS docs to include --expander option flag and
 other common notes and gotchas

---
 .../cloudprovider/aws/README.md | 70 +++++++++++++++++--
 1 file changed, 64 insertions(+), 6 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md
index a7dd081bcf..e1d611948c 100644
--- a/cluster-autoscaler/cloudprovider/aws/README.md
+++ b/cluster-autoscaler/cloudprovider/aws/README.md
@@ -1,5 +1,5 @@
 # Cluster Autoscaler on AWS
-The cluster autoscaler on AWS scales worker nodes within an autoscaling group. It will run as a `Deployment` in your cluster. This README will go over some of the necessary steps required to get the cluster autoscaler up and running.
+The cluster autoscaler on AWS scales worker nodes within any specified autoscaling group. It runs as a `Deployment` in your cluster. This README covers the steps required to get the cluster autoscaler up and running.
 
 ## Kubernetes Version
 Cluster autoscaler must run on v1.3.0 or greater.
@@ -26,13 +26,15 @@ The worker running the cluster autoscaler will need access to certain resources and actions
 Unfortunately AWS does not support ARNs for autoscaling groups yet so you must use "*" as the resource. More information [here](http://docs.aws.amazon.com/autoscaling/latest/userguide/IAM.html#UsingWithAutoScaling_Actions).
 
 ## Deployment Specification
-Your deployment configuration should look something like this:
+
+### One ASG Setup (min: 1, max: 10, ASG Name: k8s-worker-asg-1)
 ```yaml
 ---
 apiVersion: extensions/v1beta1
 kind: Deployment
 metadata:
   name: cluster-autoscaler
+  namespace: kube-system
   labels:
     app: cluster-autoscaler
 spec:
@@ -46,7 +48,7 @@ spec:
         app: cluster-autoscaler
     spec:
       containers:
-        - image: {{ YOUR IMAGE HERE }}
+        - image: gcr.io/google_containers/cluster-autoscaler:v0.4.0
           name: cluster-autoscaler
           resources:
             limits:
@@ -60,7 +62,7 @@ spec:
             - --v=4
             - --cloud-provider=aws
            - --skip-nodes-with-local-storage=false
-            - --nodes={{ ASG MIN e.g. 1 }}:{{ASG MAX e.g. 5}}:{{ASG NAME e.g. k8s-worker-asg}}
+            - --nodes=1:10:k8s-worker-asg-1
           env:
             - name: AWS_REGION
               value: us-east-1
@@ -74,6 +76,62 @@ spec:
         hostPath:
           path: "/etc/ssl/certs/ca-certificates.crt"
 ```
-Note:
+
+### Multiple ASG Setup
+```yaml
+---
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: cluster-autoscaler
+  namespace: kube-system
+  labels:
+    app: cluster-autoscaler
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: cluster-autoscaler
+  template:
+    metadata:
+      labels:
+        app: cluster-autoscaler
+    spec:
+      containers:
+        - image: gcr.io/google_containers/cluster-autoscaler:v0.4.0
+          name: cluster-autoscaler
+          resources:
+            limits:
+              cpu: 100m
+              memory: 300Mi
+            requests:
+              cpu: 100m
+              memory: 300Mi
+          command:
+            - ./cluster-autoscaler
+            - --v=4
+            - --cloud-provider=aws
+            - --skip-nodes-with-local-storage=false
+            - --expander=least-waste
+            - --nodes=1:10:k8s-worker-asg-1
+            - --nodes=1:3:k8s-worker-asg-2
+          env:
+            - name: AWS_REGION
+              value: us-east-1
+          volumeMounts:
+            - name: ssl-certs
+              mountPath: /etc/ssl/certs/ca-certificates.crt
+              readOnly: true
+          imagePullPolicy: "Always"
+      volumes:
+        - name: ssl-certs
+          hostPath:
+            path: "/etc/ssl/certs/ca-certificates.crt"
+```
+
+Common Notes and Gotchas:
 - The `/etc/ssl/certs/ca-certificates.crt` should exist by default on your ec2 instance.
-- The autoscaling group should span 1 availability zone for the cluster autoscaler to work. If you want to distribute workloads evenly across zones, set up multiple ASGs, with a cluster autoscaler for each ASG. At the time of writing this, cluster autoscaler is unaware of availability zones and although autoscaling groups can contain instances in multiple availability zones when configured so, the cluster autoscaler can't reliably add nodes to desired zones. That's because AWS AutoScaling determines which zone to add nodes which is out of the control of the cluster autoscaler. For more information, see https://github.com/kubernetes/contrib/pull/1552#discussion_r75533090.
+- Cluster autoscaler is not zone aware (for now), so if you wish to span multiple availability zones in your autoscaling groups, beware that the cluster autoscaler will not evenly distribute nodes across zones. For more information, see https://github.com/kubernetes/contrib/pull/1552#discussion_r75533090.
+- By default, cluster autoscaler will not terminate nodes running pods in the kube-system namespace. You can override this default behaviour by passing in the `--skip-nodes-with-system-pods=false` flag.
+- By default, cluster autoscaler will wait 10 minutes between scale down operations; you can adjust this using the `--scale-down-delay` flag, e.g. `--scale-down-delay=5m` to decrease the delay to 5 minutes.
+- If you're running multiple ASGs, the `--expander` flag supports three options: `random`, `most-pods` and `least-waste`. `random` will expand a random ASG on scale up. `most-pods` will scale up the ASG that can schedule the most pending pods. `least-waste` will expand the ASG that will waste the least CPU and memory, as sketched below. In the event of a tie, cluster autoscaler falls back to `random`.
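For intuition on the `least-waste` expander described in the notes above, here is a toy sketch. This is not the autoscaler's actual implementation; the `candidate` type, its fields, and the scoring function are invented for illustration. The idea is to score each candidate ASG by the fraction of a new node's CPU and memory that the pending pods would leave unused, and expand the lowest scorer:

```go
package main

import "fmt"

// candidate is a hypothetical scale-up option: the node shape one ASG would
// add, and the resources the pending pods would consume on that node.
type candidate struct {
	name             string
	nodeCPU, nodeMem float64 // capacity of one new node (millicores, MiB)
	podCPU, podMem   float64 // demand from the pending pods that would fit
}

// waste is the average unused fraction of CPU and memory on the new node.
func waste(c candidate) float64 {
	return ((1 - c.podCPU/c.nodeCPU) + (1 - c.podMem/c.nodeMem)) / 2
}

// leastWaste picks the candidate with the lowest waste score, mirroring the
// idea behind --expander=least-waste. Assumes at least one candidate.
func leastWaste(cands []candidate) candidate {
	best := cands[0]
	for _, c := range cands[1:] {
		if waste(c) < waste(best) {
			best = c
		}
	}
	return best
}

func main() {
	choice := leastWaste([]candidate{
		{name: "k8s-worker-asg-1", nodeCPU: 2000, nodeMem: 4096, podCPU: 1800, podMem: 2048},
		{name: "k8s-worker-asg-2", nodeCPU: 8000, nodeMem: 16384, podCPU: 1800, podMem: 2048},
	})
	// The smaller node leaves less idle capacity, so asg-1 wins here.
	fmt.Println("expanding:", choice.name)
}
```

Under the same framing, `most-pods` would instead maximize the number of pending pods that become schedulable, and `random` skips scoring altogether.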
From 07c753ba9a2e1f554ca96a71b02d90c7da0dfe47 Mon Sep 17 00:00:00 2001
From: andrewsykim
Date: Tue, 17 Jan 2017 12:57:01 -0500
Subject: [PATCH 2/2] cluster autoscaler: fix typos

---
 cluster-autoscaler/clusterstate/clusterstate.go | 10 +++++-----
 cluster-autoscaler/utils.go                     | 15 +++++++--------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go
index be7c51c0c1..e7ab085f3e 100644
--- a/cluster-autoscaler/clusterstate/clusterstate.go
+++ b/cluster-autoscaler/clusterstate/clusterstate.go
@@ -80,7 +80,7 @@ type IncorrectNodeGroupSize struct {
     ExpectedSize int
     // CurrentSize is the size of the node group measured on the kubernetes side.
     CurrentSize int
-    // FirstObserved is the time whtn the given difference occurred.
+    // FirstObserved is the time when the given difference occurred.
     FirstObserved time.Time
 }
 
@@ -89,8 +89,8 @@ type UnregisteredNode struct {
     // Node is a dummy node that contains only the name of the node.
     Node *apiv1.Node
-    // UnregisteredSice is the time when the node was first spotted.
-    UnregisteredSice time.Time
+    // UnregisteredSince is the time when the node was first spotted.
+    UnregisteredSince time.Time
 }
 
 // ClusterStateRegistry is a structure to keep track the current state of the cluster.
@@ -254,7 +254,7 @@ type AcceptableRange struct {
     CurrentTarget int
 }
 
-// calculateAcceptableRanges calcualtes how many nodes can be in a cluster.
+// updateAcceptableRanges updates cluster state registry with how many nodes can be in a cluster.
 // The function assumes that the nodeGroup.TargetSize() is the desired number of nodes.
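An aside on the comment corrected above: the "acceptable range" bookkeeping it describes can be pictured with a small sketch. This is illustrative only; the real `AcceptableRange` lives in `clusterstate.go`, and the field and function names below are assumptions for the example:

```go
package main

import "fmt"

// acceptableRange loosely mirrors clusterstate.AcceptableRange; these field
// names are illustrative, not the upstream ones.
type acceptableRange struct {
	minNodes, maxNodes, currentTarget int
}

// rangeFor applies the rule from the comment: after a scale-up of N, the
// ready count may lag the target by up to N; after deleting M nodes, the
// registered count may exceed the target by up to M.
func rangeFor(targetSize, recentScaleUp, recentDeletes int) acceptableRange {
	return acceptableRange{
		minNodes:      targetSize - recentScaleUp, // new instances may not be ready yet
		maxNodes:      targetSize + recentDeletes, // deleted nodes may still be registered
		currentTarget: targetSize,
	}
}

func main() {
	// Recent scale-up of 5 with target 10: 5 to 10 ready nodes is acceptable.
	fmt.Println(rangeFor(10, 5, 0)) // {5 10 10}
	// 3 nodes recently removed with target 10: 10 to 13 is acceptable.
	fmt.Println(rangeFor(10, 0, 3)) // {10 13 10}
}
```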
 // So if there has been a recent scale up of size 5 then there should be between targetSize-5 and targetSize
 // nodes in ready state. In the same way, if there have been 3 nodes removed recently then
@@ -477,7 +477,7 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProvider cloudprovider.C
                 ProviderID: node,
             },
         },
-        UnregisteredSice: time,
+        UnregisteredSince: time,
     })
 }
 }
diff --git a/cluster-autoscaler/utils.go b/cluster-autoscaler/utils.go
index 907ec17e3d..6df84b43fe 100644
--- a/cluster-autoscaler/utils.go
+++ b/cluster-autoscaler/utils.go
@@ -223,13 +223,13 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
 }
 
 // Removes unregisterd nodes if needed. Returns true if anything was removed and error if such occurred.
-func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNode, contetxt *AutoscalingContext,
+func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNode, context *AutoscalingContext,
     currentTime time.Time) (bool, error) {
     removedAny := false
     for _, unregisteredNode := range unregisteredNodes {
-        if unregisteredNode.UnregisteredSice.Add(contetxt.UnregisteredNodeRemovalTime).Before(currentTime) {
+        if unregisteredNode.UnregisteredSince.Add(context.UnregisteredNodeRemovalTime).Before(currentTime) {
             glog.V(0).Infof("Removing unregistered node %v", unregisteredNode.Node.Name)
-            nodeGroup, err := contetxt.CloudProvider.NodeGroupForNode(unregisteredNode.Node)
+            nodeGroup, err := context.CloudProvider.NodeGroupForNode(unregisteredNode.Node)
             if err != nil {
                 glog.Warningf("Failed to get node group for %s: %v", unregisteredNode.Node.Name, err)
                 return removedAny, err
@@ -248,15 +248,14 @@ func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNod
 // Sets the target size of node groups to the current number of nodes in them
 // if the difference was constant for a prolonged time. Returns true if managed
 // to fix something.
-func fixNodeGroupSize(contetxt *AutoscalingContext, currentTime time.Time) (bool, error) {
+func fixNodeGroupSize(context *AutoscalingContext, currentTime time.Time) (bool, error) {
     fixed := false
-    for _, nodeGroup := range contetxt.CloudProvider.NodeGroups() {
-        incorrectSize := contetxt.ClusterStateRegistry.GetIncorrectNodeGroupSize(nodeGroup.Id())
+    for _, nodeGroup := range context.CloudProvider.NodeGroups() {
+        incorrectSize := context.ClusterStateRegistry.GetIncorrectNodeGroupSize(nodeGroup.Id())
         if incorrectSize == nil {
             continue
         }
-        if incorrectSize.FirstObserved.Add(contetxt.UnregisteredNodeRemovalTime).Before(currentTime) {
-
+        if incorrectSize.FirstObserved.Add(context.UnregisteredNodeRemovalTime).Before(currentTime) {
             delta := incorrectSize.CurrentSize - incorrectSize.ExpectedSize
             if delta < 0 {
                 glog.V(0).Infof("Decreasing size of %s, expected=%d current=%d delta=%d", nodeGroup.Id(),