Merge branch 'master' into enable-unready

Marcin Wielgus 2017-01-18 15:10:50 +01:00 committed by GitHub
commit 1d19914f93
3 changed files with 76 additions and 19 deletions

View File

@@ -1,5 +1,5 @@
# Cluster Autoscaler on AWS
The cluster autoscaler on AWS scales worker nodes within an autoscaling group. It will run as a `Deployment` in your cluster. This README will go over some of the necessary steps required to get the cluster autoscaler up and running.
The cluster autoscaler on AWS scales worker nodes within any specified autoscaling group. It will run as a `Deployment` in your cluster. This README will go over some of the necessary steps required to get the cluster autoscaler up and running.
## Kubernetes Version
Cluster autoscaler must run on v1.3.0 or greater.
@@ -26,13 +26,15 @@ The worker running the cluster autoscaler will need access to certain resources and actions
Unfortunately AWS does not support ARNs for autoscaling groups yet, so you must use "*" as the resource. More information [here](http://docs.aws.amazon.com/autoscaling/latest/userguide/IAM.html#UsingWithAutoScaling_Actions).
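For reference, a minimal policy of the shape described above might look like the following. This is an illustrative sketch only; the exact list of actions your cluster needs may differ, so check the AWS documentation linked above:
```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "autoscaling:DescribeAutoScalingGroups",
        "autoscaling:DescribeAutoScalingInstances",
        "autoscaling:SetDesiredCapacity",
        "autoscaling:TerminateInstanceInAutoScalingGroup"
      ],
      "Resource": "*"
    }
  ]
}
```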
## Deployment Specification
Your deployment configuration should look something like this:
### 1 ASG Setup (min: 1, max: 10, ASG Name: k8s-worker-asg-1)
```yaml
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    app: cluster-autoscaler
spec:
@@ -46,7 +48,7 @@ spec:
        app: cluster-autoscaler
    spec:
      containers:
        - image: {{ YOUR IMAGE HERE }}
        - image: gcr.io/google_containers/cluster-autoscaler:v0.4.0
          name: cluster-autoscaler
          resources:
            limits:
@@ -60,7 +62,7 @@ spec:
            - --v=4
            - --cloud-provider=aws
            - --skip-nodes-with-local-storage=false
            - --nodes={{ ASG MIN e.g. 1 }}:{{ASG MAX e.g. 5}}:{{ASG NAME e.g. k8s-worker-asg}}
            - --nodes=1:10:k8s-worker-asg-1
          env:
            - name: AWS_REGION
              value: us-east-1
@@ -74,6 +76,62 @@ spec:
          hostPath:
            path: "/etc/ssl/certs/ca-certificates.crt"
```
Note:
### Multiple ASG Setup
```yaml
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    app: cluster-autoscaler
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        app: cluster-autoscaler
    spec:
      containers:
        - image: gcr.io/google_containers/cluster-autoscaler:v0.4.0
          name: cluster-autoscaler
          resources:
            limits:
              cpu: 100m
              memory: 300Mi
            requests:
              cpu: 100m
              memory: 300Mi
          command:
            - ./cluster-autoscaler
            - --v=4
            - --cloud-provider=aws
            - --skip-nodes-with-local-storage=false
            - --expander=least-waste
            - --nodes=1:10:k8s-worker-asg-1
            - --nodes=1:3:k8s-worker-asg-2
          env:
            - name: AWS_REGION
              value: us-east-1
          volumeMounts:
            - name: ssl-certs
              mountPath: /etc/ssl/certs/ca-certificates.crt
              readOnly: true
          imagePullPolicy: "Always"
      volumes:
        - name: ssl-certs
          hostPath:
            path: "/etc/ssl/certs/ca-certificates.crt"
```
Common Notes and Gotchas:
- The `/etc/ssl/certs/ca-certificates.crt` file should exist by default on your EC2 instance.
- The autoscaling group should span a single availability zone for the cluster autoscaler to work. If you want to distribute workloads evenly across zones, set up multiple ASGs, with a cluster autoscaler for each ASG. At the time of writing, cluster autoscaler is unaware of availability zones: although an autoscaling group can contain instances in multiple availability zones when configured to do so, the cluster autoscaler cannot reliably add nodes to a desired zone, because AWS AutoScaling decides which zone new nodes are placed in, and that decision is outside the cluster autoscaler's control. For more information, see https://github.com/kubernetes/contrib/pull/1552#discussion_r75533090.
- Cluster autoscaler is not zone aware (for now), so if your autoscaling groups span multiple availability zones, be aware that cluster autoscaler will not distribute nodes evenly across zones. For more information, see https://github.com/kubernetes/contrib/pull/1552#discussion_r75533090.
- By default, cluster autoscaler will not terminate nodes running pods in the kube-system namespace. You can override this default behaviour by passing in the `--skip-nodes-with-system-pods=false` flag.
- By default, cluster autoscaler waits 10 minutes between scale-down operations. You can adjust this with the `--scale-down-delay` flag, e.g. `--scale-down-delay=5m` to decrease the scale-down delay to 5 minutes.
- If you're running multiple ASGs, the `--expander` flag supports three options: `random`, `most-pods` and `least-waste`. `random` expands a random ASG on scale-up. `most-pods` expands the ASG that would be able to schedule the most pods. `least-waste` expands the ASG that would waste the least CPU and memory. In the event of a tie, cluster autoscaler falls back to `random` (see the sketch below).
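To make the `least-waste` strategy concrete, here is a rough, self-contained sketch of the selection logic. It is an illustration only: the types, field names and scoring are simplified assumptions (one new node per group, and all pending pods assumed to fit on it), not the autoscaler's actual expander implementation.
```go
package main

import (
    "fmt"
    "math/rand"
)

// option is a hypothetical, simplified view of one scale-up candidate:
// which ASG to grow, what one new node there provides, and what the
// pending pods would actually request from it.
type option struct {
    asgName string
    nodeCPU int64 // millicores offered by one new node
    nodeMem int64 // bytes offered by one new node
    podCPU  int64 // millicores requested by the pending pods
    podMem  int64 // bytes requested by the pending pods
}

// wastedFraction estimates how much of the newly added capacity would sit idle.
func wastedFraction(o option) float64 {
    cpuWaste := 1.0 - float64(o.podCPU)/float64(o.nodeCPU)
    memWaste := 1.0 - float64(o.podMem)/float64(o.nodeMem)
    return (cpuWaste + memWaste) / 2
}

// leastWaste returns the candidate with the smallest wasted fraction,
// falling back to a random pick when several candidates tie.
func leastWaste(options []option) option {
    best := []option{options[0]}
    bestWaste := wastedFraction(options[0])
    for _, o := range options[1:] {
        w := wastedFraction(o)
        switch {
        case w < bestWaste:
            best, bestWaste = []option{o}, w
        case w == bestWaste:
            best = append(best, o)
        }
    }
    return best[rand.Intn(len(best))]
}

func main() {
    chosen := leastWaste([]option{
        {"k8s-worker-asg-1", 2000, 8 << 30, 1500, 6 << 30},
        {"k8s-worker-asg-2", 4000, 16 << 30, 1500, 6 << 30},
    })
    fmt.Println("would expand:", chosen.asgName)
}
```
In this toy example `k8s-worker-asg-1` wins, because the pending pods would use a larger share of its smaller nodes.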

View File

@@ -80,7 +80,7 @@ type IncorrectNodeGroupSize struct {
ExpectedSize int
// CurrentSize is the size of the node group measured on the kubernetes side.
CurrentSize int
// FirstObserved is the time whtn the given difference occurred.
// FirstObserved is the time when the given difference occurred.
FirstObserved time.Time
}
@@ -89,8 +89,8 @@ type IncorrectNodeGroupSize struct {
type UnregisteredNode struct {
// Node is a dummy node that contains only the name of the node.
Node *apiv1.Node
// UnregisteredSice is the time when the node was first spotted.
UnregisteredSice time.Time
// UnregisteredSince is the time when the node was first spotted.
UnregisteredSince time.Time
}
// ClusterStateRegistry is a structure to keep track the current state of the cluster.
@@ -254,7 +254,7 @@ type AcceptableRange struct {
CurrentTarget int
}
// calculateAcceptableRanges calcualtes how many nodes can be in a cluster.
// updateAcceptableRanges updates cluster state registry with how many nodes can be in a cluster.
// The function assumes that the nodeGroup.TargetSize() is the desired number of nodes.
// So if there has been a recent scale up of size 5 then there should be between targetSize-5 and targetSize
// nodes in ready state. In the same way, if there have been 3 nodes removed recently then
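For illustration, the bookkeeping described in that comment amounts to arithmetic along the following lines. This is a hypothetical, self-contained helper with made-up names, not the actual `ClusterStateRegistry`/`AcceptableRange` API: with a target size of 10 and a recent scale-up of 5, between 5 and 10 ready nodes is acceptable, and 3 recently removed nodes mean up to 13 nodes may still be registered.
```go
package main

import "fmt"

// acceptableNodeRange is an illustrative helper, not part of the clusterstate
// package. Given a node group's desired size and the sizes of recent, possibly
// still-in-flight scale-ups and scale-downs, it returns a lower bound on how
// many nodes should already be ready and an upper bound on how many nodes may
// still be present before the state is flagged as incorrect.
func acceptableNodeRange(targetSize, recentScaleUpSize, recentScaleDownSize int) (minReady, maxTotal int) {
    minReady = targetSize - recentScaleUpSize   // freshly requested nodes may not be ready yet
    maxTotal = targetSize + recentScaleDownSize // freshly deleted nodes may not be gone yet
    return minReady, maxTotal
}

func main() {
    // Target size 10, a scale-up of 5 in flight, 3 nodes recently removed.
    minReady, maxTotal := acceptableNodeRange(10, 5, 3)
    fmt.Println(minReady, maxTotal) // 5 13
}
```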
@@ -464,7 +464,7 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProvider cloudprovider.C
ProviderID: node,
},
},
UnregisteredSice: time,
UnregisteredSince: time,
})
}
}

View File

@@ -194,13 +194,13 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
}
// Removes unregisterd nodes if needed. Returns true if anything was removed and error if such occurred.
func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNode, contetxt *AutoscalingContext,
func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNode, context *AutoscalingContext,
currentTime time.Time) (bool, error) {
removedAny := false
for _, unregisteredNode := range unregisteredNodes {
if unregisteredNode.UnregisteredSice.Add(contetxt.UnregisteredNodeRemovalTime).Before(currentTime) {
if unregisteredNode.UnregisteredSince.Add(context.UnregisteredNodeRemovalTime).Before(currentTime) {
glog.V(0).Infof("Removing unregistered node %v", unregisteredNode.Node.Name)
nodeGroup, err := contetxt.CloudProvider.NodeGroupForNode(unregisteredNode.Node)
nodeGroup, err := context.CloudProvider.NodeGroupForNode(unregisteredNode.Node)
if err != nil {
glog.Warningf("Failed to get node group for %s: %v", unregisteredNode.Node.Name, err)
return removedAny, err
@@ -219,15 +219,14 @@ func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNod
// Sets the target size of node groups to the current number of nodes in them
// if the difference was constant for a prolonged time. Returns true if managed
// to fix something.
func fixNodeGroupSize(contetxt *AutoscalingContext, currentTime time.Time) (bool, error) {
func fixNodeGroupSize(context *AutoscalingContext, currentTime time.Time) (bool, error) {
fixed := false
for _, nodeGroup := range contetxt.CloudProvider.NodeGroups() {
incorrectSize := contetxt.ClusterStateRegistry.GetIncorrectNodeGroupSize(nodeGroup.Id())
for _, nodeGroup := range context.CloudProvider.NodeGroups() {
incorrectSize := context.ClusterStateRegistry.GetIncorrectNodeGroupSize(nodeGroup.Id())
if incorrectSize == nil {
continue
}
if incorrectSize.FirstObserved.Add(contetxt.UnregisteredNodeRemovalTime).Before(currentTime) {
if incorrectSize.FirstObserved.Add(context.UnregisteredNodeRemovalTime).Before(currentTime) {
delta := incorrectSize.CurrentSize - incorrectSize.ExpectedSize
if delta < 0 {
glog.V(0).Infof("Decreasing size of %s, expected=%d current=%d delta=%d", nodeGroup.Id(),