/* Copyright 2016 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package awsup import ( "fmt" "strings" "sync" "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/aws/aws-sdk-go/service/autoscaling/autoscalingiface" "github.com/aws/aws-sdk-go/service/cloudformation" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/aws/aws-sdk-go/service/elb" "github.com/aws/aws-sdk-go/service/elb/elbiface" "github.com/aws/aws-sdk-go/service/elbv2" "github.com/aws/aws-sdk-go/service/elbv2/elbv2iface" "github.com/aws/aws-sdk-go/service/iam" "github.com/aws/aws-sdk-go/service/iam/iamiface" "github.com/aws/aws-sdk-go/service/route53" "github.com/aws/aws-sdk-go/service/route53/route53iface" "github.com/golang/glog" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/kops/dnsprovider/pkg/dnsprovider" dnsproviderroute53 "k8s.io/kops/dnsprovider/pkg/dnsprovider/providers/aws/route53" "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/apis/kops/model" "k8s.io/kops/pkg/cloudinstances" "k8s.io/kops/pkg/featureflag" "k8s.io/kops/pkg/resources/spotinst" "k8s.io/kops/upup/pkg/fi" k8s_aws "k8s.io/kubernetes/pkg/cloudprovider/providers/aws" ) // By default, aws-sdk-go only retries 3 times, which doesn't give // much time for exponential backoff to work for serious issues. At 13 // retries, we'll try a given request for up to ~6m with exponential // backoff along the way. const ClientMaxRetries = 13 const DescribeTagsMaxAttempts = 120 const DescribeTagsRetryInterval = 2 * time.Second const DescribeTagsLogInterval = 10 // this is in "retry intervals" const CreateTagsMaxAttempts = 120 const CreateTagsRetryInterval = 2 * time.Second const CreateTagsLogInterval = 10 // this is in "retry intervals" const DeleteTagsMaxAttempts = 120 const DeleteTagsRetryInterval = 2 * time.Second const DeleteTagsLogInterval = 10 // this is in "retry intervals" const TagClusterName = "KubernetesCluster" const TagNameRolePrefix = "k8s.io/role/" const TagNameEtcdClusterPrefix = "k8s.io/etcd/" const TagRoleMaster = "master" // TagNameKopsRole is the AWS tag used to identify the role an object plays for a cluster const TagNameKopsRole = "kubernetes.io/kops/role" // TagNameClusterOwnershipPrefix is the AWS tag used for ownership const TagNameClusterOwnershipPrefix = "kubernetes.io/cluster/" const ( WellKnownAccountKopeio = "383156758163" WellKnownAccountRedhat = "309956199498" WellKnownAccountCoreOS = "595879546273" WellKnownAccountAmazonSystemLinux2 = "137112412989" WellKnownAccountUbuntu = "099720109477" ) type AWSCloud interface { fi.Cloud Region() string CloudFormation() *cloudformation.CloudFormation EC2() ec2iface.EC2API IAM() iamiface.IAMAPI ELB() elbiface.ELBAPI ELBV2() elbv2iface.ELBV2API Autoscaling() autoscalingiface.AutoScalingAPI Route53() route53iface.Route53API Spotinst() spotinst.Service // TODO: Document and rationalize these tags/filters methods AddTags(name *string, tags map[string]string) BuildFilters(name *string) []*ec2.Filter BuildTags(name *string) map[string]string Tags() map[string]string // GetTags will fetch the tags for the specified resource, retrying (up to MaxDescribeTagsAttempts) if it hits an eventual-consistency type error GetTags(resourceId string) (map[string]string, error) // CreateTags will add tags to the specified resource, retrying up to MaxCreateTagsAttempts times if it hits an eventual-consistency type error CreateTags(resourceId string, tags map[string]string) error AddAWSTags(id string, expected map[string]string) error GetELBTags(loadBalancerName string) (map[string]string, error) // CreateELBTags will add tags to the specified loadBalancer, retrying up to MaxCreateTagsAttempts times if it hits an eventual-consistency type error CreateELBTags(loadBalancerName string, tags map[string]string) error // DeleteTags will delete tags from the specified resource, retrying up to MaxCreateTagsAttempts times if it hits an eventual-consistency type error DeleteTags(id string, tags map[string]string) error // DescribeInstance is a helper that queries for the specified instance by id DescribeInstance(instanceID string) (*ec2.Instance, error) // DescribeVPC is a helper that queries for the specified vpc by id DescribeVPC(vpcID string) (*ec2.Vpc, error) DescribeAvailabilityZones() ([]*ec2.AvailabilityZone, error) // ResolveImage finds an AMI image based on the given name. // The name can be one of: // `ami-...` in which case it is presumed to be an id // owner/name in which case we find the image with the specified name, owned by owner // name in which case we find the image with the specified name, with the current owner ResolveImage(name string) (*ec2.Image, error) // WithTags created a copy of AWSCloud with the specified default-tags bound WithTags(tags map[string]string) AWSCloud // DefaultInstanceType determines a suitable instance type for the specified instance group DefaultInstanceType(cluster *kops.Cluster, ig *kops.InstanceGroup) (string, error) // FindClusterStatus gets the status of the cluster as it exists in AWS, inferred from volumes FindClusterStatus(cluster *kops.Cluster) (*kops.ClusterStatus, error) } type awsCloudImplementation struct { cf *cloudformation.CloudFormation ec2 *ec2.EC2 iam *iam.IAM elb *elb.ELB elbv2 *elbv2.ELBV2 autoscaling *autoscaling.AutoScaling route53 *route53.Route53 spotinst spotinst.Service region string tags map[string]string regionDelayers *RegionDelayers } type RegionDelayers struct { mutex sync.Mutex delayerMap map[string]*k8s_aws.CrossRequestRetryDelay } var _ fi.Cloud = &awsCloudImplementation{} func (c *awsCloudImplementation) ProviderID() kops.CloudProviderID { return kops.CloudProviderAWS } func (c *awsCloudImplementation) Region() string { return c.region } var awsCloudInstances map[string]AWSCloud = make(map[string]AWSCloud) func NewAWSCloud(region string, tags map[string]string) (AWSCloud, error) { raw := awsCloudInstances[region] if raw == nil { c := &awsCloudImplementation{ region: region, regionDelayers: &RegionDelayers{ delayerMap: make(map[string]*k8s_aws.CrossRequestRetryDelay), }, } config := aws.NewConfig().WithRegion(region) // This avoids a confusing error message when we fail to get credentials // e.g. https://github.com/kubernetes/kops/issues/605 config = config.WithCredentialsChainVerboseErrors(true) config = request.WithRetryer(config, newLoggingRetryer(ClientMaxRetries)) // We have the updated aws sdk from 1.9, but don't have https://github.com/kubernetes/kubernetes/pull/55307 // Set the SleepDelay function to work around this // TODO: Remove once we update to k8s >= 1.9 (or a version of the retry delayer than includes this) config.SleepDelay = func(d time.Duration) { glog.V(6).Infof("aws request sleeping for %v", d) time.Sleep(d) } requestLogger := newRequestLogger(2) sess, err := session.NewSession(config) if err != nil { return c, err } c.cf = cloudformation.New(sess, config) c.cf.Handlers.Send.PushFront(requestLogger) c.addHandlers(region, &c.cf.Handlers) sess, err = session.NewSession(config) if err != nil { return c, err } c.ec2 = ec2.New(sess, config) c.ec2.Handlers.Send.PushFront(requestLogger) c.addHandlers(region, &c.ec2.Handlers) sess, err = session.NewSession(config) if err != nil { return c, err } c.iam = iam.New(sess, config) c.iam.Handlers.Send.PushFront(requestLogger) c.addHandlers(region, &c.iam.Handlers) sess, err = session.NewSession(config) if err != nil { return c, err } c.elb = elb.New(sess, config) c.elb.Handlers.Send.PushFront(requestLogger) c.addHandlers(region, &c.elb.Handlers) sess, err = session.NewSession(config) if err != nil { return c, err } c.elbv2 = elbv2.New(sess, config) c.elbv2.Handlers.Send.PushFront(requestLogger) c.addHandlers(region, &c.elbv2.Handlers) sess, err = session.NewSession(config) if err != nil { return c, err } c.autoscaling = autoscaling.New(sess, config) c.autoscaling.Handlers.Send.PushFront(requestLogger) c.addHandlers(region, &c.autoscaling.Handlers) sess, err = session.NewSession(config) if err != nil { return c, err } c.route53 = route53.New(sess, config) c.route53.Handlers.Send.PushFront(requestLogger) c.addHandlers(region, &c.route53.Handlers) if featureflag.Spotinst.Enabled() { c.spotinst, err = spotinst.NewService(kops.CloudProviderAWS) if err != nil { return c, err } } awsCloudInstances[region] = c raw = c } i := raw.WithTags(tags) return i, nil } func (c *awsCloudImplementation) addHandlers(regionName string, h *request.Handlers) { delayer := c.getCrossRequestRetryDelay(regionName) if delayer != nil { h.Sign.PushFrontNamed(request.NamedHandler{ Name: "kops/delay-presign", Fn: delayer.BeforeSign, }) h.AfterRetry.PushFrontNamed(request.NamedHandler{ Name: "kops/delay-afterretry", Fn: delayer.AfterRetry, }) } } // Get a CrossRequestRetryDelay, scoped to the region, not to the request. // This means that when we hit a limit on a call, we will delay _all_ calls to the API. // We do this to protect the AWS account from becoming overloaded and effectively locked. // We also log when we hit request limits. // Note that this delays the current goroutine; this is bad behaviour and will // likely cause kops to become slow or unresponsive for cloud operations. // However, this throttle is intended only as a last resort. When we observe // this throttling, we need to address the root cause (e.g. add a delay to a // controller retry loop) func (c *awsCloudImplementation) getCrossRequestRetryDelay(regionName string) *k8s_aws.CrossRequestRetryDelay { c.regionDelayers.mutex.Lock() defer c.regionDelayers.mutex.Unlock() delayer, found := c.regionDelayers.delayerMap[regionName] if !found { delayer = k8s_aws.NewCrossRequestRetryDelay() c.regionDelayers.delayerMap[regionName] = delayer } return delayer } func NewEC2Filter(name string, values ...string) *ec2.Filter { awsValues := []*string{} for _, value := range values { awsValues = append(awsValues, aws.String(value)) } filter := &ec2.Filter{ Name: aws.String(name), Values: awsValues, } return filter } // DeleteGroup deletes an aws autoscaling group func (c *awsCloudImplementation) DeleteGroup(g *cloudinstances.CloudInstanceGroup) error { if c.spotinst != nil { return spotinst.DeleteGroup(c.spotinst, g) } return deleteGroup(c, g) } func deleteGroup(c AWSCloud, g *cloudinstances.CloudInstanceGroup) error { asg := g.Raw.(*autoscaling.Group) name := aws.StringValue(asg.AutoScalingGroupName) template := aws.StringValue(asg.LaunchConfigurationName) // Delete ASG { glog.V(2).Infof("Deleting autoscaling group %q", name) request := &autoscaling.DeleteAutoScalingGroupInput{ AutoScalingGroupName: aws.String(name), ForceDelete: aws.Bool(true), } _, err := c.Autoscaling().DeleteAutoScalingGroup(request) if err != nil { return fmt.Errorf("error deleting autoscaling group %q: %v", name, err) } } // Delete LaunchConfig { glog.V(2).Infof("Deleting autoscaling launch configuration %q", template) request := &autoscaling.DeleteLaunchConfigurationInput{ LaunchConfigurationName: aws.String(template), } _, err := c.Autoscaling().DeleteLaunchConfiguration(request) if err != nil { return fmt.Errorf("error deleting autoscaling launch configuration %q: %v", template, err) } } glog.V(8).Infof("deleted aws autoscaling group: %q", name) return nil } // DeleteInstance deletes an aws instance func (c *awsCloudImplementation) DeleteInstance(i *cloudinstances.CloudInstanceGroupMember) error { if c.spotinst != nil { return spotinst.DeleteInstance(c.spotinst, i) } return deleteInstance(c, i) } func deleteInstance(c AWSCloud, i *cloudinstances.CloudInstanceGroupMember) error { id := i.ID if id == "" { return fmt.Errorf("id was not set on CloudInstanceGroupMember: %v", i) } request := &autoscaling.TerminateInstanceInAutoScalingGroupInput{ InstanceId: aws.String(id), ShouldDecrementDesiredCapacity: aws.Bool(false), } if _, err := c.Autoscaling().TerminateInstanceInAutoScalingGroup(request); err != nil { return fmt.Errorf("error deleting instance %q: %v", id, err) } glog.V(8).Infof("deleted aws ec2 instance %q", id) return nil } // TODO not used yet, as this requires a major refactor of rolling-update code, slowly but surely // GetCloudGroups returns a groups of instances that back a kops instance groups func (c *awsCloudImplementation) GetCloudGroups(cluster *kops.Cluster, instancegroups []*kops.InstanceGroup, warnUnmatched bool, nodes []v1.Node) (map[string]*cloudinstances.CloudInstanceGroup, error) { if c.spotinst != nil { return spotinst.GetCloudGroups(c.spotinst, cluster, instancegroups, warnUnmatched, nodes) } return getCloudGroups(c, cluster, instancegroups, warnUnmatched, nodes) } func getCloudGroups(c AWSCloud, cluster *kops.Cluster, instancegroups []*kops.InstanceGroup, warnUnmatched bool, nodes []v1.Node) (map[string]*cloudinstances.CloudInstanceGroup, error) { nodeMap := cloudinstances.GetNodeMap(nodes, cluster) groups := make(map[string]*cloudinstances.CloudInstanceGroup) asgs, err := FindAutoscalingGroups(c, c.Tags()) if err != nil { return nil, fmt.Errorf("unable to find autoscale groups: %v", err) } for _, asg := range asgs { name := aws.StringValue(asg.AutoScalingGroupName) instancegroup, err := matchInstanceGroup(name, cluster.ObjectMeta.Name, instancegroups) if err != nil { return nil, fmt.Errorf("error getting instance group for ASG %q", name) } if instancegroup == nil { if warnUnmatched { glog.Warningf("Found ASG with no corresponding instance group %q", name) } continue } groups[instancegroup.ObjectMeta.Name], err = awsBuildCloudInstanceGroup(c, instancegroup, asg, nodeMap) if err != nil { return nil, fmt.Errorf("error getting cloud instance group %q: %v", instancegroup.ObjectMeta.Name, err) } } return groups, nil } // FindAutoscalingGroups finds autoscaling groups matching the specified tags // This isn't entirely trivial because autoscaling doesn't let us filter with as much precision as we would like func FindAutoscalingGroups(c AWSCloud, tags map[string]string) ([]*autoscaling.Group, error) { var asgs []*autoscaling.Group glog.V(2).Infof("Listing all Autoscaling groups matching cluster tags") var asgNames []*string { var asFilters []*autoscaling.Filter for _, v := range tags { // Not an exact match, but likely the best we can do asFilters = append(asFilters, &autoscaling.Filter{ Name: aws.String("value"), Values: []*string{aws.String(v)}, }) } request := &autoscaling.DescribeTagsInput{ Filters: asFilters, } err := c.Autoscaling().DescribeTagsPages(request, func(p *autoscaling.DescribeTagsOutput, lastPage bool) bool { for _, t := range p.Tags { switch *t.ResourceType { case "auto-scaling-group": asgNames = append(asgNames, t.ResourceId) default: glog.Warningf("Unknown resource type: %v", *t.ResourceType) } } return true }) if err != nil { return nil, fmt.Errorf("error listing autoscaling cluster tags: %v", err) } } if len(asgNames) != 0 { request := &autoscaling.DescribeAutoScalingGroupsInput{ AutoScalingGroupNames: asgNames, } err := c.Autoscaling().DescribeAutoScalingGroupsPages(request, func(p *autoscaling.DescribeAutoScalingGroupsOutput, lastPage bool) bool { for _, asg := range p.AutoScalingGroups { if !matchesAsgTags(tags, asg.Tags) { // We used an inexact filter above continue } // Check for "Delete in progress" (the only use of .Status) if asg.Status != nil { glog.Warningf("Skipping ASG %v (which matches tags): %v", *asg.AutoScalingGroupARN, *asg.Status) continue } asgs = append(asgs, asg) } return true }) if err != nil { return nil, fmt.Errorf("error listing autoscaling groups: %v", err) } } return asgs, nil } // matchesAsgTags is used to filter an asg by tags func matchesAsgTags(tags map[string]string, actual []*autoscaling.TagDescription) bool { for k, v := range tags { found := false for _, a := range actual { if aws.StringValue(a.Key) == k { if aws.StringValue(a.Value) == v { found = true break } } } if !found { return false } } return true } func awsBuildCloudInstanceGroup(c AWSCloud, ig *kops.InstanceGroup, g *autoscaling.Group, nodeMap map[string]*v1.Node) (*cloudinstances.CloudInstanceGroup, error) { newLaunchConfigName := aws.StringValue(g.LaunchConfigurationName) cg := &cloudinstances.CloudInstanceGroup{ HumanName: aws.StringValue(g.AutoScalingGroupName), InstanceGroup: ig, MinSize: int(aws.Int64Value(g.MinSize)), MaxSize: int(aws.Int64Value(g.MaxSize)), Raw: g, } for _, i := range g.Instances { instanceId := aws.StringValue(i.InstanceId) if instanceId == "" { glog.Warningf("ignoring instance with no instance id: %s", i) continue } err := cg.NewCloudInstanceGroupMember(instanceId, newLaunchConfigName, aws.StringValue(i.LaunchConfigurationName), nodeMap) if err != nil { return nil, fmt.Errorf("error creating cloud instance group member: %v", err) } } return cg, nil } func (c *awsCloudImplementation) Tags() map[string]string { // Defensive copy tags := make(map[string]string) for k, v := range c.tags { tags[k] = v } return tags } func (c *awsCloudImplementation) WithTags(tags map[string]string) AWSCloud { i := &awsCloudImplementation{} *i = *c i.tags = tags return i } var tagsEventualConsistencyErrors = map[string]bool{ "InvalidInstanceID.NotFound": true, "InvalidRouteTableID.NotFound": true, "InvalidVpcID.NotFound": true, "InvalidGroup.NotFound": true, "InvalidSubnetID.NotFound": true, "InvalidDhcpOptionsID.NotFound": true, "InvalidInternetGatewayID.NotFound": true, } // isTagsEventualConsistencyError checks if the error is one of the errors encountered when we try to create/get tags before the resource has fully 'propagated' in EC2 func isTagsEventualConsistencyError(err error) bool { if awsErr, ok := err.(awserr.Error); ok { isEventualConsistency, found := tagsEventualConsistencyErrors[awsErr.Code()] if found { return isEventualConsistency } glog.Warningf("Uncategorized error in isTagsEventualConsistencyError: %v", awsErr.Code()) } return false } // GetTags will fetch the tags for the specified resource, retrying (up to MaxDescribeTagsAttempts) if it hits an eventual-consistency type error func (c *awsCloudImplementation) GetTags(resourceID string) (map[string]string, error) { return getTags(c, resourceID) } func getTags(c AWSCloud, resourceId string) (map[string]string, error) { if resourceId == "" { return nil, fmt.Errorf("resourceId not provided to getTags") } tags := map[string]string{} request := &ec2.DescribeTagsInput{ Filters: []*ec2.Filter{ NewEC2Filter("resource-id", resourceId), }, } attempt := 0 for { attempt++ response, err := c.EC2().DescribeTags(request) if err != nil { if isTagsEventualConsistencyError(err) { if attempt > DescribeTagsMaxAttempts { return nil, fmt.Errorf("Got retryable error while getting tags on %q, but retried too many times without success: %v", resourceId, err) } if (attempt % DescribeTagsLogInterval) == 0 { glog.Infof("waiting for eventual consistency while describing tags on %q", resourceId) } glog.V(2).Infof("will retry after encountering error getting tags on %q: %v", resourceId, err) time.Sleep(DescribeTagsRetryInterval) continue } return nil, fmt.Errorf("error listing tags on %v: %v", resourceId, err) } for _, tag := range response.Tags { if tag == nil { glog.Warning("unexpected nil tag") continue } tags[aws.StringValue(tag.Key)] = aws.StringValue(tag.Value) } return tags, nil } } // CreateTags will add tags to the specified resource, retrying up to MaxCreateTagsAttempts times if it hits an eventual-consistency type error func (c *awsCloudImplementation) CreateTags(resourceId string, tags map[string]string) error { return createTags(c, resourceId, tags) } func createTags(c AWSCloud, resourceId string, tags map[string]string) error { if len(tags) == 0 { return nil } ec2Tags := []*ec2.Tag{} for k, v := range tags { ec2Tags = append(ec2Tags, &ec2.Tag{Key: aws.String(k), Value: aws.String(v)}) } attempt := 0 for { attempt++ request := &ec2.CreateTagsInput{ Tags: ec2Tags, Resources: []*string{&resourceId}, } _, err := c.EC2().CreateTags(request) if err != nil { if isTagsEventualConsistencyError(err) { if attempt > CreateTagsMaxAttempts { return fmt.Errorf("Got retryable error while creating tags on %q, but retried too many times without success: %v", resourceId, err) } if (attempt % CreateTagsLogInterval) == 0 { glog.Infof("waiting for eventual consistency while creating tags on %q", resourceId) } glog.V(2).Infof("will retry after encountering error creating tags on %q: %v", resourceId, err) time.Sleep(CreateTagsRetryInterval) continue } return fmt.Errorf("error creating tags on %v: %v", resourceId, err) } return nil } } // DeleteTags will remove tags from the specified resource, retrying up to MaxCreateTagsAttempts times if it hits an eventual-consistency type error func (c *awsCloudImplementation) DeleteTags(resourceId string, tags map[string]string) error { return deleteTags(c, resourceId, tags) } func deleteTags(c AWSCloud, resourceId string, tags map[string]string) error { if len(tags) == 0 { return nil } ec2Tags := []*ec2.Tag{} for k, v := range tags { ec2Tags = append(ec2Tags, &ec2.Tag{Key: aws.String(k), Value: aws.String(v)}) } attempt := 0 for { attempt++ request := &ec2.DeleteTagsInput{ Tags: ec2Tags, Resources: []*string{&resourceId}, } _, err := c.EC2().DeleteTags(request) if err != nil { if isTagsEventualConsistencyError(err) { if attempt > DeleteTagsMaxAttempts { return fmt.Errorf("Got retryable error while deleting tags on %q, but retried too many times without success: %v", resourceId, err) } if (attempt % DeleteTagsLogInterval) == 0 { glog.Infof("waiting for eventual consistency while deleting tags on %q", resourceId) } glog.V(2).Infof("will retry after encountering error deleting tags on %q: %v", resourceId, err) time.Sleep(DeleteTagsRetryInterval) continue } return fmt.Errorf("error deleting tags on %v: %v", resourceId, err) } return nil } } func (c *awsCloudImplementation) AddAWSTags(id string, expected map[string]string) error { return addAWSTags(c, id, expected) } func addAWSTags(c AWSCloud, id string, expected map[string]string) error { actual, err := c.GetTags(id) if err != nil { return fmt.Errorf("unexpected error fetching tags for resource: %v", err) } missing := map[string]string{} for k, v := range expected { actualValue, found := actual[k] if found && actualValue == v { continue } missing[k] = v } if len(missing) != 0 { glog.V(4).Infof("adding tags to %q: %v", id, missing) err := c.CreateTags(id, missing) if err != nil { return fmt.Errorf("error adding tags to resource %q: %v", id, err) } } return nil } func (c *awsCloudImplementation) GetELBTags(loadBalancerName string) (map[string]string, error) { return getELBTags(c, loadBalancerName) } func getELBTags(c AWSCloud, loadBalancerName string) (map[string]string, error) { tags := map[string]string{} request := &elb.DescribeTagsInput{ LoadBalancerNames: []*string{&loadBalancerName}, } attempt := 0 for { attempt++ response, err := c.ELB().DescribeTags(request) if err != nil { return nil, fmt.Errorf("error listing tags on %v: %v", loadBalancerName, err) } for _, tagset := range response.TagDescriptions { for _, tag := range tagset.Tags { tags[aws.StringValue(tag.Key)] = aws.StringValue(tag.Value) } } return tags, nil } } // CreateELBTags will add tags to the specified loadBalancer, retrying up to MaxCreateTagsAttempts times if it hits an eventual-consistency type error func (c *awsCloudImplementation) CreateELBTags(loadBalancerName string, tags map[string]string) error { return createELBTags(c, loadBalancerName, tags) } func createELBTags(c AWSCloud, loadBalancerName string, tags map[string]string) error { if len(tags) == 0 { return nil } elbTags := []*elb.Tag{} for k, v := range tags { elbTags = append(elbTags, &elb.Tag{Key: aws.String(k), Value: aws.String(v)}) } attempt := 0 for { attempt++ request := &elb.AddTagsInput{ Tags: elbTags, LoadBalancerNames: []*string{&loadBalancerName}, } _, err := c.ELB().AddTags(request) if err != nil { return fmt.Errorf("error creating tags on %v: %v", loadBalancerName, err) } return nil } } func (c *awsCloudImplementation) GetELBV2Tags(ResourceArn string) (map[string]string, error) { return getELBV2Tags(c, ResourceArn) } func getELBV2Tags(c AWSCloud, ResourceArn string) (map[string]string, error) { tags := map[string]string{} request := &elbv2.DescribeTagsInput{ ResourceArns: []*string{&ResourceArn}, } attempt := 0 for { attempt++ response, err := c.ELBV2().DescribeTags(request) if err != nil { return nil, fmt.Errorf("error listing tags on %v: %v", ResourceArn, err) } for _, tagset := range response.TagDescriptions { for _, tag := range tagset.Tags { tags[aws.StringValue(tag.Key)] = aws.StringValue(tag.Value) } } return tags, nil } } func (c *awsCloudImplementation) CreateELBV2Tags(ResourceArn string, tags map[string]string) error { return createELBV2Tags(c, ResourceArn, tags) } func createELBV2Tags(c AWSCloud, ResourceArn string, tags map[string]string) error { if len(tags) == 0 { return nil } elbv2Tags := []*elbv2.Tag{} for k, v := range tags { elbv2Tags = append(elbv2Tags, &elbv2.Tag{Key: aws.String(k), Value: aws.String(v)}) } attempt := 0 for { attempt++ request := &elbv2.AddTagsInput{ Tags: elbv2Tags, ResourceArns: []*string{&ResourceArn}, } _, err := c.ELBV2().AddTags(request) if err != nil { return fmt.Errorf("error creating tags on %v: %v", ResourceArn, err) } return nil } } func (c *awsCloudImplementation) BuildTags(name *string) map[string]string { return buildTags(c.tags, name) } func buildTags(commonTags map[string]string, name *string) map[string]string { tags := make(map[string]string) if name != nil { tags["Name"] = *name } else { glog.Warningf("Name not set when filtering by name") } for k, v := range commonTags { tags[k] = v } return tags } func (c *awsCloudImplementation) AddTags(name *string, tags map[string]string) { if name != nil { tags["Name"] = *name } for k, v := range c.tags { tags[k] = v } } func (c *awsCloudImplementation) BuildFilters(name *string) []*ec2.Filter { return buildFilters(c.tags, name) } func buildFilters(commonTags map[string]string, name *string) []*ec2.Filter { filters := []*ec2.Filter{} merged := make(map[string]string) if name != nil { merged["Name"] = *name } else { glog.Warningf("Name not set when filtering by name") } for k, v := range commonTags { merged[k] = v } for k, v := range merged { filter := NewEC2Filter("tag:"+k, v) filters = append(filters, filter) } return filters } // DescribeInstance is a helper that queries for the specified instance by id func (c *awsCloudImplementation) DescribeInstance(instanceID string) (*ec2.Instance, error) { glog.V(2).Infof("Calling DescribeInstances for instance %q", instanceID) request := &ec2.DescribeInstancesInput{ InstanceIds: []*string{&instanceID}, } response, err := c.EC2().DescribeInstances(request) if err != nil { return nil, fmt.Errorf("error listing Instances: %v", err) } if response == nil || len(response.Reservations) == 0 { return nil, nil } if len(response.Reservations) != 1 { glog.Fatalf("found multiple Reservations for %q", instanceID) } reservation := response.Reservations[0] if len(reservation.Instances) == 0 { return nil, nil } if len(reservation.Instances) != 1 { return nil, fmt.Errorf("found multiple Instances for %q", instanceID) } instance := reservation.Instances[0] return instance, nil } // DescribeVPC is a helper that queries for the specified vpc by id func (c *awsCloudImplementation) DescribeVPC(vpcID string) (*ec2.Vpc, error) { return describeVPC(c, vpcID) } func describeVPC(c AWSCloud, vpcID string) (*ec2.Vpc, error) { glog.V(2).Infof("Calling DescribeVPC for VPC %q", vpcID) request := &ec2.DescribeVpcsInput{ VpcIds: []*string{&vpcID}, } response, err := c.EC2().DescribeVpcs(request) if err != nil { return nil, fmt.Errorf("error listing VPCs: %v", err) } if response == nil || len(response.Vpcs) == 0 { return nil, nil } if len(response.Vpcs) != 1 { return nil, fmt.Errorf("found multiple VPCs for %q", vpcID) } vpc := response.Vpcs[0] return vpc, nil } // ResolveImage finds an AMI image based on the given name. // The name can be one of: // `ami-...` in which case it is presumed to be an id // owner/name in which case we find the image with the specified name, owned by owner // name in which case we find the image with the specified name, with the current owner func (c *awsCloudImplementation) ResolveImage(name string) (*ec2.Image, error) { return resolveImage(c.ec2, name) } func resolveImage(ec2Client ec2iface.EC2API, name string) (*ec2.Image, error) { // TODO: Cache this result during a single execution (we get called multiple times) glog.V(2).Infof("Calling DescribeImages to resolve name %q", name) request := &ec2.DescribeImagesInput{} if strings.HasPrefix(name, "ami-") { // ami-xxxxxxxx request.ImageIds = []*string{&name} } else { // Either or / tokens := strings.SplitN(name, "/", 2) if len(tokens) == 1 { // self is a well-known value in the DescribeImages call request.Owners = aws.StringSlice([]string{"self"}) request.Filters = append(request.Filters, NewEC2Filter("name", name)) } else if len(tokens) == 2 { owner := tokens[0] // Check for well known owner aliases switch owner { case "kope.io": owner = WellKnownAccountKopeio case "coreos.com": owner = WellKnownAccountCoreOS case "redhat.com": owner = WellKnownAccountRedhat case "amazon.com": owner = WellKnownAccountAmazonSystemLinux2 } request.Owners = []*string{&owner} request.Filters = append(request.Filters, NewEC2Filter("name", tokens[1])) } else { return nil, fmt.Errorf("image name specification not recognized: %q", name) } } response, err := ec2Client.DescribeImages(request) if err != nil { return nil, fmt.Errorf("error listing images: %v", err) } if response == nil || len(response.Images) == 0 { return nil, fmt.Errorf("could not find Image for %q", name) } image := response.Images[0] for _, v := range response.Images { itime, _ := time.Parse(time.RFC3339, *image.CreationDate) vtime, _ := time.Parse(time.RFC3339, *v.CreationDate) if vtime.After(itime) { image = v } } glog.V(4).Infof("Resolved image %q", aws.StringValue(image.ImageId)) return image, nil } func (c *awsCloudImplementation) DescribeAvailabilityZones() ([]*ec2.AvailabilityZone, error) { glog.V(2).Infof("Querying EC2 for all valid zones in region %q", c.region) request := &ec2.DescribeAvailabilityZonesInput{} response, err := c.EC2().DescribeAvailabilityZones(request) if err != nil { return nil, fmt.Errorf("error querying for valid AZs in %q - verify your AWS credentials. Error: %v", c.region, err) } return response.AvailabilityZones, nil } // ValidateZones checks that every zone in the sliced passed is recognized func ValidateZones(zones []string, cloud AWSCloud) error { azs, err := cloud.DescribeAvailabilityZones() if err != nil { return err } zoneMap := make(map[string]*ec2.AvailabilityZone) for _, z := range azs { name := aws.StringValue(z.ZoneName) zoneMap[name] = z } for _, zone := range zones { z := zoneMap[zone] if z == nil { var knownZones []string for z := range zoneMap { knownZones = append(knownZones, z) } glog.Infof("Known zones: %q", strings.Join(knownZones, ",")) return fmt.Errorf("Zone is not a recognized AZ: %q (check you have specified a valid zone?)", zone) } for _, message := range z.Messages { glog.Warningf("Zone %q has message: %q", zone, aws.StringValue(message.Message)) } if aws.StringValue(z.State) != "available" { glog.Warningf("Zone %q has state %q", zone, aws.StringValue(z.State)) } } return nil } func (c *awsCloudImplementation) DNS() (dnsprovider.Interface, error) { provider, err := dnsprovider.GetDnsProvider(dnsproviderroute53.ProviderName, nil) if err != nil { return nil, fmt.Errorf("Error building (k8s) DNS provider: %v", err) } return provider, nil } func (c *awsCloudImplementation) CloudFormation() *cloudformation.CloudFormation { return c.cf } func (c *awsCloudImplementation) EC2() ec2iface.EC2API { return c.ec2 } func (c *awsCloudImplementation) IAM() iamiface.IAMAPI { return c.iam } func (c *awsCloudImplementation) ELB() elbiface.ELBAPI { return c.elb } func (c *awsCloudImplementation) ELBV2() elbv2iface.ELBV2API { return c.elbv2 } func (c *awsCloudImplementation) Autoscaling() autoscalingiface.AutoScalingAPI { return c.autoscaling } func (c *awsCloudImplementation) Route53() route53iface.Route53API { return c.route53 } func (c *awsCloudImplementation) Spotinst() spotinst.Service { return c.spotinst } func (c *awsCloudImplementation) FindVPCInfo(vpcID string) (*fi.VPCInfo, error) { return findVPCInfo(c, vpcID) } func findVPCInfo(c AWSCloud, vpcID string) (*fi.VPCInfo, error) { vpc, err := c.DescribeVPC(vpcID) if err != nil { return nil, err } if vpc == nil { return nil, nil } vpcInfo := &fi.VPCInfo{ CIDR: aws.StringValue(vpc.CidrBlock), } // Find subnets in the VPC { glog.V(2).Infof("Calling DescribeSubnets for subnets in VPC %q", vpcID) request := &ec2.DescribeSubnetsInput{ Filters: []*ec2.Filter{NewEC2Filter("vpc-id", vpcID)}, } response, err := c.EC2().DescribeSubnets(request) if err != nil { return nil, fmt.Errorf("error listing subnets in VPC %q: %v", vpcID, err) } if response != nil { for _, subnet := range response.Subnets { subnetInfo := &fi.SubnetInfo{ ID: aws.StringValue(subnet.SubnetId), CIDR: aws.StringValue(subnet.CidrBlock), Zone: aws.StringValue(subnet.AvailabilityZone), } vpcInfo.Subnets = append(vpcInfo.Subnets, subnetInfo) } } } return vpcInfo, nil } // DefaultInstanceType determines an instance type for the specified cluster & instance group func (c *awsCloudImplementation) DefaultInstanceType(cluster *kops.Cluster, ig *kops.InstanceGroup) (string, error) { var candidates []string switch ig.Spec.Role { case kops.InstanceGroupRoleMaster: // Some regions do not (currently) support the m3 family; the c4 large is the cheapest non-burstable instance // (us-east-2, ca-central-1, eu-west-2, ap-northeast-2). // Also some accounts are no longer supporting m3 in us-east-1 zones candidates = []string{"m3.medium", "c4.large"} case kops.InstanceGroupRoleNode: candidates = []string{"t2.medium"} case kops.InstanceGroupRoleBastion: candidates = []string{"t2.micro"} default: return "", fmt.Errorf("unhandled role %q", ig.Spec.Role) } // Find the AZs the InstanceGroup targets igZones, err := model.FindZonesForInstanceGroup(cluster, ig) if err != nil { return "", err } igZonesSet := sets.NewString(igZones...) // TODO: Validate that instance type exists in all AZs, but skip AZs that don't support any VPC stuff for _, instanceType := range candidates { zones, err := c.zonesWithInstanceType(instanceType) if err != nil { return "", err } if zones.IsSuperset(igZonesSet) { return instanceType, nil } else { glog.V(2).Infof("can't use instance type %q, available in zones %v but need %v", instanceType, zones, igZones) } } return "", fmt.Errorf("could not find a suitable supported instance type for the instance group %q (type %q) in region %q", ig.Name, ig.Spec.Role, c.region) } // supportsInstanceType uses the DescribeReservedInstancesOfferings API call to determine if an instance type is supported in a region func (c *awsCloudImplementation) zonesWithInstanceType(instanceType string) (sets.String, error) { glog.V(4).Infof("checking if instance type %q is supported in region %q", instanceType, c.region) request := &ec2.DescribeReservedInstancesOfferingsInput{} request.InstanceTenancy = aws.String("default") request.IncludeMarketplace = aws.Bool(false) request.OfferingClass = aws.String("standard") request.OfferingType = aws.String("No Upfront") request.ProductDescription = aws.String("Linux/UNIX (Amazon VPC)") request.InstanceType = aws.String(instanceType) zones := sets.NewString() response, err := c.ec2.DescribeReservedInstancesOfferings(request) if err != nil { return zones, fmt.Errorf("error checking if instance type %q is supported in region %q: %v", instanceType, c.region, err) } for _, item := range response.ReservedInstancesOfferings { if aws.StringValue(item.InstanceType) == instanceType { zones.Insert(aws.StringValue(item.AvailabilityZone)) } else { glog.Warningf("skipping non-matching instance type offering: %v", item) } } return zones, nil }