package kutil

import (
	"encoding/base64"
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/awserr"
	"github.com/aws/aws-sdk-go/service/autoscaling"
	"github.com/aws/aws-sdk-go/service/ec2"
	"github.com/aws/aws-sdk-go/service/elb"
	"github.com/golang/glog"
	"k8s.io/kube-deploy/upup/pkg/fi"
	"k8s.io/kube-deploy/upup/pkg/fi/cloudup/awsup"
)

// DeleteCluster implements deletion of cluster cloud resources.
// The algorithm is pretty simple: it discovers all the resources it can (primarily using tags),
// and then it repeatedly attempts to delete them all until they are all deleted.
// There are a few tweaks to that approach, like choosing a default ordering, but it is not much
// smarter. Cluster deletion is a fairly rare operation anyway, and also some dependencies are invisible
// (e.g. ELB dependencies).
type DeleteCluster struct {
	// ClusterID is matched against the INSTANCE_PREFIX entry in launch configuration user data.
	ClusterID string
	Region    string
	// Cloud must hold an *awsup.AWSCloud; the methods in this file type-assert it.
	Cloud fi.Cloud
}

// HasStatus is implemented by resources where we want to hint the dependencies
// (ideally we would implement for everything, but realistically there are only a few where it is worthwhile).
//
// Status reports whether the resource still exists and, in blocks, the keys
// ("type:id") of resources that cannot be deleted until this resource is gone.
type HasStatus interface {
	Status(cloud fi.Cloud) (exists bool, blocks []string, err error)
}

// ListResources discovers the cloud resources belonging to the cluster.
//
// It returns a map keyed by "type:id" (or "type:name"). Autoscaling groups,
// ELBs and EC2 resources are matched by the cluster tags built by the cloud;
// launch configurations are matched by an INSTANCE_PREFIX line in their user data.
// All list calls follow pagination tokens so that resources beyond the first
// page are not silently missed.
func (c *DeleteCluster) ListResources() (map[string]DeletableResource, error) {
	cloud := c.Cloud.(*awsup.AWSCloud)

	resources := make(map[string]DeletableResource)

	filters := cloud.BuildFilters(nil)
	tags := cloud.BuildTags(nil, nil)

	{
		glog.V(2).Infof("Listing all Autoscaling groups matching cluster tags")
		var asgNames []*string
		{
			// The autoscaling tag API can only filter on the tag value here, so
			// this is a coarse pre-filter; matchesAsgTags does the exact check below.
			var asFilters []*autoscaling.Filter
			for _, f := range filters {
				asFilters = append(asFilters, &autoscaling.Filter{
					Name:   aws.String("value"),
					Values: f.Values,
				})
			}
			request := &autoscaling.DescribeTagsInput{
				Filters: asFilters,
			}
			for {
				response, err := cloud.Autoscaling.DescribeTags(request)
				if err != nil {
					return nil, fmt.Errorf("error listing autoscaling cluster tags: %v", err)
				}

				for _, t := range response.Tags {
					switch resourceType := aws.StringValue(t.ResourceType); resourceType {
					case "auto-scaling-group":
						asgNames = append(asgNames, t.ResourceId)
					default:
						glog.Warningf("Unknown resource type: %v", resourceType)
					}
				}

				if aws.StringValue(response.NextToken) == "" {
					break
				}
				request.NextToken = response.NextToken
			}
		}

		if len(asgNames) != 0 {
			request := &autoscaling.DescribeAutoScalingGroupsInput{
				AutoScalingGroupNames: asgNames,
			}
			for {
				response, err := cloud.Autoscaling.DescribeAutoScalingGroups(request)
				if err != nil {
					return nil, fmt.Errorf("error listing autoscaling groups: %v", err)
				}

				for _, t := range response.AutoScalingGroups {
					if !matchesAsgTags(tags, t.Tags) {
						continue
					}
					r := &DeletableASG{Name: aws.StringValue(t.AutoScalingGroupName)}
					resources["autoscaling-group:"+r.Name] = r
				}

				if aws.StringValue(response.NextToken) == "" {
					break
				}
				request.NextToken = response.NextToken
			}
		}
	}

	{
		glog.V(2).Infof("Listing all Autoscaling LaunchConfigurations")

		// Launch configurations are identified by the cluster ID embedded in
		// their base64-encoded user data.
		marker := "\nINSTANCE_PREFIX: " + c.ClusterID + "\n"

		request := &autoscaling.DescribeLaunchConfigurationsInput{}
		for {
			response, err := cloud.Autoscaling.DescribeLaunchConfigurations(request)
			if err != nil {
				return nil, fmt.Errorf("error listing autoscaling LaunchConfigurations: %v", err)
			}

			for _, t := range response.LaunchConfigurations {
				if t.UserData == nil {
					continue
				}

				userData, err := base64.StdEncoding.DecodeString(*t.UserData)
				if err != nil {
					glog.Infof("Ignoring autoscaling LaunchConfiguration with invalid UserData: %v", aws.StringValue(t.LaunchConfigurationName))
					continue
				}

				if strings.Contains(string(userData), marker) {
					r := &DeletableAutoscalingLaunchConfiguration{Name: aws.StringValue(t.LaunchConfigurationName)}
					resources["autoscaling-launchconfiguration:"+r.Name] = r
				}
			}

			if aws.StringValue(response.NextToken) == "" {
				break
			}
			request.NextToken = response.NextToken
		}
	}

	{
		glog.V(2).Infof("Listing all ELB tags")

		request := &elb.DescribeLoadBalancersInput{}
		for {
			response, err := cloud.ELB.DescribeLoadBalancers(request)
			if err != nil {
				return nil, fmt.Errorf("error listing elb LoadBalancers: %v", err)
			}

			for _, lb := range response.LoadBalancerDescriptions {
				// TODO: batch?
				tagRequest := &elb.DescribeTagsInput{
					LoadBalancerNames: []*string{lb.LoadBalancerName},
				}
				tagResponse, err := cloud.ELB.DescribeTags(tagRequest)
				if err != nil {
					return nil, fmt.Errorf("error listing elb Tags: %v", err)
				}

				for _, t := range tagResponse.TagDescriptions {
					if !matchesElbTags(tags, t.Tags) {
						continue
					}
					r := &DeletableELBLoadBalancer{Name: aws.StringValue(t.LoadBalancerName)}
					resources["elb:"+r.Name] = r
				}
			}

			if aws.StringValue(response.NextMarker) == "" {
				break
			}
			request.Marker = response.NextMarker
		}
	}

	{
		glog.V(2).Infof("Listing all EC2 tags matching cluster tags")
		request := &ec2.DescribeTagsInput{
			Filters: filters,
		}
		for {
			response, err := cloud.EC2.DescribeTags(request)
			if err != nil {
				return nil, fmt.Errorf("error listing cluster tags: %v", err)
			}

			for _, t := range response.Tags {
				resourceType := aws.StringValue(t.ResourceType)
				resourceID := aws.StringValue(t.ResourceId)

				var resource DeletableResource
				switch resourceType {
				case "dhcp-options":
					resource = &DeletableDHCPOptions{ID: resourceID}
				case "instance":
					resource = &DeletableInstance{ID: resourceID}
				case "volume":
					resource = &DeletableVolume{ID: resourceID}
				case "subnet":
					resource = &DeletableSubnet{ID: resourceID}
				case "security-group":
					resource = &DeletableSecurityGroup{ID: resourceID}
				case "internet-gateway":
					resource = &DeletableInternetGateway{ID: resourceID}
				case "route-table":
					resource = &DeletableRouteTable{ID: resourceID}
				case "vpc":
					resource = &DeletableVPC{ID: resourceID}
				}

				if resource == nil {
					glog.Warningf("Unknown resource type: %v", resourceType)
					continue
				}

				resources[resourceType+":"+resourceID] = resource
			}

			if aws.StringValue(response.NextToken) == "" {
				break
			}
			request.NextToken = response.NextToken
		}
	}

	return resources, nil
}

// DeleteResources deletes every resource in the map, retrying until all are gone.
//
// Resources implementing HasStatus are queried first: those that no longer
// exist are marked done, and the dependencies they report are used to delay
// deletion of a resource until everything that blocks it has been deleted.
// Each pass deletes the ready resources concurrently; after a pass with
// leftovers it sleeps 10 seconds and tries again. It returns nil once every
// resource is done and never gives up on its own.
func (c *DeleteCluster) DeleteResources(resources map[string]DeletableResource) error {
	// depMap[k] lists the resources that must be deleted before k can be.
	depMap := make(map[string][]string)

	done := make(map[string]DeletableResource)

	// mutex guards done and the per-pass failed map while goroutines are running.
	var mutex sync.Mutex

	// Initial pass to check that resources actually exist
	for k, r := range resources {
		hs, ok := r.(HasStatus)
		if !ok {
			continue
		}

		fmt.Printf("Checking status of resource %s: ", k)
		exists, blocks, err := hs.Status(c.Cloud)
		if err != nil {
			fmt.Printf("error (ignoring): %v\n", err)
		} else if exists {
			fmt.Printf("exists (gathered dependencies)\n")
		} else {
			fmt.Printf("already removed\n")
			done[k] = r
		}

		for _, block := range blocks {
			depMap[block] = append(depMap[block], k)
		}
	}

	glog.Infof("Dependencies")
	for k, v := range depMap {
		glog.Infof("\t%s\t%v", k, v)
	}

	for {
		// TODO: Some form of default ordering based on types?
		// TODO: Give up eventually?

		failed := make(map[string]DeletableResource)

		for {
			// Collect everything that is neither done, already attempted in
			// this pass, nor waiting on an undeleted dependency.
			phase := make(map[string]DeletableResource)

			for k, r := range resources {
				if _, d := done[k]; d {
					continue
				}

				if _, d := failed[k]; d {
					// Only attempt each resource once per pass
					continue
				}

				ready := true
				for _, dep := range depMap[k] {
					if _, d := done[dep]; !d {
						glog.V(4).Infof("dependency %q of %q not deleted; skipping", dep, k)
						ready = false
					}
				}
				if !ready {
					continue
				}

				phase[k] = r
			}

			if len(phase) == 0 {
				break
			}

			var wg sync.WaitGroup
			for k, r := range phase {
				wg.Add(1)
				go func(k string, r DeletableResource) {
					defer wg.Done()

					// Record the attempt up front so the resource is not
					// retried within this pass; cleared again on success.
					mutex.Lock()
					failed[k] = r
					mutex.Unlock()

					glog.V(4).Infof("Deleting resource %s: ", k)
					err := r.Delete(c.Cloud)

					mutex.Lock()
					defer mutex.Unlock()

					if err != nil {
						if IsDependencyViolation(err) {
							fmt.Printf("%s\tstill has dependencies, will retry\n", k)
							glog.V(4).Infof("API call made when had dependency %s", k)
						} else {
							fmt.Printf("%s\terror deleting resource, will retry: %v\n", k, err)
						}
						failed[k] = r
						return
					}

					fmt.Printf("%s\tok\n", k)
					delete(failed, k)
					done[k] = r
				}(k, r)
			}
			wg.Wait()
		}

		if len(resources) == len(done) {
			return nil
		}

		fmt.Printf("Not all resources deleted; waiting before reattempting deletion\n")
		for k := range resources {
			if _, d := done[k]; d {
				continue
			}

			fmt.Printf("\t%s\n", k)
		}

		time.Sleep(10 * time.Second)
	}
}

// matchesAsgTags reports whether every key/value pair in tags is present in
// the autoscaling tag descriptions in actual.
func matchesAsgTags(tags map[string]string, actual []*autoscaling.TagDescription) bool {
	for k, v := range tags {
		found := false
		for _, a := range actual {
			if aws.StringValue(a.Key) == k && aws.StringValue(a.Value) == v {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}

// matchesElbTags reports whether every key/value pair in tags is present in
// the ELB tags in actual.
func matchesElbTags(tags map[string]string, actual []*elb.Tag) bool {
	for k, v := range tags {
		found := false
		for _, a := range actual {
			if aws.StringValue(a.Key) == k && aws.StringValue(a.Value) == v {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}

// DeletableResource is a cloud resource that can be deleted.
type DeletableResource interface {
	Delete(cloud fi.Cloud) error
}

// DeletableInstance is an EC2 instance identified by its instance ID.
type DeletableInstance struct {
	ID string
}

// Delete terminates the EC2 instance.
func (r *DeletableInstance) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting EC2 instance %q", r.ID)
	request := &ec2.TerminateInstancesInput{
		InstanceIds: []*string{&r.ID},
	}
	_, err := c.EC2.TerminateInstances(request)
	if err != nil {
		return fmt.Errorf("error deleting instance %q: %v", r.ID, err)
	}
	return nil
}

// Status reports whether the instance exists (a terminated or unknown
// instance counts as not existing) and which volumes, security groups,
// subnet and VPC it blocks from deletion.
func (r *DeletableInstance) Status(cloud fi.Cloud) (bool, []string, error) {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Querying EC2 instance %q", r.ID)
	request := &ec2.DescribeInstancesInput{
		InstanceIds: []*string{&r.ID},
	}
	response, err := c.EC2.DescribeInstances(request)
	if err != nil {
		if AWSErrorCode(err) == "InvalidInstanceID.NotFound" {
			// Already gone
			return false, nil, nil
		}
		return false, nil, fmt.Errorf("error describing instance %q: %v", r.ID, err)
	}

	var found []*ec2.Instance
	for _, reservation := range response.Reservations {
		for _, instance := range reservation.Instances {
			if aws.StringValue(instance.InstanceId) == r.ID {
				found = append(found, instance)
			}
		}
	}
	if len(found) == 0 {
		return false, nil, nil
	}
	if len(found) != 1 {
		return false, nil, fmt.Errorf("found multiple instances with id: %q", r.ID)
	}

	i := found[0]

	if i.State != nil {
		stateName := aws.StringValue(i.State.Name)
		switch stateName {
		case "terminated":
			return false, nil, nil

		case "running":
			// Fine
			glog.V(4).Infof("instance %q has state=%q", r.ID, stateName)

		default:
			glog.Infof("unknown instance state for %q: %q", r.ID, stateName)
		}
	}

	var blocks []string
	for _, volume := range i.BlockDeviceMappings {
		if volume.Ebs == nil {
			continue
		}
		blocks = append(blocks, "volume:"+aws.StringValue(volume.Ebs.VolumeId))
	}
	for _, sg := range i.SecurityGroups {
		blocks = append(blocks, "security-group:"+aws.StringValue(sg.GroupId))
	}
	blocks = append(blocks, "subnet:"+aws.StringValue(i.SubnetId))
	blocks = append(blocks, "vpc:"+aws.StringValue(i.VpcId))

	return true, blocks, nil
}

func (r *DeletableInstance) String() string {
	return "Instance:" + r.ID
}

// DeletableSecurityGroup is an EC2 security group identified by its group ID.
type DeletableSecurityGroup struct {
	ID string
}

// Delete revokes all ingress rules of the security group (to break
// inter-group references) and then deletes the group. A group that no longer
// exists is treated as successfully deleted. Dependency-violation errors are
// returned unwrapped so IsDependencyViolation can recognize them.
func (r *DeletableSecurityGroup) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	// First clear all inter-dependent rules
	// TODO: Move to a "pre-execute" phase?
	{
		request := &ec2.DescribeSecurityGroupsInput{
			GroupIds: []*string{&r.ID},
		}
		response, err := c.EC2.DescribeSecurityGroups(request)
		if err != nil {
			if AWSErrorCode(err) == "InvalidGroup.NotFound" {
				// Concurrently deleted
				return nil
			}
			return fmt.Errorf("error describing SecurityGroup %q: %v", r.ID, err)
		}

		if len(response.SecurityGroups) == 0 {
			return nil
		}
		if len(response.SecurityGroups) != 1 {
			return fmt.Errorf("found mutiple SecurityGroups with ID %q", r.ID)
		}
		sg := response.SecurityGroups[0]

		if len(sg.IpPermissions) != 0 {
			revoke := &ec2.RevokeSecurityGroupIngressInput{
				GroupId:       &r.ID,
				IpPermissions: sg.IpPermissions,
			}
			_, err = c.EC2.RevokeSecurityGroupIngress(revoke)
			if err != nil {
				return fmt.Errorf("cannot revoke ingress for ID %q: %v", r.ID, err)
			}
		}
	}

	{
		glog.V(2).Infof("Deleting EC2 SecurityGroup %q", r.ID)
		request := &ec2.DeleteSecurityGroupInput{
			GroupId: &r.ID,
		}
		_, err := c.EC2.DeleteSecurityGroup(request)
		if err != nil {
			if IsDependencyViolation(err) {
				// Don't wrap
				return err
			}
			return fmt.Errorf("error deleting SecurityGroup %q: %v", r.ID, err)
		}
	}
	return nil
}

func (r *DeletableSecurityGroup) String() string {
	return "SecurityGroup:" + r.ID
}

// DeletableVolume is an EC2 (EBS) volume identified by its volume ID.
type DeletableVolume struct {
	ID string
}

// Delete deletes the volume. A volume that no longer exists is treated as
// successfully deleted; dependency-violation errors are returned unwrapped.
func (r *DeletableVolume) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting EC2 volume %q", r.ID)
	request := &ec2.DeleteVolumeInput{
		VolumeId: &r.ID,
	}
	_, err := c.EC2.DeleteVolume(request)
	if err != nil {
		if IsDependencyViolation(err) {
			// Don't wrap
			return err
		}
		if AWSErrorCode(err) == "InvalidVolume.NotFound" {
			// Concurrently deleted
			return nil
		}
		return fmt.Errorf("error deleting volume %q: %v", r.ID, err)
	}
	return nil
}

func (r *DeletableVolume) String() string {
	return "Volume:" + r.ID
}

// Status reports whether the volume exists. It reports no blocked resources.
func (r *DeletableVolume) Status(cloud fi.Cloud) (bool, []string, error) {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Querying EC2 volume %q", r.ID)
	request := &ec2.DescribeVolumesInput{
		VolumeIds: []*string{&r.ID},
	}
	response, err := c.EC2.DescribeVolumes(request)
	if err != nil {
		if AWSErrorCode(err) == "InvalidVolume.NotFound" {
			return false, nil, nil
		}
		return false, nil, fmt.Errorf("error describing volume %q: %v", r.ID, err)
	}

	var found []*ec2.Volume
	for _, v := range response.Volumes {
		if aws.StringValue(v.VolumeId) == r.ID {
			found = append(found, v)
		}
	}
	if len(found) == 0 {
		return false, nil, nil
	}
	if len(found) != 1 {
		return false, nil, fmt.Errorf("found multiple volumes with id: %q", r.ID)
	}

	// A volume does not block anything else.
	var blocks []string

	return true, blocks, nil
}

// DeletableSubnet is an EC2 subnet identified by its subnet ID.
type DeletableSubnet struct {
	ID string
}

// AWSErrorCode extracts the AWS error code from err.
// It returns the empty string if err is not an awserr.Error.
func AWSErrorCode(err error) string {
	if awsError, ok := err.(awserr.Error); ok {
		return awsError.Code()
	}
	return ""
}

// IsDependencyViolation reports whether err is an AWS error indicating that
// the resource is still in use ("DependencyViolation" or "VolumeInUse").
// Any other non-empty AWS error code is logged and reported as false.
func IsDependencyViolation(err error) bool {
	code := AWSErrorCode(err)
	switch code {
	case "":
		return false
	case "DependencyViolation", "VolumeInUse":
		return true
	default:
		glog.Infof("unexpected aws error code: %q", code)
		return false
	}
}

// Delete deletes the subnet; dependency-violation errors are returned unwrapped.
func (r *DeletableSubnet) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting EC2 Subnet %q", r.ID)
	request := &ec2.DeleteSubnetInput{
		SubnetId: &r.ID,
	}
	_, err := c.EC2.DeleteSubnet(request)
	if err != nil {
		if IsDependencyViolation(err) {
			// Don't wrap
			return err
		}
		return fmt.Errorf("error deleting Subnet %q: %v", r.ID, err)
	}
	return nil
}

func (r *DeletableSubnet) String() string {
	return "Subnet:" + r.ID
}

// Status reports whether the subnet exists and that it blocks deletion of its VPC.
func (r *DeletableSubnet) Status(cloud fi.Cloud) (bool, []string, error) {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Querying EC2 subnet %q", r.ID)
	request := &ec2.DescribeSubnetsInput{
		SubnetIds: []*string{&r.ID},
	}
	response, err := c.EC2.DescribeSubnets(request)
	if err != nil {
		if AWSErrorCode(err) == "InvalidSubnetID.NotFound" {
			// Already gone
			return false, nil, nil
		}
		return false, nil, fmt.Errorf("error describing subnet %q: %v", r.ID, err)
	}

	var found []*ec2.Subnet
	for _, v := range response.Subnets {
		if aws.StringValue(v.SubnetId) == r.ID {
			found = append(found, v)
		}
	}
	if len(found) == 0 {
		return false, nil, nil
	}
	if len(found) != 1 {
		return false, nil, fmt.Errorf("found multiple subnets with id: %q", r.ID)
	}

	n := found[0]

	var blocks []string
	blocks = append(blocks, "vpc:"+aws.StringValue(n.VpcId))

	return true, blocks, nil
}

// DeletableRouteTable is an EC2 route table identified by its ID.
type DeletableRouteTable struct {
	ID string
}

// Delete deletes the route table; dependency-violation errors are returned unwrapped.
func (r *DeletableRouteTable) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting EC2 RouteTable %q", r.ID)
	request := &ec2.DeleteRouteTableInput{
		RouteTableId: &r.ID,
	}
	_, err := c.EC2.DeleteRouteTable(request)
	if err != nil {
		if IsDependencyViolation(err) {
			// Don't wrap
			return err
		}
		return fmt.Errorf("error deleting RouteTable %q: %v", r.ID, err)
	}
	return nil
}

func (r *DeletableRouteTable) String() string {
	return "RouteTable:" + r.ID
}

// DeletableDHCPOptions is an EC2 DHCP options set identified by its ID.
type DeletableDHCPOptions struct {
	ID string
}

// Delete deletes the DHCP options set; dependency-violation errors are returned unwrapped.
func (r *DeletableDHCPOptions) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting EC2 DHCPOptions %q", r.ID)
	request := &ec2.DeleteDhcpOptionsInput{
		DhcpOptionsId: &r.ID,
	}
	_, err := c.EC2.DeleteDhcpOptions(request)
	if err != nil {
		if IsDependencyViolation(err) {
			// Don't wrap
			return err
		}
		return fmt.Errorf("error deleting %q: %v", r.ID, err)
	}
	return nil
}

func (r *DeletableDHCPOptions) String() string {
	return "DHCPOptions:" + r.ID
}

// DeletableInternetGateway is an EC2 internet gateway identified by its ID.
type DeletableInternetGateway struct {
	ID string
}

// Delete detaches the internet gateway from every VPC it is attached to and
// then deletes it. A gateway that no longer exists is treated as successfully
// deleted; dependency-violation errors are returned unwrapped.
func (r *DeletableInternetGateway) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	var igw *ec2.InternetGateway
	{
		request := &ec2.DescribeInternetGatewaysInput{
			InternetGatewayIds: []*string{&r.ID},
		}
		response, err := c.EC2.DescribeInternetGateways(request)
		if err != nil {
			if AWSErrorCode(err) == "InvalidInternetGatewayID.NotFound" {
				// Concurrently deleted
				return nil
			}
			return fmt.Errorf("error describing InternetGateway %q: %v", r.ID, err)
		}
		if response == nil || len(response.InternetGateways) == 0 {
			return nil
		}
		if len(response.InternetGateways) != 1 {
			return fmt.Errorf("found multiple InternetGateways with id %q", r.ID)
		}
		igw = response.InternetGateways[0]
	}

	for _, a := range igw.Attachments {
		glog.V(2).Infof("Detaching EC2 InternetGateway %q", r.ID)
		request := &ec2.DetachInternetGatewayInput{
			InternetGatewayId: &r.ID,
			VpcId:             a.VpcId,
		}
		_, err := c.EC2.DetachInternetGateway(request)
		if err != nil {
			if IsDependencyViolation(err) {
				// Don't wrap
				return err
			}
			return fmt.Errorf("error detaching InternetGateway %q: %v", r.ID, err)
		}
	}

	{
		glog.V(2).Infof("Deleting EC2 InternetGateway %q", r.ID)
		request := &ec2.DeleteInternetGatewayInput{
			InternetGatewayId: &r.ID,
		}
		_, err := c.EC2.DeleteInternetGateway(request)
		if err != nil {
			if IsDependencyViolation(err) {
				// Don't wrap
				return err
			}
			return fmt.Errorf("error deleting InternetGateway %q: %v", r.ID, err)
		}
	}

	return nil
}

func (r *DeletableInternetGateway) String() string {
	return "InternetGateway:" + r.ID
}

// DeletableVPC is an EC2 VPC identified by its VPC ID.
type DeletableVPC struct {
	ID string
}

// Delete deletes the VPC; dependency-violation errors are returned unwrapped.
func (r *DeletableVPC) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting EC2 VPC %q", r.ID)
	request := &ec2.DeleteVpcInput{
		VpcId: &r.ID,
	}
	_, err := c.EC2.DeleteVpc(request)
	if err != nil {
		if IsDependencyViolation(err) {
			// Don't wrap
			return err
		}
		return fmt.Errorf("error deleting VPC %q: %v", r.ID, err)
	}
	return nil
}

func (r *DeletableVPC) String() string {
	return "VPC:" + r.ID
}

// Status reports whether the VPC exists and that it blocks deletion of its
// DHCP options set.
func (r *DeletableVPC) Status(cloud fi.Cloud) (bool, []string, error) {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Querying EC2 VPC %q", r.ID)
	request := &ec2.DescribeVpcsInput{
		VpcIds: []*string{&r.ID},
	}
	response, err := c.EC2.DescribeVpcs(request)
	if err != nil {
		if AWSErrorCode(err) == "InvalidVpcID.NotFound" {
			// Already gone
			return false, nil, nil
		}
		return false, nil, fmt.Errorf("error describing VPC %q: %v", r.ID, err)
	}

	var found []*ec2.Vpc
	for _, v := range response.Vpcs {
		if aws.StringValue(v.VpcId) == r.ID {
			found = append(found, v)
		}
	}
	if len(found) == 0 {
		return false, nil, nil
	}
	if len(found) != 1 {
		return false, nil, fmt.Errorf("found multiple VPCs with id: %q", r.ID)
	}

	v := found[0]

	var blocks []string
	blocks = append(blocks, "dhcp-options:"+aws.StringValue(v.DhcpOptionsId))

	return true, blocks, nil
}

// DeletableASG is an autoscaling group identified by its name.
type DeletableASG struct {
	Name string
}

// Delete force-deletes the autoscaling group; dependency-violation errors are
// returned unwrapped.
func (r *DeletableASG) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting autoscaling group %q", r.Name)
	request := &autoscaling.DeleteAutoScalingGroupInput{
		AutoScalingGroupName: &r.Name,
		ForceDelete:          aws.Bool(true),
	}
	_, err := c.Autoscaling.DeleteAutoScalingGroup(request)
	if err != nil {
		if IsDependencyViolation(err) {
			// Don't wrap
			return err
		}
		return fmt.Errorf("error deleting autoscaling group %q: %v", r.Name, err)
	}
	return nil
}

func (r *DeletableASG) String() string {
	return "autoscaling-group:" + r.Name
}

// DeletableAutoscalingLaunchConfiguration is an autoscaling launch
// configuration identified by its name.
type DeletableAutoscalingLaunchConfiguration struct {
	Name string
}

// Delete deletes the launch configuration.
func (r *DeletableAutoscalingLaunchConfiguration) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting autoscaling LaunchConfiguration %q", r.Name)
	request := &autoscaling.DeleteLaunchConfigurationInput{
		LaunchConfigurationName: &r.Name,
	}
	_, err := c.Autoscaling.DeleteLaunchConfiguration(request)
	if err != nil {
		return fmt.Errorf("error deleting autoscaling LaunchConfiguration %q: %v", r.Name, err)
	}
	return nil
}

func (r *DeletableAutoscalingLaunchConfiguration) String() string {
	return "autoscaling-launchconfiguration:" + r.Name
}

// DeletableELBLoadBalancer is a classic ELB load balancer identified by its name.
type DeletableELBLoadBalancer struct {
	Name string
}

// Delete deletes the load balancer; dependency-violation errors are returned unwrapped.
func (r *DeletableELBLoadBalancer) Delete(cloud fi.Cloud) error {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Deleting LoadBalancer %q", r.Name)
	request := &elb.DeleteLoadBalancerInput{
		LoadBalancerName: &r.Name,
	}
	_, err := c.ELB.DeleteLoadBalancer(request)
	if err != nil {
		if IsDependencyViolation(err) {
			// Don't wrap
			return err
		}
		return fmt.Errorf("error deleting LoadBalancer %q: %v", r.Name, err)
	}
	return nil
}

func (r *DeletableELBLoadBalancer) String() string {
	return "LoadBalancer:" + r.Name
}

// Status reports whether the load balancer exists and which security groups,
// subnets and VPC it blocks from deletion.
func (r *DeletableELBLoadBalancer) Status(cloud fi.Cloud) (bool, []string, error) {
	c := cloud.(*awsup.AWSCloud)

	glog.V(2).Infof("Querying LoadBalancer instance %q", r.Name)
	request := &elb.DescribeLoadBalancersInput{
		LoadBalancerNames: []*string{&r.Name},
	}
	response, err := c.ELB.DescribeLoadBalancers(request)
	if err != nil {
		if AWSErrorCode(err) == "LoadBalancerNotFound" {
			// Already gone
			return false, nil, nil
		}
		return false, nil, fmt.Errorf("error describing LoadBalancer %q: %v", r.Name, err)
	}

	var found []*elb.LoadBalancerDescription
	for _, l := range response.LoadBalancerDescriptions {
		if aws.StringValue(l.LoadBalancerName) == r.Name {
			found = append(found, l)
		}
	}
	if len(found) == 0 {
		return false, nil, nil
	}
	if len(found) != 1 {
		return false, nil, fmt.Errorf("found multiple LoadBalancers with Name: %q", r.Name)
	}

	l := found[0]

	var blocks []string
	for _, sg := range l.SecurityGroups {
		blocks = append(blocks, "security-group:"+aws.StringValue(sg))
	}
	for _, s := range l.Subnets {
		blocks = append(blocks, "subnet:"+aws.StringValue(s))
	}
	blocks = append(blocks, "vpc:"+aws.StringValue(l.VPCId))

	return true, blocks, nil
}