From 095bf95fc953c8ba10f5d9555d7f633a21a13ad5 Mon Sep 17 00:00:00 2001
From: Justin Santa Barbara
Date: Mon, 30 May 2016 17:54:30 -0400
Subject: [PATCH] upup: simple command to delete (AWS) clusters

A relatively dumb retry strategy to work around dependencies, but it works
and it is difficult to do _much_ better.
---
Review note: the line structure of this patch was restored and the two
delete_cluster.go files were revised (paginated listing, nil-safe
dereferences, checked cloud type assertion, stdlib-first import groups, doc
comments, two error-message fixes).  The abbreviated blob ids on their
"index" lines therefore still refer to the originally submitted content.

 upup/cmd/upup/delete.go          |  21 ++
 upup/cmd/upup/delete_cluster.go  | 117 ++++++
 upup/pkg/kutil/delete_cluster.go | 604 +++++++++++++++++++++++++++++++
 3 files changed, 742 insertions(+)
 create mode 100644 upup/cmd/upup/delete.go
 create mode 100644 upup/cmd/upup/delete_cluster.go
 create mode 100644 upup/pkg/kutil/delete_cluster.go

diff --git a/upup/cmd/upup/delete.go b/upup/cmd/upup/delete.go
new file mode 100644
index 0000000000..7e31a8ad73
--- /dev/null
+++ b/upup/cmd/upup/delete.go
@@ -0,0 +1,21 @@
+package main
+
+import (
+	"fmt"
+
+	"github.com/spf13/cobra"
+)
+
+// deleteCmd represents the delete command
+var deleteCmd = &cobra.Command{
+	Use:   "delete",
+	Short: "delete clusters",
+	Long:  `Delete clusters`,
+	Run: func(cmd *cobra.Command, args []string) {
+		fmt.Println("Usage: cluster")
+	},
+}
+
+func init() {
+	RootCmd.AddCommand(deleteCmd)
+}
diff --git a/upup/cmd/upup/delete_cluster.go b/upup/cmd/upup/delete_cluster.go
new file mode 100644
index 0000000000..876066a99b
--- /dev/null
+++ b/upup/cmd/upup/delete_cluster.go
@@ -0,0 +1,117 @@
+package main
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/golang/glog"
+	"github.com/spf13/cobra"
+	"k8s.io/kube-deploy/upup/pkg/fi/cloudup/awsup"
+	"k8s.io/kube-deploy/upup/pkg/kutil"
+)
+
+// DeleteClusterCmd holds the flags of the "delete cluster" command.
+type DeleteClusterCmd struct {
+	ClusterID string
+	Yes       bool
+	Region    string
+}
+
+// deleteCluster receives the flag values bound in init.
+var deleteCluster DeleteClusterCmd
+
+func init() {
+	cmd := &cobra.Command{
+		Use:   "cluster",
+		Short: "Delete cluster",
+		Long:  `Deletes a k8s cluster.`,
+		Run: func(cmd *cobra.Command, args []string) {
+			err := deleteCluster.Run()
+			if err != nil {
+				glog.Exitf("%v", err)
+			}
+		},
+	}
+
+	deleteCmd.AddCommand(cmd)
+
+	cmd.Flags().BoolVar(&deleteCluster.Yes, "yes", false, "Delete without confirmation")
+
+	cmd.Flags().StringVar(&deleteCluster.ClusterID, "cluster-id", "", "cluster id")
+	cmd.Flags().StringVar(&deleteCluster.Region, "region", "", "region")
+}
+
+// Run lists every resource found for the cluster and prints it; when --yes was
+// given it then deletes them.  Deletion is retried in passes, with a pause in
+// between, because resources depend on each other and a resource that still
+// has dependencies cannot be deleted yet.  The loop currently never gives up.
+func (c *DeleteClusterCmd) Run() error {
+	if c.Region == "" {
+		return fmt.Errorf("--region is required")
+	}
+	if c.ClusterID == "" {
+		return fmt.Errorf("--cluster-id is required")
+	}
+
+	tags := map[string]string{"KubernetesCluster": c.ClusterID}
+	cloud, err := awsup.NewAWSCloud(c.Region, tags)
+	if err != nil {
+		return fmt.Errorf("error initializing AWS client: %v", err)
+	}
+
+	d := &kutil.DeleteCluster{}
+
+	d.ClusterID = c.ClusterID
+	d.Region = c.Region
+	d.Cloud = cloud
+
+	glog.Infof("TODO: S3 bucket removal")
+
+	resources, err := d.ListResources()
+	if err != nil {
+		return err
+	}
+
+	for _, r := range resources {
+		fmt.Printf("%v\n", r)
+	}
+
+	if !c.Yes {
+		return fmt.Errorf("Must specify --yes to delete")
+	}
+
+	for {
+		// TODO: Parallel delete
+		// TODO: Some form of ordering?
+		// TODO: Give up eventually?
+
+		var failed []kutil.DeletableResource
+		for _, r := range resources {
+			fmt.Printf("Deleting resource %s: ", r)
+			err := r.Delete(cloud)
+			if err != nil {
+				if kutil.IsDependencyViolation(err) {
+					fmt.Printf("still has dependencies, will retry\n")
+				} else {
+					fmt.Printf("error deleting resource, will retry: %v\n", err)
+				}
+				failed = append(failed, r)
+			} else {
+				fmt.Printf(" ok\n")
+			}
+		}
+
+		resources = failed
+		if len(resources) == 0 {
+			break
+		}
+
+		fmt.Printf("Not all resources deleted; waiting before reattempting deletion\n")
+		for _, r := range resources {
+			fmt.Printf("\t%s\n", r)
+		}
+		time.Sleep(10 * time.Second)
+	}
+
+	return nil
+}
diff --git a/upup/pkg/kutil/delete_cluster.go b/upup/pkg/kutil/delete_cluster.go
new file mode 100644
index 0000000000..eb157ffe5b
--- /dev/null
+++ b/upup/pkg/kutil/delete_cluster.go
@@ -0,0 +1,604 @@
+package kutil
+
+import (
+	"encoding/base64"
+	"fmt"
+	"strings"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/awserr"
+	"github.com/aws/aws-sdk-go/service/autoscaling"
+	"github.com/aws/aws-sdk-go/service/ec2"
+	"github.com/aws/aws-sdk-go/service/elb"
+	"github.com/golang/glog"
+	"k8s.io/kube-deploy/upup/pkg/fi"
+	"k8s.io/kube-deploy/upup/pkg/fi/cloudup/awsup"
+)
+
+// DeleteCluster implements deletion of cluster cloud resources
+// The algorithm is pretty simple: it discovers all the resources it can (primarily using tags),
+// and then it repeatedly attempts to delete them all until they are all deleted.
+// There are a few tweaks to that approach, like choosing a default ordering, but it is not much
+// smarter. Cluster deletion is a fairly rare operation anyway, and also some dependencies are invisible
+// (e.g. ELB dependencies).
+type DeleteCluster struct {
+	ClusterID string
+	Region    string
+	Cloud     fi.Cloud
+}
+
+// ListResources discovers the cloud resources that belong to the cluster.
+// Autoscaling groups, load balancers and EC2 resources are matched on the
+// cluster tags; launch configurations are matched on the INSTANCE_PREFIX entry
+// of their user data.  Listing calls are paginated so that resources beyond the
+// first page of results are not missed.
+func (c *DeleteCluster) ListResources() ([]DeletableResource, error) {
+	cloud, ok := c.Cloud.(*awsup.AWSCloud)
+	if !ok {
+		return nil, fmt.Errorf("cluster deletion requires an AWS cloud, got %T", c.Cloud)
+	}
+
+	var resources []DeletableResource
+
+	filters := cloud.BuildFilters(nil)
+	tags := cloud.BuildTags(nil, nil)
+
+	{
+		glog.V(2).Infof("Listing all Autoscaling groups matching cluster tags")
+		var asgNames []*string
+		{
+			// This only filters on tag values, so it is a coarse pre-filter;
+			// matchesAsgTags below makes the final decision.
+			var asFilters []*autoscaling.Filter
+			for _, f := range filters {
+				asFilters = append(asFilters, &autoscaling.Filter{
+					Name:   aws.String("value"),
+					Values: f.Values,
+				})
+			}
+			request := &autoscaling.DescribeTagsInput{
+				Filters: asFilters,
+			}
+			err := cloud.Autoscaling.DescribeTagsPages(request, func(p *autoscaling.DescribeTagsOutput, lastPage bool) bool {
+				for _, t := range p.Tags {
+					switch aws.StringValue(t.ResourceType) {
+					case "auto-scaling-group":
+						asgNames = append(asgNames, t.ResourceId)
+					default:
+						glog.Warningf("Unknown resource type: %v", aws.StringValue(t.ResourceType))
+					}
+				}
+				return true
+			})
+			if err != nil {
+				return nil, fmt.Errorf("error listing autoscaling cluster tags: %v", err)
+			}
+		}
+
+		if len(asgNames) != 0 {
+			request := &autoscaling.DescribeAutoScalingGroupsInput{
+				AutoScalingGroupNames: asgNames,
+			}
+			err := cloud.Autoscaling.DescribeAutoScalingGroupsPages(request, func(p *autoscaling.DescribeAutoScalingGroupsOutput, lastPage bool) bool {
+				for _, t := range p.AutoScalingGroups {
+					if !matchesAsgTags(tags, t.Tags) {
+						continue
+					}
+					resources = append(resources, &DeletableASG{Name: aws.StringValue(t.AutoScalingGroupName)})
+				}
+				return true
+			})
+			if err != nil {
+				return nil, fmt.Errorf("error listing autoscaling groups: %v", err)
+			}
+		}
+	}
+
+	{
+		glog.V(2).Infof("Listing all Autoscaling LaunchConfigurations")
+
+		// Launch configurations are not matched on tags: one belongs to the
+		// cluster when its decoded user data names the cluster as INSTANCE_PREFIX.
+		marker := "\nINSTANCE_PREFIX: " + c.ClusterID + "\n"
+		request := &autoscaling.DescribeLaunchConfigurationsInput{}
+		err := cloud.Autoscaling.DescribeLaunchConfigurationsPages(request, func(p *autoscaling.DescribeLaunchConfigurationsOutput, lastPage bool) bool {
+			for _, t := range p.LaunchConfigurations {
+				if t.UserData == nil {
+					continue
+				}
+
+				userData, err := base64.StdEncoding.DecodeString(*t.UserData)
+				if err != nil {
+					glog.Infof("Ignoring autoscaling LaunchConfiguration with invalid UserData: %v", aws.StringValue(t.LaunchConfigurationName))
+					continue
+				}
+
+				if strings.Contains(string(userData), marker) {
+					resources = append(resources, &DeletableAutoscalingLaunchConfiguration{Name: aws.StringValue(t.LaunchConfigurationName)})
+				}
+			}
+			return true
+		})
+		if err != nil {
+			return nil, fmt.Errorf("error listing autoscaling LaunchConfigurations: %v", err)
+		}
+	}
+
+	{
+		glog.V(2).Infof("Listing all ELB tags")
+
+		var lbNames []*string
+		request := &elb.DescribeLoadBalancersInput{}
+		err := cloud.ELB.DescribeLoadBalancersPages(request, func(p *elb.DescribeLoadBalancersOutput, lastPage bool) bool {
+			for _, lb := range p.LoadBalancerDescriptions {
+				lbNames = append(lbNames, lb.LoadBalancerName)
+			}
+			return true
+		})
+		if err != nil {
+			return nil, fmt.Errorf("error listing elb LoadBalancers: %v", err)
+		}
+
+		for _, name := range lbNames {
+			// TODO: batch?
+			request := &elb.DescribeTagsInput{
+				LoadBalancerNames: []*string{name},
+			}
+			response, err := cloud.ELB.DescribeTags(request)
+			if err != nil {
+				return nil, fmt.Errorf("error listing elb Tags: %v", err)
+			}
+
+			for _, t := range response.TagDescriptions {
+				if !matchesElbTags(tags, t.Tags) {
+					continue
+				}
+				resources = append(resources, &DeletableELBLoadBalancer{Name: aws.StringValue(t.LoadBalancerName)})
+			}
+		}
+	}
+
+	{
+		glog.V(2).Infof("Listing all EC2 tags matching cluster tags")
+		request := &ec2.DescribeTagsInput{
+			Filters: filters,
+		}
+		err := cloud.EC2.DescribeTagsPages(request, func(p *ec2.DescribeTagsOutput, lastPage bool) bool {
+			for _, t := range p.Tags {
+				id := aws.StringValue(t.ResourceId)
+
+				var resource DeletableResource
+				switch aws.StringValue(t.ResourceType) {
+				case "dhcp-options":
+					resource = &DeletableDHCPOptions{ID: id}
+				case "instance":
+					resource = &DeletableInstance{ID: id}
+				case "volume":
+					resource = &DeletableVolume{ID: id}
+				case "subnet":
+					resource = &DeletableSubnet{ID: id}
+				case "security-group":
+					resource = &DeletableSecurityGroup{ID: id}
+				case "internet-gateway":
+					resource = &DeletableInternetGateway{ID: id}
+				case "route-table":
+					resource = &DeletableRouteTable{ID: id}
+				case "vpc":
+					resource = &DeletableVPC{ID: id}
+				}
+
+				if resource == nil {
+					glog.Warningf("Unknown resource type: %v", aws.StringValue(t.ResourceType))
+					continue
+				}
+
+				resources = append(resources, resource)
+			}
+			return true
+		})
+		if err != nil {
+			return nil, fmt.Errorf("error listing cluster tags: %v", err)
+		}
+	}
+
+	return resources, nil
+}
+
+// matchesAsgTags reports whether every tag in tags is present, with the same
+// value, among the actual tags of an autoscaling group.
+func matchesAsgTags(tags map[string]string, actual []*autoscaling.TagDescription) bool {
+	for k, v := range tags {
+		found := false
+		for _, a := range actual {
+			if aws.StringValue(a.Key) == k {
+				if aws.StringValue(a.Value) == v {
+					found = true
+					break
+				}
+			}
+		}
+		if !found {
+			return false
+		}
+	}
+	return true
+}
+
+// matchesElbTags reports whether every tag in tags is present, with the same
+// value, among the actual tags of a load balancer.
+func matchesElbTags(tags map[string]string, actual []*elb.Tag) bool {
+	for k, v := range tags {
+		found := false
+		for _, a := range actual {
+			if aws.StringValue(a.Key) == k {
+				if aws.StringValue(a.Value) == v {
+					found = true
+					break
+				}
+			}
+		}
+		if !found {
+			return false
+		}
+	}
+	return true
+}
+
+// DeletableResource is a cloud resource that can be deleted.  Several
+// implementations return the AWS DependencyViolation error unwrapped, so that
+// callers can recognise it with IsDependencyViolation and retry later.
+type DeletableResource interface {
+	Delete(cloud fi.Cloud) error
+}
+
+// DeletableInstance is an EC2 instance; deleting it terminates the instance.
+type DeletableInstance struct {
+	ID string
+}
+
+func (r *DeletableInstance) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting EC2 instance %q", r.ID)
+	request := &ec2.TerminateInstancesInput{
+		InstanceIds: []*string{&r.ID},
+	}
+	_, err := c.EC2.TerminateInstances(request)
+	if err != nil {
+		return fmt.Errorf("error deleting instance %q: %v", r.ID, err)
+	}
+	return nil
+}
+func (r *DeletableInstance) String() string {
+	return "Instance:" + r.ID
+}
+
+// DeletableSecurityGroup is an EC2 security group; its ingress rules are revoked before it is deleted.
+type DeletableSecurityGroup struct {
+	ID string
+}
+
+func (r *DeletableSecurityGroup) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	// First clear all inter-dependent rules
+	// TODO: Move to a "pre-execute" phase?
+	{
+		request := &ec2.DescribeSecurityGroupsInput{
+			GroupIds: []*string{&r.ID},
+		}
+		response, err := c.EC2.DescribeSecurityGroups(request)
+		if err != nil {
+			return fmt.Errorf("error describing SecurityGroup %q: %v", r.ID, err)
+		}
+
+		if len(response.SecurityGroups) == 0 {
+			return nil
+		}
+		if len(response.SecurityGroups) != 1 {
+			return fmt.Errorf("found multiple SecurityGroups with ID %q", r.ID)
+		}
+		sg := response.SecurityGroups[0]
+
+		if len(sg.IpPermissions) != 0 {
+			revoke := &ec2.RevokeSecurityGroupIngressInput{
+				GroupId:       &r.ID,
+				IpPermissions: sg.IpPermissions,
+			}
+			_, err = c.EC2.RevokeSecurityGroupIngress(revoke)
+			if err != nil {
+				return fmt.Errorf("cannot revoke ingress for ID %q: %v", r.ID, err)
+			}
+		}
+	}
+
+	{
+		glog.V(2).Infof("Deleting EC2 SecurityGroup %q", r.ID)
+		request := &ec2.DeleteSecurityGroupInput{
+			GroupId: &r.ID,
+		}
+		_, err := c.EC2.DeleteSecurityGroup(request)
+		if err != nil {
+			if IsDependencyViolation(err) {
+				return err
+			}
+			return fmt.Errorf("error deleting SecurityGroup %q: %v", r.ID, err)
+		}
+	}
+	return nil
+}
+func (r *DeletableSecurityGroup) String() string {
+	return "SecurityGroup:" + r.ID
+}
+
+// DeletableVolume is an EC2 volume; a volume that no longer exists counts as deleted.
+type DeletableVolume struct {
+	ID string
+}
+
+func (r *DeletableVolume) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting EC2 volume %q", r.ID)
+	request := &ec2.DeleteVolumeInput{
+		VolumeId: &r.ID,
+	}
+	_, err := c.EC2.DeleteVolume(request)
+	if err != nil {
+		if awsErr, ok := err.(awserr.Error); ok {
+			if awsErr.Code() == "InvalidVolume.NotFound" {
+				return nil
+			}
+		}
+		return fmt.Errorf("error deleting volume %q: %v", r.ID, err)
+	}
+	return nil
+}
+func (r *DeletableVolume) String() string {
+	return "Volume:" + r.ID
+}
+
+// DeletableSubnet is an EC2 subnet.
+type DeletableSubnet struct {
+	ID string
+}
+
+// IsDependencyViolation reports whether err is an AWS error whose code is
+// "DependencyViolation"; callers treat that as "retry the deletion later".
+func IsDependencyViolation(err error) bool {
+	if awsError, ok := err.(awserr.Error); ok {
+		if awsError.Code() == "DependencyViolation" {
+			return true
+		}
+	}
+	return false
+}
+
+func (r *DeletableSubnet) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting EC2 Subnet %q", r.ID)
+	request := &ec2.DeleteSubnetInput{
+		SubnetId: &r.ID,
+	}
+	_, err := c.EC2.DeleteSubnet(request)
+	if err != nil {
+		if IsDependencyViolation(err) {
+			return err
+		}
+		return fmt.Errorf("error deleting Subnet %q: %v", r.ID, err)
+	}
+	return nil
+}
+func (r *DeletableSubnet) String() string {
+	return "Subnet:" + r.ID
+}
+
+// DeletableRouteTable is an EC2 route table.
+type DeletableRouteTable struct {
+	ID string
+}
+
+func (r *DeletableRouteTable) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting EC2 RouteTable %q", r.ID)
+	request := &ec2.DeleteRouteTableInput{
+		RouteTableId: &r.ID,
+	}
+	_, err := c.EC2.DeleteRouteTable(request)
+	if err != nil {
+		if IsDependencyViolation(err) {
+			return err
+		}
+		return fmt.Errorf("error deleting RouteTable %q: %v", r.ID, err)
+	}
+	return nil
+}
+func (r *DeletableRouteTable) String() string {
+	return "RouteTable:" + r.ID
+}
+
+// DeletableDHCPOptions is an EC2 DHCP options set.
+type DeletableDHCPOptions struct {
+	ID string
+}
+
+func (r *DeletableDHCPOptions) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting EC2 DHCPOptions %q", r.ID)
+	request := &ec2.DeleteDhcpOptionsInput{
+		DhcpOptionsId: &r.ID,
+	}
+	_, err := c.EC2.DeleteDhcpOptions(request)
+	if err != nil {
+		if IsDependencyViolation(err) {
+			return err
+		}
+		return fmt.Errorf("error deleting DHCPOptions %q: %v", r.ID, err)
+	}
+	return nil
+}
+func (r *DeletableDHCPOptions) String() string {
+	return "DHCPOptions:" + r.ID
+}
+
+// DeletableInternetGateway is an EC2 internet gateway.
+type DeletableInternetGateway struct {
+	ID string
+}
+
+func (r *DeletableInternetGateway) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	var igw *ec2.InternetGateway
+	{
+		request := &ec2.DescribeInternetGatewaysInput{
+			InternetGatewayIds: []*string{&r.ID},
+		}
+		response, err := c.EC2.DescribeInternetGateways(request)
+		if err != nil {
+			return fmt.Errorf("error describing InternetGateway %q: %v", r.ID, err)
+		}
+		if response == nil || len(response.InternetGateways) == 0 {
+			return nil
+		}
+		if len(response.InternetGateways) != 1 {
+			return fmt.Errorf("found multiple InternetGateways with id %q", r.ID)
+		}
+		igw = response.InternetGateways[0]
+	}
+
+	// Detach the gateway from every VPC it is attached to, then delete it.
+	for _, a := range igw.Attachments {
+		glog.V(2).Infof("Detaching EC2 InternetGateway %q", r.ID)
+		request := &ec2.DetachInternetGatewayInput{
+			InternetGatewayId: &r.ID,
+			VpcId:             a.VpcId,
+		}
+		_, err := c.EC2.DetachInternetGateway(request)
+		if err != nil {
+			return fmt.Errorf("error detaching InternetGateway %q: %v", r.ID, err)
+		}
+	}
+
+	{
+		glog.V(2).Infof("Deleting EC2 InternetGateway %q", r.ID)
+		request := &ec2.DeleteInternetGatewayInput{
+			InternetGatewayId: &r.ID,
+		}
+		_, err := c.EC2.DeleteInternetGateway(request)
+		if err != nil {
+			if IsDependencyViolation(err) {
+				return err
+			}
+			return fmt.Errorf("error deleting InternetGateway %q: %v", r.ID, err)
+		}
+	}
+
+	return nil
+}
+func (r *DeletableInternetGateway) String() string {
+	return "InternetGateway:" + r.ID
+}
+
+// DeletableVPC is an EC2 VPC.
+type DeletableVPC struct {
+	ID string
+}
+
+func (r *DeletableVPC) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting EC2 VPC %q", r.ID)
+	request := &ec2.DeleteVpcInput{
+		VpcId: &r.ID,
+	}
+	_, err := c.EC2.DeleteVpc(request)
+	if err != nil {
+		if IsDependencyViolation(err) {
+			return err
+		}
+		return fmt.Errorf("error deleting VPC %q: %v", r.ID, err)
+	}
+	return nil
+}
+func (r *DeletableVPC) String() string {
+	return "VPC:" + r.ID
+}
+
+// DeletableASG is an autoscaling group; it is deleted with ForceDelete set.
+type DeletableASG struct {
+	Name string
+}
+
+func (r *DeletableASG) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting autoscaling group %q", r.Name)
+	request := &autoscaling.DeleteAutoScalingGroupInput{
+		AutoScalingGroupName: &r.Name,
+		ForceDelete:          aws.Bool(true),
+	}
+	_, err := c.Autoscaling.DeleteAutoScalingGroup(request)
+	if err != nil {
+		if IsDependencyViolation(err) {
+			return err
+		}
+		return fmt.Errorf("error deleting autoscaling group %q: %v", r.Name, err)
+	}
+	return nil
+}
+func (r *DeletableASG) String() string {
+	return "autoscaling-group:" + r.Name
+}
+
+// DeletableAutoscalingLaunchConfiguration is an autoscaling launch configuration.
+type DeletableAutoscalingLaunchConfiguration struct {
+	Name string
+}
+
+func (r *DeletableAutoscalingLaunchConfiguration) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting autoscaling LaunchConfiguration %q", r.Name)
+	request := &autoscaling.DeleteLaunchConfigurationInput{
+		LaunchConfigurationName: &r.Name,
+	}
+	_, err := c.Autoscaling.DeleteLaunchConfiguration(request)
+	if err != nil {
+		return fmt.Errorf("error deleting autoscaling LaunchConfiguration %q: %v", r.Name, err)
+	}
+	return nil
+}
+
+func (r *DeletableAutoscalingLaunchConfiguration) String() string {
+	return "autoscaling-launchconfiguration:" + r.Name
+}
+
+// DeletableELBLoadBalancer is an ELB load balancer.
+type DeletableELBLoadBalancer struct {
+	Name string
+}
+
+func (r *DeletableELBLoadBalancer) Delete(cloud fi.Cloud) error {
+	c := cloud.(*awsup.AWSCloud)
+
+	glog.V(2).Infof("Deleting LoadBalancer %q", r.Name)
+	request := &elb.DeleteLoadBalancerInput{
+		LoadBalancerName: &r.Name,
+	}
+	_, err := c.ELB.DeleteLoadBalancer(request)
+	if err != nil {
+		if IsDependencyViolation(err) {
+			return err
+		}
+		return fmt.Errorf("error deleting LoadBalancer %q: %v", r.Name, err)
+	}
+	return nil
+}
+
+func (r *DeletableELBLoadBalancer) String() string {
+	return "LoadBalancer:" + r.Name
+}