diff --git a/README.md b/README.md
index b728ff2e53..7774c09f8f 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ on AWS.
 * Based on a simple meta-model defined in a directory tree
 * Command line [autocomplete](/docs/cli/kops_completion.md)
 * Community support
-
+* [Upgrade from kube-up](/docs/upgrade_from_kubeup.md)
 
 ## Installing
 
 `kubectl` is required, see [here](http://kubernetes.io/docs/user-guide/prereqs/).
@@ -123,7 +123,7 @@ If you think you have found a bug please follow the instructions below.
 
 - Please spend a small amount of time giving due diligence to the issue tracker. Your issue might be a duplicate.
 - Set `-v 10` command line option and save the log output. Please paste this into your issue.
-- Note you version of `kops`, and the command line options you are using
+- Note the version of kops you are running (from `kops version`), and the command line options you are using
 - Open a [new issue](https://github.com/kubernetes/kops/issues/new)
 - Remember users might be searching for your issue in the future, so please give it a meaningful title to helps others.
 - Feel free to reach out to the kops community on [kubernetes slack](https://github.com/kubernetes/community#slack-chat)
diff --git a/cmd/kops/create_secret_sshpublickey.go b/cmd/kops/create_secret_sshpublickey.go
index e9c9b3d661..159d558d92 100644
--- a/cmd/kops/create_secret_sshpublickey.go
+++ b/cmd/kops/create_secret_sshpublickey.go
@@ -48,7 +48,7 @@ func NewCmdCreateSecretPublicKey(f *util.Factory, out io.Writer) *cobra.Command
 			}
 			options.Name = args[0]
 
-			err := rootCommand.ProcessArgs(args)
+			err := rootCommand.ProcessArgs(args[1:])
 			if err != nil {
 				exitWithError(err)
 			}
diff --git a/docs/upgrade_from_k8s_12.md b/docs/upgrade_from_k8s_12.md
index 4a9a62f316..5933699ffd 100644
--- a/docs/upgrade_from_k8s_12.md
+++ b/docs/upgrade_from_k8s_12.md
@@ -1,191 +1 @@
-# Upgrading from kubernetes 1.2 to kubernetes 1.3
-
-Kops let you upgrade an existing 1.2 cluster, installed using kube-up, to a cluster managed by
-kops running kubernetes version 1.3.
-
-** This is an experimental and slightly risky procedure, so we recommend backing up important data before proceeding.
-Take a snapshot of your EBS volumes; export all your data from kubectl etc. **
-
-Limitations:
-
-* kops splits etcd onto two volumes now: `main` and `events`.  We will keep the `main` data, but
-  you will lose your events history.
-* Doubtless others not yet known - please open issues if you encounter them!
-
-## Overview
-
-There are a few steps to upgrade a kubernetes cluster from 1.2 to 1.3:
-
-* First you import the existing cluster state, so you can see and edit the configuration
-* You verify the cluster configuration
-* You move existing AWS resources to your new cluster
-* You bring up the new cluster
-* You can then delete the old cluster
-
-## Importing the existing cluster
-
-The `import cluster` command reverse engineers an existing cluster, and creates a cluster
-configuration.
-
-Make sure you have set `export KOPS_STATE_STORE=s3://<mybucket>`
-
-Then import the cluster; setting `--name` and `--region` to match the old cluster.  If you're not sure
-of the old cluster name, you can find it by looking at the `KubernetesCluster` tag on your AWS resources.
-
-```
-export OLD_NAME=kubernetes
-export REGION=us-west-2
-kops import cluster --region ${REGION} --name ${OLD_NAME}
-```
-
-## Verify the cluster configuration
-
-Now have a look at the cluster configuration, to make sure it looks right.  If it doesn't, please
-open an issue.
-
-```
-kops get cluster ${OLD_NAME} -oyaml
-````
-
-## Move resources to a new cluster
-
-The upgrade moves some resources so they will be adopted by the new cluster.  There are a number of things
-this step does:
-
-* It resizes existing autoscaling groups to size 0
-* It will stop the existing master
-* It detaches the master EBS volume from the master
-* It re-tags resources to associate them with the new cluster: volumes, ELBs
-* It re-tags the VPC to associate it with the new cluster
-
-The upgrade procedure forces you to choose a new cluster name (e.g. `k8s.mydomain.com`)
-
-```
-export NEW_NAME=k8s.mydomain.com
-kops toolbox convert-imported --newname ${NEW_NAME} --name ${OLD_NAME}
-```
-
-If you now list the clusters, you should see both the old cluster & the new cluster
-
-```
-kops get clusters
-```
-
-You can also list the instance groups: `kops get ig --name ${NEW_NAME}`
-
-## Import the SSH public key
-
-The SSH public key is not easily retrieved from the old cluster, so you must add it:
-
-```
-kops create secret --name ${NEW_NAME} sshpublickey admin -i ~/.ssh/id_rsa.pub
-```
-
-## Bring up the new cluster
-
-Use the update command to bring up the new cluster:
-
-```
-kops update cluster ${NEW_NAME}
-```
-
-Things to check are that it is reusing the existing volume for the _main_ etcd cluster (but not the events clusters).
-
-And then when you are happy:
-
-```
-kops update cluster ${NEW_NAME} --yes
-```
-
-
-## Export kubecfg settings to access the new cluster
-
-You can export a kubecfg (although update cluster did this automatically): `kops export kubecfg ${NEW_NAME}`
-
-
-## Workaround for secret import failure
-
-The import procedure tries to preserve the CA certificates, but unfortunately this isn't supported
-in kubernetes until [#34029](https://github.com/kubernetes/kubernetes/pull/34029) ships (should be
-in 1.5).
-
-So you will need to delete the service-accounts, so they can be recreated with the correct keys.
-
-Unfortunately, until you do this, some services (most notably internal & external DNS) will not work.
-Because of that you must SSH to the master to do this repair.
-
-You can get the public IP address of the master from the AWS console, or by doing this:
-
-```
-aws ec2 --region $REGION describe-instances \
-  --filter Name=tag:KubernetesCluster,Values=${NEW_NAME} \
-           Name=tag-key,Values=k8s.io/role/master \
-           Name=instance-state-name,Values=running \
-  --query Reservations[].Instances[].PublicIpAddress \
-  --output text
-```
-
-Then `ssh admin@<ip>` (the SSH key will be the one you added above, i.e. `~/.ssh/id_rsa.pub`), and run:
-
-First check that the apiserver is running:
-```
-kubectl get nodes
-```
-
-You should see only one node (the master).  Then run
-```
-NS=`kubectl get namespaces -o 'jsonpath={.items[*].metadata.name}'`
-for i in ${NS}; do kubectl get secrets --namespace=${i} --no-headers | grep "kubernetes.io/service-account-token" | awk '{print $1}' | xargs -I {} kubectl delete secret --namespace=$i {}; done
-sleep 60 # Allow for new secrets to be created
-kubectl delete pods -lk8s-app=dns-controller --namespace=kube-system
-kubectl delete pods -lk8s-app=kube-dns --namespace=kube-system
-```
-
-
-You probably also want to delete the imported DNS services from prior versions:
-
-```
-kubectl delete rc -lk8s-app=kube-dns --namespace=kube-system
-```
-
-
-Within a few minutes the new cluster should be running.
-
-Try `kubectl get nodes --show-labels`, `kubectl get pods --all-namespaces` etc until you are sure that all is well.
-
-This should work even without being SSH-ed into the master, although it can take a few minutes
-for DNS to propagate.  If it doesn't work, double-check that you have specified a valid
-domain name for your cluster, that records have been created in Route53, and that you
-can resolve those records from your machine (using `nslookup` or `dig`).
-
-## Other fixes
-
-* If you're using a manually created ELB, the auto-scaling groups change, so you will need to reconfigure
-your ELBs to include the new auto-scaling group(s).
-
-* It is recommended to delete old kubernetes system services that we imported (and replace them with newer versions):
-
-```
-kubectl delete rc -lk8s-app=kube-dns --namespace=kube-system
-
-kubectl delete rc -lk8s-app=elasticsearch-logging --namespace=kube-system
-kubectl delete rc -lk8s-app=kibana-logging --namespace=kube-system
-kubectl delete rc -lk8s-app=kubernetes-dashboard --namespace=kube-system
-kubectl delete rc -lk8s-app=influxGrafana --namespace=kube-system
-
-kubectl delete deployment -lk8s-app=heapster --namespace=kube-system
-```
-
-## Delete remaining resources of the old cluster
-
-`kops delete cluster ${OLD_NAME}`
-> ```
-TYPE                 NAME                                  ID
-autoscaling-config   kubernetes-minion-group-us-west-2a    kubernetes-minion-group-us-west-2a
-autoscaling-group    kubernetes-minion                     kubernetes-minion-group-us-west-2a
-instance             kubernetes-master                     i-67af2ec8
-```
-
-And once you've confirmed it looks right, run with `--yes`
-
-You will also need to release the old ElasticIP manually.
+[Moved here](upgrade_from_kubeup.md)
diff --git a/docs/upgrade_from_kubeup.md b/docs/upgrade_from_kubeup.md
new file mode 100644
index 0000000000..71c9abf242
--- /dev/null
+++ b/docs/upgrade_from_kubeup.md
@@ -0,0 +1,197 @@
+# Upgrading from kube-up to kops
+
+kops lets you upgrade an existing kubernetes cluster installed using kube-up to a cluster managed by
+kops.
+
+**This is a slightly risky procedure, so we recommend backing up important data before proceeding.
+Take a snapshot of your EBS volumes; export all your data from kubectl etc.**
+
+Limitations:
+
+* kops now splits etcd onto two volumes: `main` and `events`.  We will keep the `main` data, but
+  you will lose your events history.
+
+## Overview
+
+There are a few steps to upgrade a kubernetes cluster:
+
+* First you import the existing cluster state, so you can see and edit the configuration
+* You verify the cluster configuration
+* You move existing AWS resources to your new cluster
+* You bring up the new cluster
+* You can then delete the old cluster
+
+## Importing the existing cluster
+
+The `import cluster` command reverse engineers an existing cluster, and creates a cluster
+configuration.
+
+Make sure you have set `export KOPS_STATE_STORE=s3://<mybucket>`
+
+Then import the cluster, setting `--name` and `--region` to match the old cluster.  If you're not sure
+of the old cluster name, you can find it by looking at the `KubernetesCluster` tag on your AWS resources.
+
+```
+export OLD_NAME=kubernetes
+export REGION=us-west-2
+kops import cluster --region ${REGION} --name ${OLD_NAME}
+```
+
+## Verify the cluster configuration
+
+Now have a look at the cluster configuration, to make sure it looks right.  If it doesn't, please
+open an issue.
+
+```
+kops get cluster ${OLD_NAME} -oyaml
+```
+
+## Move resources to a new cluster
+
+The upgrade moves some resources so they will be adopted by the new cluster.  There are a number of things
+this step does:
+
+* It resizes existing autoscaling groups to size 0
+* It will stop the existing master
+* It detaches the master EBS volume from the master
+* It re-tags resources to associate them with the new cluster: volumes, ELBs
+* It re-tags the VPC to associate it with the new cluster
+
+The upgrade procedure forces you to choose a new cluster name (e.g. `k8s.mydomain.com`)
+
+```
+export NEW_NAME=k8s.mydomain.com
+kops toolbox convert-imported --newname ${NEW_NAME} --name ${OLD_NAME}
+```
+
+If you now list the clusters, you should see both the old cluster & the new cluster:
+
+```
+kops get clusters
+```
+
+You can also list the instance groups: `kops get ig --name ${NEW_NAME}`
+
+## Import the SSH public key
+
+The SSH public key is not easily retrieved from the old cluster, so you must add it:
+
+```
+kops create secret --name ${NEW_NAME} sshpublickey admin -i ~/.ssh/id_rsa.pub
+```
+
+## Bring up the new cluster
+
+Use the update command to bring up the new cluster:
+
+```
+kops update cluster ${NEW_NAME}
+```
+
+Check that it is reusing the existing volume for the _main_ etcd cluster (but not the events cluster).
+
+And then when you are happy:
+
+```
+kops update cluster ${NEW_NAME} --yes
+```
+
+## Export kubecfg settings to access the new cluster
+
+You can export a kubecfg (although `update cluster` did this automatically): `kops export kubecfg ${NEW_NAME}`
+
+## Workaround for secret import failure
+
+The import procedure tries to preserve the CA certificates, but unfortunately this isn't supported
+in kubernetes until [#34029](https://github.com/kubernetes/kubernetes/pull/34029) ships (should be
+in 1.5).
+
+So you will need to delete the service-accounts, so they can be recreated with the correct keys.
+
+Unfortunately, until you do this, some services (most notably internal & external DNS) will not work.
+Because of that you must SSH to the master to do this repair.
+
+You can get the public IP address of the master from the AWS console, or by doing this:
+
+```
+aws ec2 --region $REGION describe-instances \
+  --filter Name=tag:KubernetesCluster,Values=${NEW_NAME} \
+           Name=tag-key,Values=k8s.io/role/master \
+           Name=instance-state-name,Values=running \
+  --query Reservations[].Instances[].PublicIpAddress \
+  --output text
+```
+
+Then `ssh admin@<ip>` (the SSH key will be the one you added above, i.e. `~/.ssh/id_rsa.pub`), and run:
+
+First check that the apiserver is running:
+
+```
+kubectl get nodes
+```
+
+You should see only one node (the master).  Then run:
+
+```
+NS=`kubectl get namespaces -o 'jsonpath={.items[*].metadata.name}'`
+for i in ${NS}; do kubectl get secrets --namespace=${i} --no-headers | grep "kubernetes.io/service-account-token" | awk '{print $1}' | xargs -I {} kubectl delete secret --namespace=$i {}; done
+sleep 60 # Allow for new secrets to be created
+kubectl delete pods -lk8s-app=dns-controller --namespace=kube-system
+kubectl delete pods -lk8s-app=kube-dns --namespace=kube-system
+```
+
+You probably also want to delete the imported DNS services from prior versions:
+
+```
+kubectl delete rc -lk8s-app=kube-dns --namespace=kube-system    # Will work for k8s <= 1.4
+kubectl delete deployment --namespace=kube-system kube-dns      # Will work for k8s >= 1.5
+```
+
+Within a few minutes the new cluster should be running.
+
+Try `kubectl get nodes --show-labels`, `kubectl get pods --all-namespaces` etc. until you are sure that all is well.
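+
+If you would rather script that check than re-run it by hand, a minimal sketch along these lines
+works (this is only an illustration, not part of kops; it assumes `kubectl` is already pointing at
+the new cluster):
+
+```
+# Poll until every node reports Ready, then list any pods that are not yet Running.
+while kubectl get nodes --no-headers | awk '{print $2}' | grep -qv '^Ready'; do
+  echo "waiting for all nodes to become Ready..."
+  sleep 10
+done
+kubectl get pods --all-namespaces --no-headers | grep -v Running
+```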
+
+This should work even without being SSH-ed into the master, although it can take a few minutes
+for DNS to propagate.  If it doesn't work, double-check that you have specified a valid
+domain name for your cluster, that records have been created in Route53, and that you
+can resolve those records from your machine (using `nslookup` or `dig`).
+
+## Other fixes
+
+* If you're using a manually created ELB, the auto-scaling groups change, so you will need to reconfigure
+your ELBs to include the new auto-scaling group(s).
+
+* It is recommended to delete the old kubernetes system services that we imported (and replace them with newer versions):
+
+```
+kubectl delete rc -lk8s-app=kube-dns --namespace=kube-system                       # <= 1.4
+kubectl delete deployment --namespace=kube-system kube-dns                         # 1.5
+
+kubectl delete rc -lk8s-app=elasticsearch-logging --namespace=kube-system
+
+kubectl delete rc -lk8s-app=kibana-logging --namespace=kube-system                 # <= 1.4
+kubectl delete deployment -lk8s-app=kibana-logging --namespace=kube-system         # 1.5
+
+kubectl delete rc -lk8s-app=kubernetes-dashboard --namespace=kube-system           # <= 1.4
+kubectl delete deployment -lk8s-app=kubernetes-dashboard --namespace=kube-system   # 1.5
+
+kubectl delete rc -lk8s-app=influxGrafana --namespace=kube-system
+
+kubectl delete deployment -lk8s-app=heapster --namespace=kube-system
+```
+
+## Delete remaining resources of the old cluster
+
+Run `kops delete cluster ${OLD_NAME}` to preview what will be deleted:
+
+> ```
+> TYPE                 NAME                                  ID
+> autoscaling-config   kubernetes-minion-group-us-west-2a    kubernetes-minion-group-us-west-2a
+> autoscaling-group    kubernetes-minion                     kubernetes-minion-group-us-west-2a
+> instance             kubernetes-master                     i-67af2ec8
+> ```
+
+And once you've confirmed it looks right, run again with `--yes`.
+
+You will also need to release the old Elastic IP manually.
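+
+For that last step, a hedged AWS CLI sketch (here `<allocation-id>` is a placeholder you must look
+up yourself; releasing an address is irreversible, so double-check that it belonged to the old
+cluster):
+
+```
+# List the Elastic IPs in the region, with their allocation ids and associations.
+aws ec2 --region ${REGION} describe-addresses --output table
+
+# Release the address that belonged to the old cluster.
+aws ec2 --region ${REGION} release-address --allocation-id <allocation-id>
+```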
diff --git a/upup/pkg/kutil/delete_cluster.go b/upup/pkg/kutil/delete_cluster.go
index 29e4d49037..f1e0e62a00 100644
--- a/upup/pkg/kutil/delete_cluster.go
+++ b/upup/pkg/kutil/delete_cluster.go
@@ -20,7 +20,6 @@ import (
 	"bufio"
 	"bytes"
 	"compress/gzip"
-	"encoding/base64"
 	"fmt"
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/autoscaling"
@@ -32,6 +31,7 @@ import (
 	"io"
 	"k8s.io/kops/upup/pkg/fi"
 	"k8s.io/kops/upup/pkg/fi/cloudup/awsup"
+	"k8s.io/kubernetes/pkg/util/sets"
 	"strings"
 	"sync"
 	"time"
@@ -118,8 +118,6 @@ func (c *DeleteCluster) ListResources() (map[string]*ResourceTracker, error) {
 		ListELBs,
 		// ASG
 		ListAutoScalingGroups,
-		ListAutoScalingLaunchConfigurations,
-		// LC
 
 		// Route 53
 		ListRoute53Records,
@@ -166,6 +164,26 @@ func (c *DeleteCluster) ListResources() (map[string]*ResourceTracker, error) {
 		}
 	}
 
+	{
+		// We delete a launch configuration if it is bound to one of the tagged security groups
+		securityGroups := sets.NewString()
+		for k := range resources {
+			if !strings.HasPrefix(k, "security-group:") {
+				continue
+			}
+			id := strings.TrimPrefix(k, "security-group:")
+			securityGroups.Insert(id)
+		}
+		lcs, err := FindAutoScalingLaunchConfigurations(cloud, securityGroups)
+		if err != nil {
+			return nil, err
+		}
+
+		for _, t := range lcs {
+			resources[t.Type+":"+t.ID] = t
+		}
+	}
+
 	if err := addUntaggedRouteTables(cloud, c.ClusterName, resources); err != nil {
 		return nil, err
 	}
@@ -726,6 +744,11 @@ func DeleteKeypair(cloud fi.Cloud, r *ResourceTracker) error {
 }
 
 func ListKeypairs(cloud fi.Cloud, clusterName string) ([]*ResourceTracker, error) {
+	if !strings.Contains(clusterName, ".") {
+		glog.Infof("cluster %q is legacy (kube-up) cluster; won't delete keypairs", clusterName)
+		return nil, nil
+	}
+
 	c := cloud.(awsup.AWSCloud)
 
 	keypairName := "kubernetes." + clusterName
@@ -1312,52 +1335,40 @@ func ListAutoScalingGroups(cloud fi.Cloud, clusterName string) ([]*ResourceTrack
 	return trackers, nil
 }
 
-func ListAutoScalingLaunchConfigurations(cloud fi.Cloud, clusterName string) ([]*ResourceTracker, error) {
+func FindAutoScalingLaunchConfigurations(cloud fi.Cloud, securityGroups sets.String) ([]*ResourceTracker, error) {
 	c := cloud.(awsup.AWSCloud)
 
-	glog.V(2).Infof("Listing all Autoscaling LaunchConfigurations for cluster %q", clusterName)
+	glog.V(2).Infof("Finding all Autoscaling LaunchConfigurations by security group")
 
 	var trackers []*ResourceTracker
 
 	request := &autoscaling.DescribeLaunchConfigurationsInput{}
 	err := c.Autoscaling().DescribeLaunchConfigurationsPages(request, func(p *autoscaling.DescribeLaunchConfigurationsOutput, lastPage bool) bool {
 		for _, t := range p.LaunchConfigurations {
-			if t.UserData == nil {
-				continue
-			}
-
-			b, err := base64.StdEncoding.DecodeString(aws.StringValue(t.UserData))
-			if err != nil {
-				glog.Infof("Ignoring autoscaling LaunchConfiguration with invalid UserData: %v", *t.LaunchConfigurationName)
-				continue
-			}
-
-			userData, err := UserDataToString(b)
-			if err != nil {
-				glog.Infof("Ignoring autoscaling LaunchConfiguration with invalid UserData: %v", *t.LaunchConfigurationName)
-				continue
-			}
-
-			//I finally found what was polluting logs with the bash scripts.
-			//glog.V(8).Infof("UserData: %s", string(userData))
-
-			// Adding in strings.Contains() here on cluster name, making the grand assumption that if our clustername string is present
-			// in the name of the LC, it's safe to delete.  This solves the bastion LC problem.
-			if extractClusterName(userData) == clusterName || strings.Contains(*t.LaunchConfigurationName, clusterName) {
-				tracker := &ResourceTracker{
-					Name:    aws.StringValue(t.LaunchConfigurationName),
-					ID:      aws.StringValue(t.LaunchConfigurationName),
-					Type:    TypeAutoscalingLaunchConfig,
-					deleter: DeleteAutoscalingLaunchConfiguration,
+			found := false
+			for _, sg := range t.SecurityGroups {
+				if securityGroups.Has(aws.StringValue(sg)) {
+					found = true
+					break
 				}
-
-				var blocks []string
-				//blocks = append(blocks, TypeAutoscalingLaunchConfig + ":" + aws.StringValue(asg.LaunchConfigurationName))
-
-				tracker.blocks = blocks
-
-				trackers = append(trackers, tracker)
 			}
+			if !found {
+				continue
+			}
+
+			tracker := &ResourceTracker{
+				Name:    aws.StringValue(t.LaunchConfigurationName),
+				ID:      aws.StringValue(t.LaunchConfigurationName),
+				Type:    TypeAutoscalingLaunchConfig,
+				deleter: DeleteAutoscalingLaunchConfiguration,
+			}
+
+			var blocks []string
+			//blocks = append(blocks, TypeAutoscalingLaunchConfig + ":" + aws.StringValue(asg.LaunchConfigurationName))
+
+			tracker.blocks = blocks
+
+			trackers = append(trackers, tracker)
 		}
 		return true
 	})
diff --git a/upup/pkg/kutil/import_cluster.go b/upup/pkg/kutil/import_cluster.go
index e637cc136a..f0a927a93d 100644
--- a/upup/pkg/kutil/import_cluster.go
+++ b/upup/pkg/kutil/import_cluster.go
@@ -93,7 +93,11 @@ func (x *ImportCluster) ImportAWSCluster() error {
 
 		subnet := subnets[subnetName]
 		if subnet == nil {
-			subnet = &api.ClusterSubnetSpec{Name: subnetName}
+			subnet = &api.ClusterSubnetSpec{
+				Name: subnetName,
+				Zone: zoneName,
+				Type: api.SubnetTypePublic,
+			}
 			subnets[subnetName] = subnet
 		}
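The selection that `FindAutoScalingLaunchConfigurations` now performs (keep a launch configuration
only if it references one of the cluster's tagged security groups) can also be checked by hand with
the AWS CLI. A sketch, where `sg-0123456789abcdef0` is a placeholder for one of the cluster's tagged
security group ids:

```
# List launch configuration names that reference the given security group,
# roughly mirroring the security-group match now used in delete_cluster.go.
aws autoscaling describe-launch-configurations \
  --query 'LaunchConfigurations[?contains(SecurityGroups, `sg-0123456789abcdef0`)].LaunchConfigurationName' \
  --output text
```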