diff --git a/cloudmock/aws/mockautoscaling/api.go b/cloudmock/aws/mockautoscaling/api.go index 44cad71a06..cd7f0e540d 100644 --- a/cloudmock/aws/mockautoscaling/api.go +++ b/cloudmock/aws/mockautoscaling/api.go @@ -29,6 +29,7 @@ type MockAutoscaling struct { mutex sync.Mutex Groups map[string]*autoscaling.Group + WarmPoolInstances map[string][]*autoscaling.Instance LaunchConfigurations map[string]*autoscaling.LaunchConfiguration } diff --git a/cloudmock/aws/mockautoscaling/group.go b/cloudmock/aws/mockautoscaling/group.go index daa7e79cf7..a10d7a0806 100644 --- a/cloudmock/aws/mockautoscaling/group.go +++ b/cloudmock/aws/mockautoscaling/group.go @@ -265,6 +265,15 @@ func (m *MockAutoscaling) TerminateInstanceInAutoScalingGroup(input *autoscaling }, nil } } + wp := m.WarmPoolInstances[*group.AutoScalingGroupName] + for i := range wp { + if aws.StringValue(wp[i].InstanceId) == aws.StringValue(input.InstanceId) { + m.WarmPoolInstances[*group.AutoScalingGroupName] = append(wp[:i], wp[i+1:]...) + return &autoscaling.TerminateInstanceInAutoScalingGroupOutput{ + Activity: nil, // TODO + }, nil + } + } } return nil, fmt.Errorf("Instance not found") diff --git a/cmd/kops/get_instances.go b/cmd/kops/get_instances.go index 92b0553af6..c92857991f 100644 --- a/cmd/kops/get_instances.go +++ b/cmd/kops/get_instances.go @@ -174,7 +174,11 @@ func instanceOutputTable(instances []*cloudinstances.CloudInstance, out io.Write t.AddColumn("MACHINE-TYPE", func(i *cloudinstances.CloudInstance) string { return i.MachineType }) - columns := []string{"ID", "NODE-NAME", "STATUS", "ROLES", "INTERNAL-IP", "INSTANCE-GROUP", "MACHINE-TYPE"} + t.AddColumn("STATE", func(i *cloudinstances.CloudInstance) string { + return string(i.State) + }) + + columns := []string{"ID", "NODE-NAME", "STATUS", "ROLES", "STATE", "INTERNAL-IP", "INSTANCE-GROUP", "MACHINE-TYPE"} return t.Render(instances, os.Stdout, columns...) } diff --git a/pkg/cloudinstances/cloud_instance.go b/pkg/cloudinstances/cloud_instance.go index b4b31f7c2a..2899729f1c 100644 --- a/pkg/cloudinstances/cloud_instance.go +++ b/pkg/cloudinstances/cloud_instance.go @@ -27,6 +27,11 @@ const CloudInstanceStatusNeedsUpdate = "NeedsUpdate" // CloudInstanceStatusReady means the instance has joined the cluster, is not detached, and is up to date. const CloudInstanceStatusUpToDate = "UpToDate" +type State string + +// WarmPool means the instance is in the warm pool +const WarmPool State = "WarmPool" + // CloudInstance describes an instance in a CloudInstanceGroup group. type CloudInstance struct { // ID is a unique identifier for the instance, meaningful to the cloud @@ -43,4 +48,6 @@ type CloudInstance struct { MachineType string // Private IP is the private ip address of the instance. PrivateIP string + // State is in which state the instance is in + State State } diff --git a/pkg/cloudinstances/cloud_instance_group.go b/pkg/cloudinstances/cloud_instance_group.go index 397b4c1ceb..70f51b6db3 100644 --- a/pkg/cloudinstances/cloud_instance_group.go +++ b/pkg/cloudinstances/cloud_instance_group.go @@ -41,7 +41,7 @@ type CloudInstanceGroup struct { } // NewCloudInstance creates a new CloudInstance -func (c *CloudInstanceGroup) NewCloudInstance(instanceId string, status string, nodeMap map[string]*v1.Node) (*CloudInstance, error) { +func (c *CloudInstanceGroup) NewCloudInstance(instanceId string, status string, node *v1.Node) (*CloudInstance, error) { if instanceId == "" { return nil, fmt.Errorf("instance id for cloud instance member cannot be empty") } @@ -58,7 +58,6 @@ func (c *CloudInstanceGroup) NewCloudInstance(instanceId string, status string, cm.Status = status - node := nodeMap[instanceId] if node != nil { cm.Node = node } else { diff --git a/pkg/instancegroups/BUILD.bazel b/pkg/instancegroups/BUILD.bazel index 4afa747ebf..cd8f60e3a4 100644 --- a/pkg/instancegroups/BUILD.bazel +++ b/pkg/instancegroups/BUILD.bazel @@ -36,6 +36,7 @@ go_test( srcs = [ "rollingupdate_os_test.go", "rollingupdate_test.go", + "rollingupdate_warmpool_test.go", "settings_test.go", ], embed = [":go_default_library"], diff --git a/pkg/instancegroups/instancegroups.go b/pkg/instancegroups/instancegroups.go index cd37c6203d..84c2e5f0e8 100644 --- a/pkg/instancegroups/instancegroups.go +++ b/pkg/instancegroups/instancegroups.go @@ -124,6 +124,21 @@ func (c *RollingUpdateCluster) rollingUpdateInstanceGroup(group *cloudinstances. } } + nonWarmPool := []*cloudinstances.CloudInstance{} + // Run through the warm pool and delete all instances directly + for _, instance := range update { + if instance.State == cloudinstances.WarmPool { + klog.Infof("deleting warm pool instance %q", instance.ID) + err := c.Cloud.DeleteInstance(instance) + if err != nil { + return fmt.Errorf("failed to delete warm pool instance %q: %w", instance.ID, err) + } + } else { + nonWarmPool = append(nonWarmPool, instance) + } + } + update = nonWarmPool + if c.Interactive { if maxSurge > 1 { maxSurge = 1 diff --git a/pkg/instancegroups/rollingupdate_test.go b/pkg/instancegroups/rollingupdate_test.go index 9cfa4edbff..ebe8c6b59c 100644 --- a/pkg/instancegroups/rollingupdate_test.go +++ b/pkg/instancegroups/rollingupdate_test.go @@ -53,7 +53,9 @@ func getTestSetup() (*RollingUpdateCluster, *awsup.MockAWSCloud) { k8sClient := fake.NewSimpleClientset() mockcloud := awsup.BuildMockAWSCloud("us-east-1", "abc") - mockAutoscaling := &mockautoscaling.MockAutoscaling{} + mockAutoscaling := &mockautoscaling.MockAutoscaling{ + WarmPoolInstances: make(map[string][]*autoscaling.Instance), + } mockcloud.MockAutoscaling = mockAutoscaling mockcloud.MockEC2 = mockAutoscaling.GetEC2Shim(mockcloud.MockEC2) @@ -135,7 +137,7 @@ func (v *assertNotCalledClusterValidator) Validate() (*validation.ValidationClus func makeGroup(groups map[string]*cloudinstances.CloudInstanceGroup, k8sClient kubernetes.Interface, cloud awsup.AWSCloud, name string, role kopsapi.InstanceGroupRole, count int, needUpdate int) { fakeClient := k8sClient.(*fake.Clientset) - groups[name] = &cloudinstances.CloudInstanceGroup{ + group := &cloudinstances.CloudInstanceGroup{ HumanName: name, InstanceGroup: &kopsapi.InstanceGroup{ ObjectMeta: v1meta.ObjectMeta{ @@ -147,6 +149,8 @@ func makeGroup(groups map[string]*cloudinstances.CloudInstanceGroup, k8sClient k }, Raw: &autoscaling.Group{AutoScalingGroupName: aws.String("asg-" + name)}, } + groups[name] = group + cloud.Autoscaling().CreateAutoScalingGroup(&autoscaling.CreateAutoScalingGroupInput{ AutoScalingGroupName: aws.String(name), DesiredCapacity: aws.Int64(int64(count)), @@ -164,16 +168,16 @@ func makeGroup(groups map[string]*cloudinstances.CloudInstanceGroup, k8sClient k } _ = fakeClient.Tracker().Add(node) } - member := cloudinstances.CloudInstance{ - ID: id, - Node: node, - CloudInstanceGroup: groups[name], - } + + var status string if i < needUpdate { - groups[name].NeedUpdate = append(groups[name].NeedUpdate, &member) + status = cloudinstances.CloudInstanceStatusNeedsUpdate } else { - groups[name].Ready = append(groups[name].Ready, &member) + status = cloudinstances.CloudInstanceStatusUpToDate } + + group.NewCloudInstance(id, status, node) + instanceIds = append(instanceIds, aws.String(id)) } cloud.Autoscaling().AttachInstances(&autoscaling.AttachInstancesInput{ diff --git a/pkg/instancegroups/rollingupdate_warmpool_test.go b/pkg/instancegroups/rollingupdate_warmpool_test.go new file mode 100644 index 0000000000..1303dd92bd --- /dev/null +++ b/pkg/instancegroups/rollingupdate_warmpool_test.go @@ -0,0 +1,116 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package instancegroups + +import ( + "testing" + + "github.com/aws/aws-sdk-go/service/autoscaling" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/stretchr/testify/assert" + "k8s.io/client-go/kubernetes" + "k8s.io/kops/cloudmock/aws/mockautoscaling" + "k8s.io/kops/pkg/apis/kops" + "k8s.io/kops/pkg/cloudinstances" + "k8s.io/kops/pkg/validation" + "k8s.io/kops/upup/pkg/fi/cloudup/awsup" +) + +// Here we have three nodes that are up to date, while three warm nodes need updating. +// Only the initial cluster validation should be run +func TestRollingUpdateOnlyWarmPoolNodes(t *testing.T) { + c, cloud := getTestSetup() + k8sClient := c.K8sClient + groups := make(map[string]*cloudinstances.CloudInstanceGroup) + makeGroupWithWarmPool(groups, k8sClient, cloud, "node-1", kops.InstanceGroupRoleNode, 3, 0, 3, 3) + + validator := &countingValidator{} + c.ClusterValidator = validator + + assert.Equal(t, 3, len(groups["node-1"].NeedUpdate), "number of nodes needing update") + + err := c.RollingUpdate(groups, &kops.InstanceGroupList{}) + assert.NoError(t, err, "rolling update") + assert.Equal(t, 1, validator.numValidations, "number of validations") +} + +func TestRollingWarmPoolBeforeJoinedNodes(t *testing.T) { + c, cloud := getTestSetup() + k8sClient := c.K8sClient + groups := make(map[string]*cloudinstances.CloudInstanceGroup) + makeGroupWithWarmPool(groups, k8sClient, cloud, "node-1", kops.InstanceGroupRoleNode, 3, 3, 3, 3) + + warmPoolBeforeJoinedNodesTest := &warmPoolBeforeJoinedNodesTest{ + EC2API: cloud.MockEC2, + t: t, + } + cloud.MockEC2 = warmPoolBeforeJoinedNodesTest + + err := c.RollingUpdate(groups, &kops.InstanceGroupList{}) + + assert.NoError(t, err, "rolling update") + + assert.Equal(t, 6, warmPoolBeforeJoinedNodesTest.numTerminations, "Number of terminations") +} + +type countingValidator struct { + numValidations int +} + +func (c *countingValidator) Validate() (*validation.ValidationCluster, error) { + c.numValidations++ + return &validation.ValidationCluster{}, nil +} + +func makeGroupWithWarmPool(groups map[string]*cloudinstances.CloudInstanceGroup, k8sClient kubernetes.Interface, cloud *awsup.MockAWSCloud, name string, role kops.InstanceGroupRole, count int, needUpdate int, warmCount int, warmNeedUpdate int) { + makeGroup(groups, k8sClient, cloud, name, role, count, needUpdate) + + group := groups[name] + + wpInstances := []*autoscaling.Instance{} + warmStoppedState := autoscaling.LifecycleStateWarmedStopped + for i := 0; i < warmCount; i++ { + id := name + "-wp-" + string(rune('a'+i)) + instance := &autoscaling.Instance{ + InstanceId: &id, + LifecycleState: &warmStoppedState, + } + wpInstances = append(wpInstances, instance) + + cm, _ := group.NewCloudInstance(id, cloudinstances.CloudInstanceStatusNeedsUpdate, nil) + cm.State = cloudinstances.WarmPool + + } + + // There is no API to write to warm pools, so we need to cheat. + mockASG := cloud.MockAutoscaling.(*mockautoscaling.MockAutoscaling) + mockASG.WarmPoolInstances[name] = wpInstances +} + +type warmPoolBeforeJoinedNodesTest struct { + ec2iface.EC2API + t *testing.T + numTerminations int +} + +func (t *warmPoolBeforeJoinedNodesTest) TerminateInstances(input *ec2.TerminateInstancesInput) (*ec2.TerminateInstancesOutput, error) { + + t.numTerminations++ + + return t.EC2API.TerminateInstances(input) +} diff --git a/pkg/resources/digitalocean/cloud.go b/pkg/resources/digitalocean/cloud.go index d950108f57..6590a64fa3 100644 --- a/pkg/resources/digitalocean/cloud.go +++ b/pkg/resources/digitalocean/cloud.go @@ -447,7 +447,7 @@ func buildCloudInstanceGroup(c *Cloud, ig *kops.InstanceGroup, g DOInstanceGroup for _, member := range g.Members { // TODO use a hash of the godo.DropletCreateRequest fields to calculate the second parameter. - _, err := cg.NewCloudInstance(member, cloudinstances.CloudInstanceStatusUpToDate, nodeMap) + _, err := cg.NewCloudInstance(member, cloudinstances.CloudInstanceStatusUpToDate, nodeMap[member]) if err != nil { return nil, fmt.Errorf("error creating cloud instance group member: %v", err) } diff --git a/pkg/resources/spotinst/resources.go b/pkg/resources/spotinst/resources.go index 50c338a949..acd8cbf980 100644 --- a/pkg/resources/spotinst/resources.go +++ b/pkg/resources/spotinst/resources.go @@ -383,7 +383,7 @@ func registerCloudInstances(instanceGroup *cloudinstances.CloudInstanceGroup, no status = cloudinstances.CloudInstanceStatusNeedsUpdate } if _, err := instanceGroup.NewCloudInstance( - instance.Id(), status, nodeMap); err != nil { + instance.Id(), status, nodeMap[instance.Id()]); err != nil { return fmt.Errorf("error creating cloud instance group member: %v", err) } } diff --git a/pkg/validation/validate_cluster.go b/pkg/validation/validate_cluster.go index 1b3dd3e933..80045ba0fc 100644 --- a/pkg/validation/validate_cluster.go +++ b/pkg/validation/validate_cluster.go @@ -307,6 +307,9 @@ func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances // bastion nodes don't join the cluster nodeExpectedToJoin = false } + if member.State == cloudinstances.WarmPool { + nodeExpectedToJoin = false + } if nodeExpectedToJoin { v.addError(&ValidationError{ diff --git a/upup/pkg/fi/cloudup/aliup/status.go b/upup/pkg/fi/cloudup/aliup/status.go index ee1be3b9d6..3cb0dcfee6 100644 --- a/upup/pkg/fi/cloudup/aliup/status.go +++ b/upup/pkg/fi/cloudup/aliup/status.go @@ -279,7 +279,7 @@ func buildCloudInstanceGroup(c ALICloud, ig *kops.InstanceGroup, g ess.ScalingGr if newLaunchConfigName != i.ScalingConfigurationId { status = cloudinstances.CloudInstanceStatusNeedsUpdate } - _, err := cg.NewCloudInstance(instanceId, status, nodeMap) + _, err := cg.NewCloudInstance(instanceId, status, nodeMap[instanceId]) if err != nil { return nil, fmt.Errorf("error creating cloud instance group member: %v", err) } diff --git a/upup/pkg/fi/cloudup/awsup/aws_cloud.go b/upup/pkg/fi/cloudup/awsup/aws_cloud.go index 7f89110ddb..6dfbfc6a20 100644 --- a/upup/pkg/fi/cloudup/awsup/aws_cloud.go +++ b/upup/pkg/fi/cloudup/awsup/aws_cloud.go @@ -780,34 +780,24 @@ func awsBuildCloudInstanceGroup(c AWSCloud, cluster *kops.Cluster, ig *kops.Inst } for _, i := range g.Instances { - id := aws.StringValue(i.InstanceId) - if id == "" { - klog.Warningf("ignoring instance with no instance id: %s in autoscaling group: %s", id, cg.HumanName) - continue - } - instanceSeen[id] = true - // @step: check if the instance is terminating - if aws.StringValue(i.LifecycleState) == autoscaling.LifecycleStateTerminating { - klog.Warningf("ignoring instance as it is terminating: %s in autoscaling group: %s", id, cg.HumanName) - continue - } - if instances[id] == nil { - continue - } - currentConfigName := findInstanceLaunchConfiguration(i) - status := cloudinstances.CloudInstanceStatusUpToDate - if newConfigName != currentConfigName { - status = cloudinstances.CloudInstanceStatusNeedsUpdate - } - cm, err := cg.NewCloudInstance(id, status, nodeMap) + err := buildCloudInstance(i, instances, instanceSeen, nodeMap, cg, newConfigName) if err != nil { - return nil, fmt.Errorf("error creating cloud instance group member: %v", err) + return nil, err } - - addCloudInstanceData(cm, instances[id]) - } + result, err := c.Autoscaling().DescribeWarmPool(&autoscaling.DescribeWarmPoolInput{ + AutoScalingGroupName: g.AutoScalingGroupName, + }) + if err != nil { + return nil, err + } + for _, i := range result.Instances { + err := buildCloudInstance(i, instances, instanceSeen, nodeMap, cg, newConfigName) + if err != nil { + return nil, err + } + } var detached []*string for id, instance := range instances { for _, tag := range instance.Tags { @@ -821,7 +811,7 @@ func awsBuildCloudInstanceGroup(c AWSCloud, cluster *kops.Cluster, ig *kops.Inst } for _, id := range detached { if id != nil && *id != "" && !instanceSeen[*id] { - cm, err := cg.NewCloudInstance(*id, cloudinstances.CloudInstanceStatusDetached, nodeMap) + cm, err := cg.NewCloudInstance(*id, cloudinstances.CloudInstanceStatusDetached, nodeMap[*id]) if err != nil { return nil, fmt.Errorf("error creating cloud instance group member: %v", err) } @@ -833,6 +823,38 @@ func awsBuildCloudInstanceGroup(c AWSCloud, cluster *kops.Cluster, ig *kops.Inst return cg, nil } +func buildCloudInstance(i *autoscaling.Instance, instances map[string]*ec2.Instance, instanceSeen map[string]bool, nodeMap map[string]*v1.Node, cg *cloudinstances.CloudInstanceGroup, newConfigName string) error { + id := aws.StringValue(i.InstanceId) + if id == "" { + klog.Warningf("ignoring instance with no instance id: %s in autoscaling group: %s", id, cg.HumanName) + return nil + } + instanceSeen[id] = true + // @step: check if the instance is terminating + if aws.StringValue(i.LifecycleState) == autoscaling.LifecycleStateTerminating { + klog.Warningf("ignoring instance as it is terminating: %s in autoscaling group: %s", id, cg.HumanName) + return nil + } + if instances[id] == nil { + return nil + } + currentConfigName := findInstanceLaunchConfiguration(i) + status := cloudinstances.CloudInstanceStatusUpToDate + if newConfigName != currentConfigName { + status = cloudinstances.CloudInstanceStatusNeedsUpdate + } + cm, err := cg.NewCloudInstance(id, status, nodeMap[id]) + if err != nil { + return fmt.Errorf("error creating cloud instance group member: %v", err) + } + if strings.HasPrefix(*i.LifecycleState, "Warmed") { + cm.State = cloudinstances.WarmPool + } + + addCloudInstanceData(cm, instances[id]) + return nil +} + func addCloudInstanceData(cm *cloudinstances.CloudInstance, instance *ec2.Instance) { cm.MachineType = aws.StringValue(instance.InstanceType) for _, tag := range instance.Tags { diff --git a/upup/pkg/fi/cloudup/azure/status.go b/upup/pkg/fi/cloudup/azure/status.go index 6607ff1d64..bc0054b1ea 100644 --- a/upup/pkg/fi/cloudup/azure/status.go +++ b/upup/pkg/fi/cloudup/azure/status.go @@ -183,7 +183,7 @@ func (c *azureCloudImplementation) buildCloudInstanceGroup( // TODO(kenji): Set the status properly so that kops can // tell whether a VM is up-to-date or not. status := cloudinstances.CloudInstanceStatusUpToDate - _, err := cg.NewCloudInstance(*vm.Name, status, nodeMap) + _, err := cg.NewCloudInstance(*vm.Name, status, nodeMap[*vm.Name]) if err != nil { return nil, fmt.Errorf("error creating cloud instance group member: %s", err) } diff --git a/upup/pkg/fi/cloudup/openstack/server_group.go b/upup/pkg/fi/cloudup/openstack/server_group.go index d33e925a5f..6ca5920ada 100644 --- a/upup/pkg/fi/cloudup/openstack/server_group.go +++ b/upup/pkg/fi/cloudup/openstack/server_group.go @@ -137,7 +137,7 @@ func osBuildCloudInstanceGroup(c OpenstackCloud, cluster *kops.Cluster, ig *kops if generationName != observedName { status = cloudinstances.CloudInstanceStatusNeedsUpdate } - cm, err := cg.NewCloudInstance(instanceId, status, nodeMap) + cm, err := cg.NewCloudInstance(instanceId, status, nodeMap[instanceId]) if err != nil { return nil, fmt.Errorf("error creating cloud instance group member: %v", err) }