add instance connection draining for NLBs

This commit is contained in:
Bronson Mirafuentes 2021-12-20 10:08:05 -08:00
parent cc26c87b89
commit fa956e3909
2 changed files with 76 additions and 10 deletions

View File

@ -56,6 +56,7 @@ go_library(
"//vendor/github.com/aws/aws-sdk-go/service/sqs:go_default_library",
"//vendor/github.com/aws/aws-sdk-go/service/sqs/sqsiface:go_default_library",
"//vendor/github.com/aws/aws-sdk-go/service/sts:go_default_library",
"//vendor/golang.org/x/sync/errgroup:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",

View File

@ -27,6 +27,7 @@ import (
"github.com/aws/aws-sdk-go/service/eventbridge/eventbridgeiface"
"github.com/aws/aws-sdk-go/service/sqs"
"github.com/aws/aws-sdk-go/service/sqs/sqsiface"
"golang.org/x/sync/errgroup"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/arn"
@ -547,7 +548,7 @@ func deleteInstance(c AWSCloud, i *cloudinstances.CloudInstance) error {
}
if i.CloudInstanceGroup.InstanceGroup.Spec.Manager != kops.InstanceManagerKarpenter {
err := deregisterInstanceFromClassicLoadBalancer(c, i)
err := deregisterInstance(c, i)
if err != nil {
return fmt.Errorf("failed to deregister instance from loadBalancer before terminating: %v", err)
}
@ -570,9 +571,8 @@ func deleteInstance(c AWSCloud, i *cloudinstances.CloudInstance) error {
return nil
}
// deregisterInstanceFromClassicLoadBalancer ensures that connectionDraining completes for the associated loadBalancer to ensure no dropped connections.
// if instance is associated with an NLB, this method no-ops.
func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.CloudInstance) error {
// deregisterInstance ensures that the instance is fully drained/removed from all associated loadBalancers and targetGroups before termination.
func deregisterInstance(c AWSCloud, i *cloudinstances.CloudInstance) error {
asg := i.CloudInstanceGroup.Raw.(*autoscaling.Group)
asgDetails, err := c.Autoscaling().DescribeAutoScalingGroups(&autoscaling.DescribeAutoScalingGroupsInput{
@ -587,17 +587,41 @@ func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.Clo
}
// there will always be only one ASG in the DescribeAutoScalingGroups response.
loadBalancerNames := asgDetails.AutoScalingGroups[0].LoadBalancerNames
loadBalancerNames := aws.StringValueSlice(asgDetails.AutoScalingGroups[0].LoadBalancerNames)
targetGroupArns := aws.StringValueSlice(asgDetails.AutoScalingGroups[0].TargetGroupARNs)
klog.Infof("Deregistering instance from classic loadBalancers: %v", aws.StringValueSlice(loadBalancerNames))
eg, _ := errgroup.WithContext(aws.BackgroundContext())
if len(loadBalancerNames) != 0 {
eg.Go(func() error {
return deregisterInstanceFromClassicLoadBalancer(c, loadBalancerNames, i.ID)
})
}
if len(targetGroupArns) != 0 {
eg.Go(func() error {
return deregisterInstanceFromTargetGroups(c, targetGroupArns, i.ID)
})
}
if err := eg.Wait(); err != nil {
return fmt.Errorf("failed to deregister instance from load balancers: %v", err)
}
return nil
}
// deregisterInstanceFromClassicLoadBalancer ensures that connectionDraining completes for the associated classic loadBalancer to ensure no dropped connections.
func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, loadBalancerNames []string, instanceId string) error {
klog.Infof("Deregistering instance from classic loadBalancers: %v", loadBalancerNames)
for {
instanceDraining := false
for _, loadBalancerName := range loadBalancerNames {
response, err := c.ELB().DescribeInstanceHealth(&elb.DescribeInstanceHealthInput{
LoadBalancerName: loadBalancerName,
LoadBalancerName: aws.String(loadBalancerName),
Instances: []*elb.Instance{{
InstanceId: aws.String(i.ID),
InstanceId: aws.String(instanceId),
}},
})
if err != nil {
@ -612,9 +636,9 @@ func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.Clo
// there will be only one instance in the DescribeInstanceHealth response.
if aws.StringValue(response.InstanceStates[0].State) == instanceInServiceState {
c.ELB().DeregisterInstancesFromLoadBalancer(&elb.DeregisterInstancesFromLoadBalancerInput{
LoadBalancerName: loadBalancerName,
LoadBalancerName: aws.String(loadBalancerName),
Instances: []*elb.Instance{{
InstanceId: aws.String(i.ID),
InstanceId: aws.String(instanceId),
}},
})
instanceDraining = true
@ -627,7 +651,48 @@ func deregisterInstanceFromClassicLoadBalancer(c AWSCloud, i *cloudinstances.Clo
time.Sleep(5 * time.Second)
}
return nil
}
// deregisterInstanceFromTargetGroups ensures that instances are fully unused in the corresponding targetGroups before instance termination.
// this ensures that connections are fully drained from the instance before terminating.
func deregisterInstanceFromTargetGroups(c AWSCloud, targetGroupArns []string, instanceId string) error {
klog.Infof("Deregistering instance from targetGroups: %v", targetGroupArns)
for {
instanceDraining := false
for _, targetGroupArn := range targetGroupArns {
response, err := c.ELBV2().DescribeTargetHealth(&elbv2.DescribeTargetHealthInput{
TargetGroupArn: aws.String(targetGroupArn),
Targets: []*elbv2.TargetDescription{{
Id: aws.String(instanceId),
}},
})
if err != nil {
return fmt.Errorf("error describing target health: %v", err)
}
// there will be only one target in the DescribeTargetHealth response.
// DescribeTargetHealth response will contain a target even if the targetId doesn't exist.
// all other states besides TargetHealthStateUnused means that the instance may still be serving traffic.
if aws.StringValue(response.TargetHealthDescriptions[0].TargetHealth.State) != elbv2.TargetHealthStateEnumUnused {
c.ELBV2().DeregisterTargets(&elbv2.DeregisterTargetsInput{
TargetGroupArn: aws.String(targetGroupArn),
Targets: []*elbv2.TargetDescription{{
Id: aws.String(instanceId),
}},
})
instanceDraining = true
}
}
if !instanceDraining {
break
}
time.Sleep(5 * time.Second)
}
return nil
}