rolling-update: don't deregister our only apiserver

If we do, we can't drain the node afterwards.  We are also going to
have dropped connections in this case anyway.
justinsb 2022-01-30 13:18:09 -05:00
parent 280a4a94ad
commit 4b2f773748
3 changed files with 50 additions and 3 deletions
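For orientation before the diffs: the heart of the change is a check in RunRollingUpdateCluster that sums the minimum sizes of every instance group with the APIServer or Master role and, when that total is at most one, disables load-balancer deregistration of control-plane nodes before draining. A standalone sketch of that decision, using simplified stand-in types rather than the real kopsapi structures, could look like this:

package main

import "fmt"

// InstanceGroupRole is a simplified stand-in for kopsapi.InstanceGroupRole.
type InstanceGroupRole string

const (
	RoleAPIServer InstanceGroupRole = "APIServer"
	RoleMaster    InstanceGroupRole = "Master"
	RoleNode      InstanceGroupRole = "Node"
)

// InstanceGroup is a simplified stand-in for kopsapi.InstanceGroup.
type InstanceGroup struct {
	Role    InstanceGroupRole
	MinSize *int32
}

// canDeregisterControlPlane reports whether control-plane instances may be
// deregistered from load balancers before draining: only when the cluster
// has more than one apiserver/control-plane instance in total.
func canDeregisterControlPlane(groups []InstanceGroup) bool {
	countByRole := make(map[InstanceGroupRole]int32)
	for _, ig := range groups {
		minSize := int32(1) // a group without an explicit MinSize still counts as one instance
		if ig.MinSize != nil {
			minSize = *ig.MinSize
		}
		countByRole[ig.Role] += minSize
	}
	return countByRole[RoleAPIServer]+countByRole[RoleMaster] > 1
}

func main() {
	one := int32(1)
	single := []InstanceGroup{{Role: RoleMaster, MinSize: &one}, {Role: RoleNode}}
	fmt.Println(canDeregisterControlPlane(single)) // false: a lone apiserver stays registered
}

The threshold mirrors the first file below: with only one apiserver, deregistering it would cut off the very API connection the subsequent drain depends on.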


@@ -140,6 +140,9 @@ type RollingUpdateOptions struct {
 	// InstanceGroupRoles is the list of roles we should rolling-update
 	// if not specified, all instance groups will be updated
 	InstanceGroupRoles []string
+
+	// TODO: Move more/all above options to RollingUpdateOptions
+	instancegroups.RollingUpdateOptions
 }
 
 func (o *RollingUpdateOptions) InitDefaults() {
@@ -159,6 +162,8 @@ func (o *RollingUpdateOptions) InitDefaults() {
 	o.ValidateCount = 2
 	o.DrainTimeout = 15 * time.Minute
+
+	o.RollingUpdateOptions.InitDefaults()
 }
 
 func NewCmdRollingUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
@@ -262,9 +267,21 @@ func RunRollingUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer
 		return err
 	}
 
+	countByRole := make(map[kopsapi.InstanceGroupRole]int32)
 	var instanceGroups []*kopsapi.InstanceGroup
 	for i := range list.Items {
-		instanceGroups = append(instanceGroups, &list.Items[i])
+		instanceGroup := &list.Items[i]
+		instanceGroups = append(instanceGroups, instanceGroup)
+		minSize := int32(1)
+		if instanceGroup.Spec.MinSize != nil {
+			minSize = *instanceGroup.Spec.MinSize
+		}
+		countByRole[instanceGroup.Spec.Role] = countByRole[instanceGroup.Spec.Role] + minSize
+	}
+
+	if countByRole[kopsapi.InstanceGroupRoleAPIServer]+countByRole[kopsapi.InstanceGroupRoleMaster] <= 1 {
+		fmt.Fprintf(out, "Detected single-control-plane cluster; won't detach before draining\n")
+		options.DeregisterControlPlaneNodes = false
 	}
 
 	warnUnmatched := true
@@ -346,6 +363,9 @@ func RunRollingUpdateCluster(ctx context.Context, f *util.Factory, out io.Writer
 		// TODO should we expose this to the UI?
 		ValidateTickDuration:    30 * time.Second,
 		ValidateSuccessDuration: 10 * time.Second,
+
+		// TODO: Move more of the passthrough options here, instead of duplicating them.
+		Options: options.RollingUpdateOptions,
 	}
 
 	err = d.AdjustNeedUpdate(groups)


@@ -647,8 +647,22 @@ func (c *RollingUpdateCluster) drainNode(u *cloudinstances.CloudInstance) error
 		return fmt.Errorf("error excluding node from load balancer: %v", err)
 	}
 
-	if err := c.Cloud.DeregisterInstance(u); err != nil {
-		return fmt.Errorf("error deregistering instance %q, node %q: %v", u.ID, u.Node.Name, err)
+	shouldDeregister := true
+	if !c.Options.DeregisterControlPlaneNodes {
+		if u.CloudInstanceGroup != nil && u.CloudInstanceGroup.InstanceGroup != nil {
+			role := u.CloudInstanceGroup.InstanceGroup.Spec.Role
+			switch role {
+			case api.InstanceGroupRoleAPIServer, api.InstanceGroupRoleMaster:
+				klog.Infof("skipping deregistration of instance %q, as part of instancegroup with role %q", u.ID, role)
+				shouldDeregister = false
+			}
+		}
+	}
+
+	if shouldDeregister {
+		if err := c.Cloud.DeregisterInstance(u); err != nil {
+			return fmt.Errorf("error deregistering instance %q, node %q: %w", u.ID, u.Node.Name, err)
+		}
 	}
 
 	if err := drain.RunNodeDrain(helper, u.Node.Name); err != nil {
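The guard above only skips DeregisterInstance for instances whose instance group carries the APIServer or Master role, and only when DeregisterControlPlaneNodes has been switched off; regular nodes are still detached from the load balancer before draining. A minimal self-contained sketch of that predicate, again with simplified stand-in types rather than the real cloudinstances types:

package main

import "fmt"

// Role is a simplified stand-in for kops' api.InstanceGroupRole.
type Role string

const (
	RoleAPIServer Role = "APIServer"
	RoleMaster    Role = "Master"
	RoleNode      Role = "Node"
)

// shouldDeregister mirrors the guard added to drainNode: deregister unless the
// instance belongs to an apiserver/control-plane group and the
// DeregisterControlPlaneNodes option has been disabled.
func shouldDeregister(role Role, deregisterControlPlaneNodes bool) bool {
	if deregisterControlPlaneNodes {
		return true
	}
	switch role {
	case RoleAPIServer, RoleMaster:
		return false
	}
	return true
}

func main() {
	fmt.Println(shouldDeregister(RoleMaster, false)) // false: keep the only apiserver registered
	fmt.Println(shouldDeregister(RoleNode, false))   // true: ordinary nodes are still deregistered
}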


@@ -80,6 +80,19 @@ type RollingUpdateCluster struct {
 	// DrainTimeout is the maximum amount of time to wait while draining a node.
 	DrainTimeout time.Duration
+
+	// Options holds user-specified options
+	Options RollingUpdateOptions
+}
+
+type RollingUpdateOptions struct {
+	// DeregisterControlPlaneNodes controls whether we deregister control plane instances from load balancers etc. before draining/terminating.
+	// When a cluster only has a single apiserver, we don't want to do this, as we can't drain after deregistering it.
+	DeregisterControlPlaneNodes bool
+}
+
+func (o *RollingUpdateOptions) InitDefaults() {
+	o.DeregisterControlPlaneNodes = true
 }
 
 // AdjustNeedUpdate adjusts the set of instances that need updating, using factors outside those known by the cloud implementation