cluster-api-provider-rke2/pkg/rke2/workload_cluster_etcd.go

228 lines
6.8 KiB
Go

/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package rke2
import (
"context"
"fmt"
"github.com/pkg/errors"
"sigs.k8s.io/controller-runtime/pkg/log"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
etcdutil "github.com/rancher/cluster-api-provider-rke2/pkg/etcd/util"
)
const (
// EtcdNodeRemoveAnnotation is a Node annotation used to notify RKE2 of removing the etcd member for a node.
// Code in this file sets it to "true" on the control plane node whose etcd member should be removed.
EtcdNodeRemoveAnnotation = "etcd.rke2.cattle.io/remove"
// EtcdNodeRemovedNodeNameAnnotation is a Node annotation used by RKE2 to notify the etcd member has been successfully removed.
// Code in this file treats a non-empty value of this annotation as confirmation that the removal completed.
EtcdNodeRemovedNodeNameAnnotation = "etcd.rke2.cattle.io/removed-node-name"
)
// RemoveEtcdMemberForMachine requests removal of the etcd member that belongs
// to the node referenced by machine.
//
// A nil machine, or a machine without a node reference, has no member to
// remove: the call is a no-op and returns nil. Otherwise the result of
// removeMemberForNode for the referenced node name is returned.
// Removing the last remaining member of the cluster is not supported.
func (w *Workload) RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error {
	if machine == nil {
		return nil
	}

	nodeRef := machine.Status.NodeRef
	if nodeRef == nil {
		// The machine never got a node, so there is nothing to remove.
		return nil
	}

	return w.removeMemberForNode(ctx, nodeRef.Name)
}
// removeMemberForNode asks for the etcd member of the control plane node called
// name to be removed. It does so by setting the EtcdNodeRemoveAnnotation
// annotation to "true" on that node and patching the node through the patch
// helper registered for it in w.nodePatchHelpers.
//
// It returns an error when the control plane nodes cannot be listed, when no
// patch helper is registered for the matching node, or when the patch fails.
// When no control plane node matches name nothing is patched and nil is
// returned. This function only requests the removal; confirmation is reported
// separately through the EtcdNodeRemovedNodeNameAnnotation annotation.
func (w *Workload) removeMemberForNode(ctx context.Context, name string) error {
	controlPlaneNodes, err := w.getControlPlaneNodes(ctx)
	if err != nil {
		return fmt.Errorf("getting control plane nodes: %w", err)
	}

	logger := log.FromContext(ctx)

	for _, node := range controlPlaneNodes.Items {
		if node.Name != name {
			continue
		}

		helper, ok := w.nodePatchHelpers[node.Name]
		if !ok {
			return fmt.Errorf("could not find patch helper for node %s", node.Name)
		}

		if node.Annotations == nil {
			node.Annotations = map[string]string{}
		}

		node.Annotations[EtcdNodeRemoveAnnotation] = "true"

		if err := helper.Patch(ctx, &node); err != nil {
			return fmt.Errorf("patching node %s with etcd remove annotation: %w", node.Name, err)
		}

		// Log only once the annotation has really been applied; at this point
		// the member removal has been requested, not completed.
		logger.Info("Requested etcd member removal for node", "node", name)

		return nil
	}

	// No node matched: nothing was annotated, so do not claim a removal.
	logger.Info("No matching control plane node found, etcd member removal not requested", "node", name)

	return nil
}
// IsEtcdMemberSafelyRemovedForMachine reports whether the node referenced by
// machine carries a non-empty `etcd.rke2.cattle.io/removed-node-name` annotation.
//
// A nil machine, or a machine without a node reference, is reported as removed
// (true, nil) because there is no node to wait for. Otherwise the result of
// isMemberRemovedForNode for the referenced node name is returned.
func (w *Workload) IsEtcdMemberSafelyRemovedForMachine(ctx context.Context, machine *clusterv1.Machine) (bool, error) {
	if machine == nil {
		return true, nil
	}

	nodeRef := machine.Status.NodeRef
	if nodeRef == nil {
		// No node was ever attached to this machine, so nothing is pending.
		return true, nil
	}

	return w.isMemberRemovedForNode(ctx, nodeRef.Name)
}
// isMemberRemovedForNode reports whether the control plane node called name
// has a non-empty EtcdNodeRemovedNodeNameAnnotation annotation.
//
// It returns an error when the control plane nodes cannot be listed or when
// the matching node has no annotations at all. It returns (false, nil) when
// the annotation is absent or empty, and also when no control plane node
// matches name.
func (w *Workload) isMemberRemovedForNode(ctx context.Context, name string) (bool, error) {
	nodeList, err := w.getControlPlaneNodes(ctx)
	if err != nil {
		return false, fmt.Errorf("getting control plane nodes: %w", err)
	}

	for i := range nodeList.Items {
		candidate := &nodeList.Items[i]
		if candidate.Name != name {
			continue
		}

		if candidate.Annotations == nil {
			return false, fmt.Errorf("node is missing the %s annotation", EtcdNodeRemoveAnnotation)
		}

		// A missing key reads as "", so "absent" and "present but empty" are
		// both treated as not yet removed.
		if candidate.Annotations[EtcdNodeRemovedNodeNameAnnotation] == "" {
			return false, nil
		}

		log.FromContext(ctx).Info("Removed member: " + name)

		return true, nil
	}

	return false, nil
}
// ForwardEtcdLeadership moves etcd leadership from the member backing machine
// to the member backing leaderCandidate, but only when machine's member is the
// current etcd leader.
//
// It is a no-op (nil) when machine is nil or has no node reference, when the
// workload has no etcd client generator, or when machine's member is not found
// or is not the leader. It returns an error when leaderCandidate is nil or has
// no node reference, when nodes or etcd members cannot be listed, when the
// etcd client cannot be created, when the candidate has no etcd member, or
// when the leadership move itself fails.
func (w *Workload) ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error {
	if machine == nil || machine.Status.NodeRef == nil {
		return nil
	}

	switch {
	case leaderCandidate == nil:
		return errors.New("leader candidate cannot be nil")
	case leaderCandidate.Status.NodeRef == nil:
		return errors.New("leader has no node reference")
	}

	// Return early for clusters without an etcd certificate secret
	if w.etcdClientGenerator == nil {
		return nil
	}

	cpNodes, err := w.getControlPlaneNodes(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to list control plane nodes")
	}

	cpNodeNames := make([]string, len(cpNodes.Items))
	for i := range cpNodes.Items {
		cpNodeNames[i] = cpNodes.Items[i].Name
	}

	leaderClient, err := w.etcdClientGenerator.ForLeader(ctx, cpNodeNames)
	if err != nil {
		return errors.Wrap(err, "failed to create etcd client")
	}
	defer leaderClient.Close()

	memberList, err := leaderClient.Members(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to list etcd members using etcd client")
	}

	departing := etcdutil.MemberForName(memberList, machine.Status.NodeRef.Name)
	if departing == nil || departing.ID != leaderClient.LeaderID {
		// The machine's member does not hold leadership, so nothing needs to move.
		return nil
	}

	// Hand leadership over to the member of the supplied candidate.
	candidateNodeName := leaderCandidate.Status.NodeRef.Name

	successor := etcdutil.MemberForName(memberList, candidateNodeName)
	if successor == nil {
		return errors.Errorf("failed to get etcd member from node %q", candidateNodeName)
	}

	log.FromContext(ctx).Info(fmt.Sprintf("Moving leader from %s to %s", departing.Name, successor.Name))

	// errors.Wrap yields nil for a nil error, so success passes straight through.
	return errors.Wrap(leaderClient.MoveLeader(ctx, successor.ID), "failed to move leader")
}
// EtcdMemberStatus contains status information for a single etcd member.
// NOTE(review): nothing in the visible part of this file constructs or reads this type.
type EtcdMemberStatus struct {
// Name is presumably the name of the etcd member this status describes — TODO confirm against the code that populates it.
Name string
// Responsive presumably records whether the member answered when it was queried — TODO confirm against the code that populates it.
Responsive bool
}
// EtcdMembers returns the current set of members in an etcd cluster, each
// converted to a node name via etcdutil.NodeNameFromMember.
//
// NOTE: This method uses control plane machines/nodes only to get in contact with etcd,
// but then it relies on etcd as the ultimate source of truth for the list of members.
// This is intended to allow informed decisions on actions impacting etcd quorum.
//
// When the workload has no etcd client generator an empty, non-nil slice is
// returned. An error is returned when the control plane nodes cannot be
// listed, the etcd client cannot be created, or the members cannot be listed.
func (w *Workload) EtcdMembers(ctx context.Context) ([]string, error) {
	// Return early for clusters without an etcd certificate secret
	if w.etcdClientGenerator == nil {
		return []string{}, nil
	}

	cpNodes, err := w.getControlPlaneNodes(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list control plane nodes")
	}

	cpNodeNames := make([]string, len(cpNodes.Items))
	for i := range cpNodes.Items {
		cpNodeNames[i] = cpNodes.Items[i].Name
	}

	leaderClient, err := w.etcdClientGenerator.ForLeader(ctx, cpNodeNames)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create etcd client")
	}
	defer leaderClient.Close()

	memberList, err := leaderClient.Members(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "failed to list etcd members using etcd client")
	}

	// Always hand back a non-nil slice, even when etcd reports no members.
	memberNodeNames := make([]string, 0, len(memberList))
	for _, etcdMember := range memberList {
		memberNodeNames = append(memberNodeNames, etcdutil.NodeNameFromMember(etcdMember))
	}

	return memberNodeNames, nil
}