525 lines
18 KiB
Go
525 lines
18 KiB
Go
package status
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
|
|
corev1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/api/equality"
|
|
"k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/apimachinery/pkg/runtime/schema"
|
|
"k8s.io/apimachinery/pkg/util/clock"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
"k8s.io/client-go/kubernetes"
|
|
clientset "k8s.io/client-go/kubernetes"
|
|
"k8s.io/client-go/tools/record"
|
|
"k8s.io/component-helpers/apimachinery/lease"
|
|
"k8s.io/klog/v2"
|
|
controllerruntime "sigs.k8s.io/controller-runtime"
|
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
|
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
|
|
"sigs.k8s.io/controller-runtime/pkg/predicate"
|
|
|
|
"github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
|
|
"github.com/karmada-io/karmada/pkg/util"
|
|
"github.com/karmada-io/karmada/pkg/util/informermanager"
|
|
)
|
|
|
|
const (
	// ControllerName is the controller name that will be used when reporting events.
	ControllerName = "cluster-status-controller"

	// clusterReady is the Ready-condition reason used when the cluster is reachable and healthy.
	clusterReady = "ClusterReady"
	// clusterHealthy is the Ready-condition message used when the cluster is reachable and healthy.
	clusterHealthy = "cluster is reachable and health endpoint responded with ok"
	// clusterNotReady is the Ready-condition reason used when the cluster is reachable but unhealthy.
	clusterNotReady = "ClusterNotReady"
	// clusterUnhealthy is the Ready-condition message used when the cluster is reachable but unhealthy.
	clusterUnhealthy = "cluster is reachable but health endpoint responded without ok"
	// clusterNotReachableReason is the Ready-condition reason used when the cluster is offline.
	clusterNotReachableReason = "ClusterNotReachable"
	// clusterNotReachableMsg is the Ready-condition message used when the cluster is offline.
	clusterNotReachableMsg = "cluster is not reachable"

	// clusterStatusRetryInterval specifies the interval between two retries.
	clusterStatusRetryInterval = 500 * time.Millisecond
	// clusterStatusRetryTimeout specifies the maximum time to wait for cluster status.
	clusterStatusRetryTimeout = 2 * time.Second
)
|
|
|
|
var (
	// nodeGVR identifies the core/v1 "nodes" resource listed from member clusters.
	nodeGVR = corev1.SchemeGroupVersion.WithResource("nodes")
	// podGVR identifies the core/v1 "pods" resource listed from member clusters.
	podGVR = corev1.SchemeGroupVersion.WithResource("pods")
)
|
|
|
|
// ClusterStatusController is to sync status of Cluster.
type ClusterStatusController struct {
	client.Client // used to operate Cluster resources.
	// KubeClient is passed to the per-cluster lease controllers created in initLeaseController.
	KubeClient clientset.Interface
	// EventRecorder records events for this controller.
	EventRecorder record.EventRecorder
	// PredicateFunc filters which Cluster events trigger reconciliation.
	PredicateFunc predicate.Predicate
	// InformerManager holds the pod/node informers built per member cluster.
	InformerManager informermanager.MultiClusterInformerManager
	// StopChan, when closed, stops the started informers and lease controllers.
	StopChan <-chan struct{}
	// ClusterClientSetFunc builds a typed client set for a member cluster.
	ClusterClientSetFunc func(c *v1alpha1.Cluster, client client.Client) (*util.ClusterClient, error)
	// ClusterDynamicClientSetFunc builds a dynamic client set for a member cluster.
	ClusterDynamicClientSetFunc func(c *v1alpha1.Cluster, client client.Client) (*util.DynamicClusterClient, error)

	// ClusterStatusUpdateFrequency is the frequency that controller computes and report cluster status.
	ClusterStatusUpdateFrequency metav1.Duration
	// ClusterLeaseDuration is a duration that candidates for a lease need to wait to force acquire it.
	// This is measure against time of last observed lease RenewTime.
	ClusterLeaseDuration metav1.Duration
	// ClusterLeaseRenewIntervalFraction is a fraction coordinated with ClusterLeaseDuration that
	// how long the current holder of a lease has last updated the lease.
	ClusterLeaseRenewIntervalFraction float64
	// ClusterLeaseControllers store clusters and their corresponding lease controllers.
	ClusterLeaseControllers sync.Map
}
|
|
|
|
// Reconcile syncs status of the given member cluster.
|
|
// The Controller will requeue the Request to be processed again if an error is non-nil or
|
|
// Result.Requeue is true, otherwise upon completion it will requeue the reconcile key after the duration.
|
|
func (c *ClusterStatusController) Reconcile(ctx context.Context, req controllerruntime.Request) (controllerruntime.Result, error) {
|
|
klog.V(4).Infof("Syncing cluster status: %s", req.NamespacedName.String())
|
|
|
|
cluster := &v1alpha1.Cluster{}
|
|
if err := c.Client.Get(context.TODO(), req.NamespacedName, cluster); err != nil {
|
|
// The resource may no longer exist, in which case we stop the informer.
|
|
if errors.IsNotFound(err) {
|
|
// TODO(Garrybest): stop the informer and delete the cluster manager
|
|
return controllerruntime.Result{}, nil
|
|
}
|
|
|
|
return controllerruntime.Result{Requeue: true}, err
|
|
}
|
|
|
|
if !cluster.DeletionTimestamp.IsZero() {
|
|
return controllerruntime.Result{}, nil
|
|
}
|
|
|
|
// start syncing status only when the finalizer is present on the given Cluster to
|
|
// avoid conflict with cluster controller.
|
|
if !controllerutil.ContainsFinalizer(cluster, util.ClusterControllerFinalizer) {
|
|
klog.V(2).Infof("waiting finalizer present for member cluster: %s", cluster.Name)
|
|
return controllerruntime.Result{Requeue: true}, nil
|
|
}
|
|
|
|
return c.syncClusterStatus(cluster)
|
|
}
|
|
|
|
// SetupWithManager creates a controller and register to controller manager.
|
|
func (c *ClusterStatusController) SetupWithManager(mgr controllerruntime.Manager) error {
|
|
return controllerruntime.NewControllerManagedBy(mgr).For(&v1alpha1.Cluster{}).WithEventFilter(c.PredicateFunc).Complete(c)
|
|
}
|
|
|
|
// syncClusterStatus probes the member cluster and publishes a freshly
// computed ClusterStatus: readiness conditions, Kubernetes version,
// installed API enablements, node summary and resource summary. The status
// is written back through updateStatusIfNeeded, which also schedules the
// next periodic sync.
func (c *ClusterStatusController) syncClusterStatus(cluster *v1alpha1.Cluster) (controllerruntime.Result, error) {
	// create a ClusterClient for the given member cluster
	clusterClient, err := c.ClusterClientSetFunc(cluster, c.Client)
	if err != nil {
		klog.Errorf("Failed to create a ClusterClient for the given member cluster: %v, err is : %v", cluster.Name, err)
		return controllerruntime.Result{Requeue: true}, err
	}

	// get or create informer for pods and nodes in member cluster
	clusterInformerManager, err := c.buildInformerForCluster(cluster)
	if err != nil {
		klog.Errorf("Failed to get or create informer for Cluster %s. Error: %v.", cluster.GetName(), err)
		return controllerruntime.Result{Requeue: true}, err
	}

	// init the lease controller for every cluster
	c.initLeaseController(cluster)

	var currentClusterStatus = v1alpha1.ClusterStatus{}

	// get the health status of member cluster
	online, healthy := getClusterHealthStatus(clusterClient)

	// in case of cluster offline, retry a few times to avoid network unstable problems.
	// Note: retry timeout should not be too long, otherwise will block other cluster reconcile.
	if !online {
		err := wait.Poll(clusterStatusRetryInterval, clusterStatusRetryTimeout, func() (done bool, err error) {
			// Re-probe; `online` and `healthy` are captured so a successful
			// retry updates the values used below.
			online, healthy = getClusterHealthStatus(clusterClient)
			if !online {
				return false, nil
			}
			klog.V(2).Infof("Cluster(%s) back to online after retry.", cluster.Name)
			return true, nil
		})
		// error indicates that retry timeout, update cluster status immediately and return.
		if err != nil {
			// Publish the offline condition right away; the remaining status
			// fields cannot be collected from an unreachable cluster.
			currentClusterStatus.Conditions = generateReadyCondition(false, false)
			setTransitionTime(&cluster.Status, &currentClusterStatus)
			return c.updateStatusIfNeeded(cluster, currentClusterStatus)
		}
	}

	clusterVersion, err := getKubernetesVersion(clusterClient)
	if err != nil {
		klog.Errorf("Failed to get server version of the member cluster: %v, err is : %v", cluster.Name, err)
		return controllerruntime.Result{Requeue: true}, err
	}

	// get the list of APIs installed in the member cluster
	apiEnables, err := getAPIEnablements(clusterClient)
	if err != nil {
		klog.Errorf("Failed to get APIs installed in the member cluster: %v, err is : %v", cluster.Name, err)
		return controllerruntime.Result{Requeue: true}, err
	}

	// Nodes and pods come from the local informer cache, not direct API calls.
	nodes, err := listNodes(clusterInformerManager)
	if err != nil {
		klog.Errorf("Failed to list nodes of cluster(%s), err: %v", cluster.Name, err)
		return controllerruntime.Result{Requeue: true}, err
	}

	pods, err := listPods(clusterInformerManager)
	if err != nil {
		klog.Errorf("Failed to list pods of cluster(%s), err: %v", cluster.Name, err)
		return controllerruntime.Result{Requeue: true}, err
	}

	// Assemble the full status and write it back if anything changed.
	currentClusterStatus.Conditions = generateReadyCondition(online, healthy)
	setTransitionTime(&cluster.Status, &currentClusterStatus)
	currentClusterStatus.KubernetesVersion = clusterVersion
	currentClusterStatus.APIEnablements = apiEnables
	currentClusterStatus.NodeSummary = getNodeSummary(nodes)
	currentClusterStatus.ResourceSummary = getResourceSummary(nodes, pods)

	return c.updateStatusIfNeeded(cluster, currentClusterStatus)
}
|
|
|
|
// updateStatusIfNeeded calls updateStatus only if the status of the member cluster is not the same as the old status
|
|
func (c *ClusterStatusController) updateStatusIfNeeded(cluster *v1alpha1.Cluster, currentClusterStatus v1alpha1.ClusterStatus) (controllerruntime.Result, error) {
|
|
if !equality.Semantic.DeepEqual(cluster.Status, currentClusterStatus) {
|
|
klog.V(4).Infof("Start to update cluster status: %s", cluster.Name)
|
|
cluster.Status = currentClusterStatus
|
|
err := c.Client.Status().Update(context.TODO(), cluster)
|
|
if err != nil {
|
|
klog.Errorf("Failed to update health status of the member cluster: %v, err is : %v", cluster.Name, err)
|
|
return controllerruntime.Result{Requeue: true}, err
|
|
}
|
|
}
|
|
|
|
return controllerruntime.Result{RequeueAfter: c.ClusterStatusUpdateFrequency.Duration}, nil
|
|
}
|
|
|
|
// buildInformerForCluster builds informer manager for cluster if it doesn't exist, then constructs informers for node
// and pod and start it. If the informer manager exist, return it.
func (c *ClusterStatusController) buildInformerForCluster(cluster *v1alpha1.Cluster) (informermanager.SingleClusterInformerManager, error) {
	singleClusterInformerManager := c.InformerManager.GetSingleClusterManager(cluster.Name)
	if singleClusterInformerManager != nil {
		// Informers for this cluster were already created and started.
		return singleClusterInformerManager, nil
	}

	clusterClient, err := c.ClusterDynamicClientSetFunc(cluster, c.Client)
	if err != nil {
		klog.Errorf("Failed to build dynamic cluster client for cluster %s.", cluster.Name)
		return nil, err
	}
	// Resync period 0: rely on watch events only, no periodic relist.
	singleClusterInformerManager = c.InformerManager.ForCluster(clusterClient.ClusterName, clusterClient.DynamicClientSet, 0)

	gvrs := []schema.GroupVersionResource{
		nodeGVR,
		podGVR,
	}

	// create the informer for pods and nodes
	for _, gvr := range gvrs {
		// Lister is called for its side effect of registering an informer
		// for the GVR; the returned lister is intentionally discarded here.
		singleClusterInformerManager.Lister(gvr)
	}

	c.InformerManager.Start(cluster.Name, c.StopChan)
	synced := c.InformerManager.WaitForCacheSync(cluster.Name, c.StopChan)
	// A nil sync map means no informer factory was registered for this cluster.
	if synced == nil {
		klog.Errorf("The informer factory for cluster(%s) does not exist.", cluster.Name)
		return nil, fmt.Errorf("informer factory for cluster(%s) does not exist", cluster.Name)
	}
	// Fail if any of the required caches did not reach a synced state.
	for _, gvr := range gvrs {
		if !synced[gvr] {
			klog.Errorf("Informer for %s hasn't synced.", gvr)
			return nil, fmt.Errorf("informer for %s hasn't synced", gvr)
		}
	}
	return singleClusterInformerManager, nil
}
|
|
|
|
func (c *ClusterStatusController) initLeaseController(cluster *v1alpha1.Cluster) {
|
|
// If lease controller has been registered, we skip this function.
|
|
if _, exists := c.ClusterLeaseControllers.Load(cluster.Name); exists {
|
|
return
|
|
}
|
|
|
|
// renewInterval is how often the lease renew time is updated.
|
|
renewInterval := time.Duration(float64(c.ClusterLeaseDuration.Nanoseconds()) * c.ClusterLeaseRenewIntervalFraction)
|
|
|
|
nodeLeaseController := lease.NewController(
|
|
clock.RealClock{},
|
|
c.KubeClient,
|
|
cluster.Name,
|
|
int32(c.ClusterLeaseDuration.Seconds()),
|
|
nil,
|
|
renewInterval,
|
|
util.NamespaceClusterLease,
|
|
util.SetLeaseOwnerFunc(c.Client, cluster.Name))
|
|
|
|
c.ClusterLeaseControllers.Store(cluster.Name, nodeLeaseController)
|
|
|
|
// start syncing lease
|
|
// todo(garryfang): stop the lease controller when cluster does not exist according to #384
|
|
go func() {
|
|
nodeLeaseController.Run(c.StopChan)
|
|
<-c.StopChan
|
|
c.ClusterLeaseControllers.Delete(cluster.Name)
|
|
}()
|
|
}
|
|
|
|
func getClusterHealthStatus(clusterClient *util.ClusterClient) (online, healthy bool) {
|
|
healthStatus, err := healthEndpointCheck(clusterClient.KubeClient, "/readyz")
|
|
if err != nil && healthStatus == http.StatusNotFound {
|
|
// do health check with healthz endpoint if the readyz endpoint is not installed in member cluster
|
|
healthStatus, err = healthEndpointCheck(clusterClient.KubeClient, "/healthz")
|
|
}
|
|
|
|
if err != nil {
|
|
klog.Errorf("Failed to do cluster health check for cluster %v, err is : %v ", clusterClient.ClusterName, err)
|
|
return false, false
|
|
}
|
|
|
|
if healthStatus != http.StatusOK {
|
|
klog.Infof("Member cluster %v isn't healthy", clusterClient.ClusterName)
|
|
return true, false
|
|
}
|
|
|
|
return true, true
|
|
}
|
|
|
|
func healthEndpointCheck(client *kubernetes.Clientset, path string) (int, error) {
|
|
var healthStatus int
|
|
resp := client.DiscoveryClient.RESTClient().Get().AbsPath(path).Do(context.TODO()).StatusCode(&healthStatus)
|
|
return healthStatus, resp.Error()
|
|
}
|
|
|
|
func generateReadyCondition(online, healthy bool) []metav1.Condition {
|
|
var conditions []metav1.Condition
|
|
currentTime := metav1.Now()
|
|
|
|
newClusterOfflineCondition := metav1.Condition{
|
|
Type: v1alpha1.ClusterConditionReady,
|
|
Status: metav1.ConditionFalse,
|
|
Reason: clusterNotReachableReason,
|
|
Message: clusterNotReachableMsg,
|
|
LastTransitionTime: currentTime,
|
|
}
|
|
|
|
newClusterReadyCondition := metav1.Condition{
|
|
Type: v1alpha1.ClusterConditionReady,
|
|
Status: metav1.ConditionTrue,
|
|
Reason: clusterReady,
|
|
Message: clusterHealthy,
|
|
LastTransitionTime: currentTime,
|
|
}
|
|
|
|
newClusterNotReadyCondition := metav1.Condition{
|
|
Type: v1alpha1.ClusterConditionReady,
|
|
Status: metav1.ConditionFalse,
|
|
Reason: clusterNotReady,
|
|
Message: clusterUnhealthy,
|
|
LastTransitionTime: currentTime,
|
|
}
|
|
|
|
if !online {
|
|
conditions = append(conditions, newClusterOfflineCondition)
|
|
} else {
|
|
if !healthy {
|
|
conditions = append(conditions, newClusterNotReadyCondition)
|
|
} else {
|
|
conditions = append(conditions, newClusterReadyCondition)
|
|
}
|
|
}
|
|
|
|
return conditions
|
|
}
|
|
|
|
// setTransitionTime copies the previous LastTransitionTime onto the new
// conditions when overall readiness did not change, so the transition
// timestamp only moves when readiness actually flips.
func setTransitionTime(oldClusterStatus, newClusterStatus *v1alpha1.ClusterStatus) {
	// preserve the last transition time if the status of member cluster not changed
	if util.IsClusterReady(oldClusterStatus) == util.IsClusterReady(newClusterStatus) {
		if len(oldClusterStatus.Conditions) != 0 {
			for i := 0; i < len(newClusterStatus.Conditions); i++ {
				// NOTE(review): every new condition inherits the timestamp of
				// the FIRST old condition — this assumes the status carries a
				// single (Ready) condition; confirm if more types are added.
				newClusterStatus.Conditions[i].LastTransitionTime = oldClusterStatus.Conditions[0].LastTransitionTime
			}
		}
	}
}
|
|
|
|
func getKubernetesVersion(clusterClient *util.ClusterClient) (string, error) {
|
|
clusterVersion, err := clusterClient.KubeClient.Discovery().ServerVersion()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return clusterVersion.GitVersion, nil
|
|
}
|
|
|
|
func getAPIEnablements(clusterClient *util.ClusterClient) ([]v1alpha1.APIEnablement, error) {
|
|
_, apiResourceList, err := clusterClient.KubeClient.Discovery().ServerGroupsAndResources()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var apiEnablements []v1alpha1.APIEnablement
|
|
|
|
for _, list := range apiResourceList {
|
|
var apiResources []v1alpha1.APIResource
|
|
for _, resource := range list.APIResources {
|
|
apiResource := v1alpha1.APIResource{
|
|
Name: resource.Name,
|
|
Kind: resource.Kind,
|
|
}
|
|
|
|
apiResources = append(apiResources, apiResource)
|
|
}
|
|
apiEnablements = append(apiEnablements, v1alpha1.APIEnablement{GroupVersion: list.GroupVersion, Resources: apiResources})
|
|
}
|
|
|
|
return apiEnablements, nil
|
|
}
|
|
|
|
// listPods returns the Pod list from the informerManager cache.
|
|
func listPods(informerManager informermanager.SingleClusterInformerManager) ([]*corev1.Pod, error) {
|
|
podLister := informerManager.Lister(podGVR)
|
|
|
|
podList, err := podLister.List(labels.Everything())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
pods, err := convertObjectsToPods(podList)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return pods, nil
|
|
}
|
|
|
|
// listNodes returns the Node list from the informerManager cache.
|
|
func listNodes(informerManager informermanager.SingleClusterInformerManager) ([]*corev1.Node, error) {
|
|
nodeLister := informerManager.Lister(nodeGVR)
|
|
|
|
nodeList, err := nodeLister.List(labels.Everything())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
nodes, err := convertObjectsToNodes(nodeList)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return nodes, nil
|
|
}
|
|
|
|
func getNodeSummary(nodes []*corev1.Node) *v1alpha1.NodeSummary {
|
|
totalNum := len(nodes)
|
|
readyNum := 0
|
|
|
|
for _, node := range nodes {
|
|
if getReadyStatusForNode(node.Status) {
|
|
readyNum++
|
|
}
|
|
}
|
|
|
|
var nodeSummary = &v1alpha1.NodeSummary{}
|
|
nodeSummary.TotalNum = int32(totalNum)
|
|
nodeSummary.ReadyNum = int32(readyNum)
|
|
|
|
return nodeSummary
|
|
}
|
|
|
|
func getResourceSummary(nodes []*corev1.Node, pods []*corev1.Pod) *v1alpha1.ResourceSummary {
|
|
allocatable := getClusterAllocatable(nodes)
|
|
allocating := getAllocatingResource(pods)
|
|
allocated := getAllocatedResource(pods)
|
|
|
|
var resourceSummary = &v1alpha1.ResourceSummary{}
|
|
resourceSummary.Allocatable = allocatable
|
|
resourceSummary.Allocating = allocating
|
|
resourceSummary.Allocated = allocated
|
|
|
|
return resourceSummary
|
|
}
|
|
|
|
func convertObjectsToNodes(nodeList []runtime.Object) ([]*corev1.Node, error) {
|
|
nodes := make([]*corev1.Node, 0, len(nodeList))
|
|
for _, obj := range nodeList {
|
|
unstructObj := obj.(*unstructured.Unstructured)
|
|
node := &corev1.Node{}
|
|
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructObj.UnstructuredContent(), node); err != nil {
|
|
return nil, fmt.Errorf("failed to convert unstructured to typed object: %v", err)
|
|
}
|
|
nodes = append(nodes, node)
|
|
}
|
|
return nodes, nil
|
|
}
|
|
|
|
func convertObjectsToPods(podList []runtime.Object) ([]*corev1.Pod, error) {
|
|
pods := make([]*corev1.Pod, 0, len(podList))
|
|
for _, obj := range podList {
|
|
unstructObj := obj.(*unstructured.Unstructured)
|
|
pod := &corev1.Pod{}
|
|
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructObj.UnstructuredContent(), pod); err != nil {
|
|
return nil, fmt.Errorf("failed to convert unstructured to typed object: %v", err)
|
|
}
|
|
pods = append(pods, pod)
|
|
}
|
|
return pods, nil
|
|
}
|
|
|
|
func getReadyStatusForNode(nodeStatus corev1.NodeStatus) bool {
|
|
for _, condition := range nodeStatus.Conditions {
|
|
if condition.Type == corev1.NodeReady {
|
|
if condition.Status == corev1.ConditionTrue {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func getClusterAllocatable(nodeList []*corev1.Node) (allocatable corev1.ResourceList) {
|
|
allocatable = make(corev1.ResourceList)
|
|
for _, node := range nodeList {
|
|
for key, val := range node.Status.Allocatable {
|
|
tmpCap, ok := allocatable[key]
|
|
if ok {
|
|
tmpCap.Add(val)
|
|
} else {
|
|
tmpCap = val
|
|
}
|
|
allocatable[key] = tmpCap
|
|
}
|
|
}
|
|
|
|
return allocatable
|
|
}
|
|
|
|
func getAllocatingResource(podList []*corev1.Pod) corev1.ResourceList {
|
|
allocating := util.EmptyResource()
|
|
for _, pod := range podList {
|
|
if len(pod.Spec.NodeName) == 0 {
|
|
allocating.AddPodRequest(pod)
|
|
}
|
|
}
|
|
|
|
return allocating.ResourceList()
|
|
}
|
|
|
|
func getAllocatedResource(podList []*corev1.Pod) corev1.ResourceList {
|
|
allocated := util.EmptyResource()
|
|
for _, pod := range podList {
|
|
// When the phase of a pod is Succeeded or Failed, kube-scheduler would not consider its resource occupation.
|
|
if len(pod.Spec.NodeName) != 0 && pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed {
|
|
allocated.AddPodRequest(pod)
|
|
}
|
|
}
|
|
|
|
return allocated.ResourceList()
|
|
}
|