update object watcher metrics and simplify cluster object version recording
Signed-off-by: zach593 <zach_li@outlook.com>
This commit is contained in:
parent
3b6c0e0fa2
commit
d226ce11f3
|
@ -212,7 +212,9 @@ func (c *Controller) cleanupPolicyClaimMetadata(ctx context.Context, work *workv
|
|||
util.RemoveLabels(workload, util.ManagedResourceLabels...)
|
||||
util.RemoveAnnotations(workload, util.ManagedResourceAnnotations...)
|
||||
|
||||
if err := c.ObjectWatcher.Update(ctx, cluster.Name, workload, clusterObj); err != nil {
|
||||
operationResult, err := c.ObjectWatcher.Update(ctx, cluster.Name, workload, clusterObj)
|
||||
metrics.CountUpdateResourceToCluster(err, workload.GetAPIVersion(), workload.GetKind(), cluster.Name, string(operationResult))
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to update metadata in the given member cluster %v, err is %v", cluster.Name, err)
|
||||
return err
|
||||
}
|
||||
|
@ -232,6 +234,7 @@ func (c *Controller) tryDeleteWorkload(ctx context.Context, clusterName string,
|
|||
}
|
||||
|
||||
err = c.ObjectWatcher.Delete(ctx, clusterName, workload)
|
||||
metrics.CountDeleteResourceFromCluster(err, workload.GetAPIVersion(), workload.GetKind(), clusterName)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to delete resource in the given member cluster %v, err is %v", clusterName, err)
|
||||
return err
|
||||
|
@ -312,13 +315,15 @@ func (c *Controller) tryCreateOrUpdateWorkload(ctx context.Context, clusterName
|
|||
return err
|
||||
}
|
||||
err = c.ObjectWatcher.Create(ctx, clusterName, workload)
|
||||
metrics.CountCreateResourceToCluster(err, workload.GetAPIVersion(), workload.GetKind(), clusterName, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
err = c.ObjectWatcher.Update(ctx, clusterName, workload, clusterObj)
|
||||
operationResult, err := c.ObjectWatcher.Update(ctx, clusterName, workload, clusterObj)
|
||||
metrics.CountUpdateResourceToCluster(err, workload.GetAPIVersion(), workload.GetKind(), clusterName, string(operationResult))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -255,8 +255,8 @@ func (c *WorkStatusController) updateResource(ctx context.Context, observedObj *
|
|||
}
|
||||
|
||||
if needUpdate {
|
||||
updateErr := c.ObjectWatcher.Update(ctx, clusterName, desiredObj, observedObj)
|
||||
metrics.CountUpdateResourceToCluster(updateErr, desiredObj.GetAPIVersion(), desiredObj.GetKind(), clusterName)
|
||||
operationResult, updateErr := c.ObjectWatcher.Update(ctx, clusterName, desiredObj, observedObj)
|
||||
metrics.CountUpdateResourceToCluster(updateErr, desiredObj.GetAPIVersion(), desiredObj.GetKind(), clusterName, string(operationResult))
|
||||
if updateErr != nil {
|
||||
klog.Errorf("Updating %s failed: %v", fedKey.String(), updateErr)
|
||||
return updateErr
|
||||
|
@ -299,7 +299,6 @@ func (c *WorkStatusController) handleDeleteEvent(ctx context.Context, key keys.F
|
|||
}
|
||||
|
||||
reCreateErr := c.recreateResourceIfNeeded(ctx, work, key)
|
||||
metrics.CountRecreateResourceToCluster(reCreateErr, key.GroupVersion().String(), key.Kind, key.Cluster)
|
||||
if reCreateErr != nil {
|
||||
c.updateAppliedCondition(ctx, work, metav1.ConditionFalse, "ReCreateFailed", reCreateErr.Error())
|
||||
return reCreateErr
|
||||
|
@ -321,6 +320,7 @@ func (c *WorkStatusController) recreateResourceIfNeeded(ctx context.Context, wor
|
|||
manifest.GetName() == workloadKey.Name {
|
||||
klog.Infof("Recreating resource(%s).", workloadKey.String())
|
||||
err := c.ObjectWatcher.Create(ctx, workloadKey.Cluster, manifest)
|
||||
metrics.CountCreateResourceToCluster(err, workloadKey.GroupVersion().String(), workloadKey.Kind, workloadKey.Cluster, true)
|
||||
if err != nil {
|
||||
c.eventf(manifest, corev1.EventTypeWarning, events.EventReasonSyncWorkloadFailed, "Failed to create or update resource(%s/%s) in member cluster(%s): %v", manifest.GetNamespace(), manifest.GetName(), workloadKey.Cluster, err)
|
||||
return err
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package metrics
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
@ -30,8 +31,9 @@ const (
|
|||
policyApplyAttemptsMetricsName = "policy_apply_attempts_total"
|
||||
syncWorkDurationMetricsName = "binding_sync_work_duration_seconds"
|
||||
syncWorkloadDurationMetricsName = "work_sync_workload_duration_seconds"
|
||||
recreateResourceToCluster = "recreate_resource_to_cluster"
|
||||
createResourceToCluster = "create_resource_to_cluster"
|
||||
updateResourceToCluster = "update_resource_to_cluster"
|
||||
deleteResourceFromCluster = "delete_resource_from_cluster"
|
||||
policyPreemptionMetricsName = "policy_preemption_total"
|
||||
cronFederatedHPADurationMetricsName = "cronfederatedhpa_process_duration_seconds"
|
||||
cronFederatedHPARuleDurationMetricsName = "cronfederatedhpa_rule_process_duration_seconds"
|
||||
|
@ -69,14 +71,19 @@ var (
|
|||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
|
||||
}, []string{"result"})
|
||||
|
||||
recreateResourceWhenSyncWorkStatus = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: recreateResourceToCluster,
|
||||
Help: "Number of recreating operation of the resource to a target member cluster. By the result, 'error' means a resource recreated failed. Otherwise 'success'. Cluster means the target member cluster.",
|
||||
}, []string{"result", "apiversion", "kind", "cluster"})
|
||||
createResourceWhenSyncWork = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: createResourceToCluster,
|
||||
Help: "Number of creation operations against a target member cluster. The 'result' label indicates outcome ('success' or 'error'), 'recreate' indicates whether the operation is recreated (true/false). Labels 'apiversion', 'kind', and 'cluster' specify the resource type, API version, and target cluster respectively.",
|
||||
}, []string{"result", "apiversion", "kind", "cluster", "recreate"})
|
||||
|
||||
updateResourceWhenSyncWorkStatus = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
updateResourceWhenSyncWork = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: updateResourceToCluster,
|
||||
Help: "Number of updating operation of the resource to a target member cluster. By the result, 'error' means a resource updated failed. Otherwise 'success'. Cluster means the target member cluster.",
|
||||
Help: "Number of updating operation of the resource to a target member cluster. By the result, 'error' means a resource updated failed. Otherwise 'success'. Cluster means the target member cluster. operationResult means the result of the update operation.",
|
||||
}, []string{"result", "apiversion", "kind", "cluster", "operationResult"})
|
||||
|
||||
deleteResourceWhenSyncWork = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: deleteResourceFromCluster,
|
||||
Help: "Number of deletion operations against a target member cluster. The 'result' label indicates outcome ('success' or 'error'). Labels 'apiversion', 'kind', and 'cluster' specify the resource's API version, type, and source cluster respectively.",
|
||||
}, []string{"result", "apiversion", "kind", "cluster"})
|
||||
|
||||
policyPreemptionCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
|
@ -130,14 +137,19 @@ func ObserveSyncWorkloadLatency(err error, start time.Time) {
|
|||
syncWorkloadDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
|
||||
}
|
||||
|
||||
// CountRecreateResourceToCluster records the number of recreating operation of the resource to a target member cluster.
|
||||
func CountRecreateResourceToCluster(err error, apiVersion, kind, cluster string) {
|
||||
recreateResourceWhenSyncWorkStatus.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster).Inc()
|
||||
// CountCreateResourceToCluster records the number of creation operations of the resource for a target member cluster.
|
||||
func CountCreateResourceToCluster(err error, apiVersion, kind, cluster string, recreate bool) {
|
||||
createResourceWhenSyncWork.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster, strconv.FormatBool(recreate)).Inc()
|
||||
}
|
||||
|
||||
// CountUpdateResourceToCluster records the number of updating operation of the resource to a target member cluster.
|
||||
func CountUpdateResourceToCluster(err error, apiVersion, kind, cluster string) {
|
||||
updateResourceWhenSyncWorkStatus.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster).Inc()
|
||||
func CountUpdateResourceToCluster(err error, apiVersion, kind, cluster string, operationResult string) {
|
||||
updateResourceWhenSyncWork.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster, operationResult).Inc()
|
||||
}
|
||||
|
||||
// CountDeleteResourceFromCluster records the number of deletion operations of the resource from a target member cluster.
|
||||
func CountDeleteResourceFromCluster(err error, apiVersion, kind, cluster string) {
|
||||
deleteResourceWhenSyncWork.WithLabelValues(utilmetrics.GetResultByError(err), apiVersion, kind, cluster).Inc()
|
||||
}
|
||||
|
||||
// CountPolicyPreemption records the numbers of policy preemption.
|
||||
|
@ -173,8 +185,9 @@ func ResourceCollectors() []prometheus.Collector {
|
|||
policyApplyAttempts,
|
||||
syncWorkDurationHistogram,
|
||||
syncWorkloadDurationHistogram,
|
||||
recreateResourceWhenSyncWorkStatus,
|
||||
updateResourceWhenSyncWorkStatus,
|
||||
createResourceWhenSyncWork,
|
||||
updateResourceWhenSyncWork,
|
||||
deleteResourceWhenSyncWork,
|
||||
policyPreemptionCounter,
|
||||
cronFederatedHPADurationHistogram,
|
||||
cronFederatedHPARuleDurationHistogram,
|
||||
|
|
|
@ -39,10 +39,22 @@ import (
|
|||
"github.com/karmada-io/karmada/pkg/util/restmapper"
|
||||
)
|
||||
|
||||
// OperationResult is the action result of an Update call.
|
||||
type OperationResult string
|
||||
|
||||
const (
|
||||
// OperationResultNone means that the update operation was not performed and the resource has not been changed.
|
||||
OperationResultNone OperationResult = "none"
|
||||
// OperationResultUnchanged means that the update operation was performed but the resource did not change.
|
||||
OperationResultUnchanged OperationResult = "unchanged"
|
||||
// OperationResultUpdated means that an existing resource is updated.
|
||||
OperationResultUpdated OperationResult = "updated"
|
||||
)
|
||||
|
||||
// ObjectWatcher manages operations for object dispatched to member clusters.
|
||||
type ObjectWatcher interface {
|
||||
Create(ctx context.Context, clusterName string, desireObj *unstructured.Unstructured) error
|
||||
Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) error
|
||||
Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) (operationResult OperationResult, err error)
|
||||
Delete(ctx context.Context, clusterName string, desireObj *unstructured.Unstructured) error
|
||||
NeedsUpdate(clusterName string, desiredObj, clusterObj *unstructured.Unstructured) (bool, error)
|
||||
}
|
||||
|
@ -138,43 +150,48 @@ func (o *objectWatcherImpl) retainClusterFields(desired, observed *unstructured.
|
|||
return desired, nil
|
||||
}
|
||||
|
||||
func (o *objectWatcherImpl) Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) error {
|
||||
func (o *objectWatcherImpl) Update(ctx context.Context, clusterName string, desireObj, clusterObj *unstructured.Unstructured) (OperationResult, error) {
|
||||
updateAllowed := o.allowUpdate(clusterName, desireObj, clusterObj)
|
||||
if !updateAllowed {
|
||||
// The existing resource is not managed by Karmada, and no conflict resolution found, avoid updating the existing resource by default.
|
||||
return fmt.Errorf("resource(kind=%s, %s/%s) already exists in the cluster %v and the %s strategy value is empty, Karmada will not manage this resource",
|
||||
return OperationResultNone, fmt.Errorf("resource(kind=%s, %s/%s) already exists in the cluster %v and the %s strategy value is empty, Karmada will not manage this resource",
|
||||
desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName, workv1alpha2.ResourceConflictResolutionAnnotation)
|
||||
}
|
||||
|
||||
dynamicClusterClient, err := o.ClusterClientSetFunc(clusterName, o.KubeClientSet)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to build dynamic cluster client for cluster %s, err: %v.", clusterName, err)
|
||||
return err
|
||||
return OperationResultNone, err
|
||||
}
|
||||
|
||||
gvr, err := restmapper.GetGroupVersionResource(o.RESTMapper, desireObj.GroupVersionKind())
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to update the resource(kind=%s, %s/%s) in the cluster %s as mapping GVK to GVR failed: %v", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName, err)
|
||||
return err
|
||||
return OperationResultNone, err
|
||||
}
|
||||
|
||||
desireObj, err = o.retainClusterFields(desireObj, clusterObj)
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to retain fields for resource(kind=%s, %s/%s) in cluster %s: %v", clusterObj.GetKind(), clusterObj.GetNamespace(), clusterObj.GetName(), clusterName, err)
|
||||
return err
|
||||
return OperationResultNone, err
|
||||
}
|
||||
|
||||
resource, err := dynamicClusterClient.DynamicClientSet.Resource(gvr).Namespace(desireObj.GetNamespace()).Update(ctx, desireObj, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to update resource(kind=%s, %s/%s) in cluster %s, err: %v.", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName, err)
|
||||
return err
|
||||
return OperationResultNone, err
|
||||
}
|
||||
|
||||
klog.Infof("Updated the resource(kind=%s, %s/%s) on cluster(%s).", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName)
|
||||
|
||||
// record version
|
||||
o.recordVersion(resource, clusterName)
|
||||
return nil
|
||||
|
||||
if clusterObj.GetResourceVersion() == resource.GetResourceVersion() {
|
||||
klog.Infof("Updated the resource(kind=%s, %s/%s) on cluster(%s) but the cluster object was not changed.", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName)
|
||||
return OperationResultUnchanged, nil
|
||||
}
|
||||
|
||||
klog.Infof("Updated the resource(kind=%s, %s/%s) on cluster(%s).", desireObj.GetKind(), desireObj.GetNamespace(), desireObj.GetName(), clusterName)
|
||||
return OperationResultUpdated, nil
|
||||
}
|
||||
|
||||
func (o *objectWatcherImpl) Delete(ctx context.Context, clusterName string, desireObj *unstructured.Unstructured) error {
|
||||
|
@ -242,21 +259,13 @@ func (o *objectWatcherImpl) genObjectKey(obj *unstructured.Unstructured) string
|
|||
func (o *objectWatcherImpl) recordVersion(clusterObj *unstructured.Unstructured, clusterName string) {
|
||||
objVersion := lifted.ObjectVersion(clusterObj)
|
||||
objectKey := o.genObjectKey(clusterObj)
|
||||
if o.isClusterVersionRecordExist(clusterName) {
|
||||
o.updateVersionRecord(clusterName, objectKey, objVersion)
|
||||
} else {
|
||||
o.addVersionRecord(clusterName, objectKey, objVersion)
|
||||
|
||||
o.Lock.Lock()
|
||||
defer o.Lock.Unlock()
|
||||
if o.VersionRecord[clusterName] == nil {
|
||||
o.VersionRecord[clusterName] = make(map[string]string)
|
||||
}
|
||||
}
|
||||
|
||||
// isClusterVersionRecordExist checks if the version record map of given member cluster exist
|
||||
func (o *objectWatcherImpl) isClusterVersionRecordExist(clusterName string) bool {
|
||||
o.Lock.RLock()
|
||||
defer o.Lock.RUnlock()
|
||||
|
||||
_, exist := o.VersionRecord[clusterName]
|
||||
|
||||
return exist
|
||||
o.VersionRecord[clusterName][objectKey] = objVersion
|
||||
}
|
||||
|
||||
// getVersionRecord will return the recorded version of given resource(if exist)
|
||||
|
@ -268,20 +277,6 @@ func (o *objectWatcherImpl) getVersionRecord(clusterName, resourceName string) (
|
|||
return version, exist
|
||||
}
|
||||
|
||||
// addVersionRecord will add new version record of given resource
|
||||
func (o *objectWatcherImpl) addVersionRecord(clusterName, resourceName, version string) {
|
||||
o.Lock.Lock()
|
||||
defer o.Lock.Unlock()
|
||||
o.VersionRecord[clusterName] = map[string]string{resourceName: version}
|
||||
}
|
||||
|
||||
// updateVersionRecord will update the recorded version of given resource
|
||||
func (o *objectWatcherImpl) updateVersionRecord(clusterName, resourceName, version string) {
|
||||
o.Lock.Lock()
|
||||
defer o.Lock.Unlock()
|
||||
o.VersionRecord[clusterName][resourceName] = version
|
||||
}
|
||||
|
||||
// deleteVersionRecord will delete the recorded version of given resource
|
||||
func (o *objectWatcherImpl) deleteVersionRecord(clusterName, resourceName string) {
|
||||
o.Lock.Lock()
|
||||
|
|
Loading…
Reference in New Issue