Compare commits
3 Commits
f887e3cc56
...
dc2befb68d
Author | SHA1 | Date |
---|---|---|
|
dc2befb68d | |
|
65668934a8 | |
|
ee08504864 |
|
@ -41,9 +41,21 @@ Total number of rediscluster rebalance operations. Type: Counter.
|
|||
### rediscluster_remove_follower_attempt
|
||||
Number of attempts to remove a follower. Type: Counter.
|
||||
|
||||
### rediscluster_repair_disconnected_attempt
|
||||
Number of attempts to repair Redis cluster masters that are disconnected from the cluster. Type: Counter.
|
||||
|
||||
### rediscluster_repair_failed
|
||||
Number of times an attempt to repair disconnected Redis cluster masters failed. Type: Counter.
|
||||
|
||||
### rediscluster_replicas_size_desired
|
||||
Total desired number of rediscluster replicas. Type: Gauge.
|
||||
|
||||
### rediscluster_reset_attempt
|
||||
Number of attempts to reset a Redis cluster. Type: Counter.
|
||||
|
||||
### rediscluster_reset_failed
|
||||
Number of times an attempt to reset a Redis cluster failed. Type: Counter.
|
||||
|
||||
### rediscluster_reshard_total
|
||||
Total number of rediscluster reshard operations. Type: Counter.
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
|
|||
}
|
||||
return intctrlutil.Reconciled()
|
||||
}
|
||||
monitoring.RedisReplicationSkipReconcile.WithLabelValues(instance.Namespace, instance.Name).Set(0)
|
||||
monitoring.RedisClusterSkipReconcile.WithLabelValues(instance.Namespace, instance.Name).Set(0)
|
||||
if common.IsSkipReconcile(ctx, instance) {
|
||||
monitoring.RedisClusterSkipReconcile.WithLabelValues(instance.Namespace, instance.Name).Set(1)
|
||||
return intctrlutil.Reconciled()
|
||||
|
@ -230,7 +230,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
|
|||
}
|
||||
|
||||
logger.Info("healthy leader count does not match desired; attempting to repair disconnected masters")
|
||||
monitoring.RedisClusterRepairDisconnectedAttempt.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||
if err = k8sutils.RepairDisconnectedMasters(ctx, r.K8sClient, instance); err != nil {
|
||||
monitoring.RedisClusterRepairDisconnectedFailed.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||
logger.Error(err, "failed to repair disconnected masters")
|
||||
}
|
||||
|
||||
|
@ -256,7 +258,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
|
|||
}
|
||||
if int(totalReplicas) > 1 && unhealthyNodeCount >= int(totalReplicas)-1 {
|
||||
logger.Info("unhealthy nodes exist after attempting to repair disconnected masters; starting failover")
|
||||
monitoring.RedisClusterResetAttempt.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||
if err = k8sutils.ExecuteFailoverOperation(ctx, r.K8sClient, instance); err != nil {
|
||||
monitoring.RedisClusterResetFailed.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||
return intctrlutil.RequeueE(ctx, err, "")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,5 +32,9 @@ func RegisterRedisClusterMetrics() {
|
|||
RedisClusterRebalanceTotal,
|
||||
RedisClusterRemoveFollowerAttempt,
|
||||
RedisClusterReshardTotal,
|
||||
RedisClusterRepairDisconnectedAttempt,
|
||||
RedisClusterRepairDisconnectedFailed,
|
||||
RedisClusterResetAttempt,
|
||||
RedisClusterResetFailed,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -48,6 +48,30 @@ var RedisClusterDescription = map[string]MetricDescription{
|
|||
Type: "Counter",
|
||||
labels: []string{"namespace", "instance"},
|
||||
},
|
||||
"RedisClusterRepairDisconnectedAttempt": {
|
||||
Name: "rediscluster_repair_disconnected_attempt",
|
||||
Help: "Number of times to repair a Redis cluster disconnected from the cluster.",
|
||||
Type: "Counter",
|
||||
labels: []string{"namespace", "instance"},
|
||||
},
|
||||
"RedisClusterRepairFailed": {
|
||||
Name: "rediscluster_repair_failed",
|
||||
Help: "Number of times to repair a Redis cluster failed.",
|
||||
Type: "Counter",
|
||||
labels: []string{"namespace", "instance"},
|
||||
},
|
||||
"RedisClusterResetAttempt": {
|
||||
Name: "rediscluster_reset_attempt",
|
||||
Help: "Number of times to reset a Redis cluster.",
|
||||
Type: "Counter",
|
||||
labels: []string{"namespace", "instance"},
|
||||
},
|
||||
"RedisClusterResetFailed": {
|
||||
Name: "rediscluster_reset_failed",
|
||||
Help: "Number of times to reset a Redis cluster failed.",
|
||||
Type: "Counter",
|
||||
labels: []string{"namespace", "instance"},
|
||||
},
|
||||
}
|
||||
|
||||
var (
|
||||
|
@ -114,6 +138,38 @@ var (
|
|||
},
|
||||
RedisClusterDescription["RedisClusterAddingNodeAttempt"].labels,
|
||||
)
|
||||
|
||||
RedisClusterRepairDisconnectedAttempt = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: RedisClusterDescription["RedisClusterRepairDisconnectedAttempt"].Name,
|
||||
Help: RedisClusterDescription["RedisClusterRepairDisconnectedAttempt"].Help,
|
||||
},
|
||||
RedisClusterDescription["RedisClusterRepairDisconnectedAttempt"].labels,
|
||||
)
|
||||
|
||||
RedisClusterRepairDisconnectedFailed = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: RedisClusterDescription["RedisClusterRepairDisconnectedFailed"].Name,
|
||||
Help: RedisClusterDescription["RedisClusterRepairDisconnectedFailed"].Help,
|
||||
},
|
||||
RedisClusterDescription["RedisClusterRepairDisconnectedFailed"].labels,
|
||||
)
|
||||
|
||||
RedisClusterResetAttempt = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: RedisClusterDescription["RedisClusterResetAttempt"].Name,
|
||||
Help: RedisClusterDescription["RedisClusterResetAttempt"].Help,
|
||||
},
|
||||
RedisClusterDescription["RedisClusterResetAttempt"].labels,
|
||||
)
|
||||
|
||||
RedisClusterResetFailed = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: RedisClusterDescription["RedisClusterResetFailed"].Name,
|
||||
Help: RedisClusterDescription["RedisClusterResetFailed"].Help,
|
||||
},
|
||||
RedisClusterDescription["RedisClusterResetFailed"].labels,
|
||||
)
|
||||
)
|
||||
|
||||
// ListMetrics will create a slice with the metrics available in metricDescription
|
||||
|
|
Loading…
Reference in New Issue