Compare commits
3 Commits
f887e3cc56
...
dc2befb68d
Author | SHA1 | Date |
---|---|---|
|
dc2befb68d | |
|
65668934a8 | |
|
ee08504864 |
|
@ -41,9 +41,21 @@ Total number of rediscluster rebalance operations. Type: Counter.
|
||||||
### rediscluster_remove_follower_attempt
|
### rediscluster_remove_follower_attempt
|
||||||
Number of times to remove follower attempts. Type: Counter.
|
Number of times to remove follower attempts. Type: Counter.
|
||||||
|
|
||||||
|
### rediscluster_repair_disconnected_attempt
|
||||||
|
Number of attempts to repair Redis cluster masters that are disconnected from the cluster. Type: Counter.
|
||||||
|
|
||||||
|
### rediscluster_repair_failed
|
||||||
|
Number of failed attempts to repair a Redis cluster. Type: Counter.
|
||||||
|
|
||||||
### rediscluster_replicas_size_desired
|
### rediscluster_replicas_size_desired
|
||||||
Total desired number of rediscluster replicas. Type: Gauge.
|
Total desired number of rediscluster replicas. Type: Gauge.
|
||||||
|
|
||||||
|
### rediscluster_reset_attempt
|
||||||
|
Number of attempts to reset a Redis cluster. Type: Counter.
|
||||||
|
|
||||||
|
### rediscluster_reset_failed
|
||||||
|
Number of failed attempts to reset a Redis cluster. Type: Counter.
|
||||||
|
|
||||||
### rediscluster_reshard_total
|
### rediscluster_reshard_total
|
||||||
Total number of rediscluster reshard operations. Type: Counter.
|
Total number of rediscluster reshard operations. Type: Counter.
|
||||||
|
|
||||||
|
|
|
@ -66,7 +66,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
|
||||||
}
|
}
|
||||||
return intctrlutil.Reconciled()
|
return intctrlutil.Reconciled()
|
||||||
}
|
}
|
||||||
monitoring.RedisReplicationSkipReconcile.WithLabelValues(instance.Namespace, instance.Name).Set(0)
|
monitoring.RedisClusterSkipReconcile.WithLabelValues(instance.Namespace, instance.Name).Set(0)
|
||||||
if common.IsSkipReconcile(ctx, instance) {
|
if common.IsSkipReconcile(ctx, instance) {
|
||||||
monitoring.RedisClusterSkipReconcile.WithLabelValues(instance.Namespace, instance.Name).Set(1)
|
monitoring.RedisClusterSkipReconcile.WithLabelValues(instance.Namespace, instance.Name).Set(1)
|
||||||
return intctrlutil.Reconciled()
|
return intctrlutil.Reconciled()
|
||||||
|
@ -230,7 +230,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Info("healthy leader count does not match desired; attempting to repair disconnected masters")
|
logger.Info("healthy leader count does not match desired; attempting to repair disconnected masters")
|
||||||
|
monitoring.RedisClusterRepairDisconnectedAttempt.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||||
if err = k8sutils.RepairDisconnectedMasters(ctx, r.K8sClient, instance); err != nil {
|
if err = k8sutils.RepairDisconnectedMasters(ctx, r.K8sClient, instance); err != nil {
|
||||||
|
monitoring.RedisClusterRepairDisconnectedFailed.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||||
logger.Error(err, "failed to repair disconnected masters")
|
logger.Error(err, "failed to repair disconnected masters")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,7 +258,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
|
||||||
}
|
}
|
||||||
if int(totalReplicas) > 1 && unhealthyNodeCount >= int(totalReplicas)-1 {
|
if int(totalReplicas) > 1 && unhealthyNodeCount >= int(totalReplicas)-1 {
|
||||||
logger.Info("unhealthy nodes exist after attempting to repair disconnected masters; starting failover")
|
logger.Info("unhealthy nodes exist after attempting to repair disconnected masters; starting failover")
|
||||||
|
monitoring.RedisClusterResetAttempt.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||||
if err = k8sutils.ExecuteFailoverOperation(ctx, r.K8sClient, instance); err != nil {
|
if err = k8sutils.ExecuteFailoverOperation(ctx, r.K8sClient, instance); err != nil {
|
||||||
|
monitoring.RedisClusterResetFailed.WithLabelValues(instance.Namespace, instance.Name).Inc()
|
||||||
return intctrlutil.RequeueE(ctx, err, "")
|
return intctrlutil.RequeueE(ctx, err, "")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,5 +32,9 @@ func RegisterRedisClusterMetrics() {
|
||||||
RedisClusterRebalanceTotal,
|
RedisClusterRebalanceTotal,
|
||||||
RedisClusterRemoveFollowerAttempt,
|
RedisClusterRemoveFollowerAttempt,
|
||||||
RedisClusterReshardTotal,
|
RedisClusterReshardTotal,
|
||||||
|
RedisClusterRepairDisconnectedAttempt,
|
||||||
|
RedisClusterRepairDisconnectedFailed,
|
||||||
|
RedisClusterResetAttempt,
|
||||||
|
RedisClusterResetFailed,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,30 @@ var RedisClusterDescription = map[string]MetricDescription{
|
||||||
Type: "Counter",
|
Type: "Counter",
|
||||||
labels: []string{"namespace", "instance"},
|
labels: []string{"namespace", "instance"},
|
||||||
},
|
},
|
||||||
|
"RedisClusterRepairDisconnectedAttempt": {
|
||||||
|
Name: "rediscluster_repair_disconnected_attempt",
|
||||||
|
Help: "Number of times to repair a Redis cluster disconnected from the cluster.",
|
||||||
|
Type: "Counter",
|
||||||
|
labels: []string{"namespace", "instance"},
|
||||||
|
},
|
||||||
|
"RedisClusterRepairFailed": {
|
||||||
|
Name: "rediscluster_repair_failed",
|
||||||
|
Help: "Number of times to repair a Redis cluster failed.",
|
||||||
|
Type: "Counter",
|
||||||
|
labels: []string{"namespace", "instance"},
|
||||||
|
},
|
||||||
|
"RedisClusterResetAttempt": {
|
||||||
|
Name: "rediscluster_reset_attempt",
|
||||||
|
Help: "Number of times to reset a Redis cluster.",
|
||||||
|
Type: "Counter",
|
||||||
|
labels: []string{"namespace", "instance"},
|
||||||
|
},
|
||||||
|
"RedisClusterResetFailed": {
|
||||||
|
Name: "rediscluster_reset_failed",
|
||||||
|
Help: "Number of times to reset a Redis cluster failed.",
|
||||||
|
Type: "Counter",
|
||||||
|
labels: []string{"namespace", "instance"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -114,6 +138,38 @@ var (
|
||||||
},
|
},
|
||||||
RedisClusterDescription["RedisClusterAddingNodeAttempt"].labels,
|
RedisClusterDescription["RedisClusterAddingNodeAttempt"].labels,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
RedisClusterRepairDisconnectedAttempt = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: RedisClusterDescription["RedisClusterRepairDisconnectedAttempt"].Name,
|
||||||
|
Help: RedisClusterDescription["RedisClusterRepairDisconnectedAttempt"].Help,
|
||||||
|
},
|
||||||
|
RedisClusterDescription["RedisClusterRepairDisconnectedAttempt"].labels,
|
||||||
|
)
|
||||||
|
|
||||||
|
RedisClusterRepairDisconnectedFailed = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: RedisClusterDescription["RedisClusterRepairFailed"].Name,
|
||||||
|
Help: RedisClusterDescription["RedisClusterRepairFailed"].Help,
|
||||||
|
},
|
||||||
|
RedisClusterDescription["RedisClusterRepairFailed"].labels,
|
||||||
|
)
|
||||||
|
|
||||||
|
RedisClusterResetAttempt = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: RedisClusterDescription["RedisClusterResetAttempt"].Name,
|
||||||
|
Help: RedisClusterDescription["RedisClusterResetAttempt"].Help,
|
||||||
|
},
|
||||||
|
RedisClusterDescription["RedisClusterResetAttempt"].labels,
|
||||||
|
)
|
||||||
|
|
||||||
|
RedisClusterResetFailed = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: RedisClusterDescription["RedisClusterResetFailed"].Name,
|
||||||
|
Help: RedisClusterDescription["RedisClusterResetFailed"].Help,
|
||||||
|
},
|
||||||
|
RedisClusterDescription["RedisClusterResetFailed"].labels,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
// ListMetrics will create a slice with the metrics available in metricDescription
|
// ListMetrics will create a slice with the metrics available in metricDescription
|
||||||
|
|
Loading…
Reference in New Issue