Record the failed scale-up metric using the string value of the AutoscalerErrorType as its reason label.

This commit is contained in:
Brett Elliott 2021-03-23 15:16:07 +01:00
parent c2c5eaab3c
commit 013fa19be3
4 changed files with 23 additions and 11 deletions

View File

@@ -666,12 +666,8 @@ func executeScaleUp(context *context.AutoscalingContext, clusterStateRegistry *c
increase := info.NewSize - info.CurrentSize increase := info.NewSize - info.CurrentSize
if err := info.Group.IncreaseSize(increase); err != nil { if err := info.Group.IncreaseSize(increase); err != nil {
context.LogRecorder.Eventf(apiv1.EventTypeWarning, "FailedToScaleUpGroup", "Scale-up failed for group %s: %v", info.Group.Id(), err) context.LogRecorder.Eventf(apiv1.EventTypeWarning, "FailedToScaleUpGroup", "Scale-up failed for group %s: %v", info.Group.Id(), err)
reason := metrics.CloudProviderError
aerr := errors.ToAutoscalerError(errors.CloudProviderError, err).AddPrefix("failed to increase node group size: %v", err) aerr := errors.ToAutoscalerError(errors.CloudProviderError, err).AddPrefix("failed to increase node group size: %v", err)
if aerr.Type() == errors.AuthorizationError { clusterStateRegistry.RegisterFailedScaleUp(info.Group, metrics.FailedScaleUpReason(string(aerr.Type())), now)
reason = metrics.AuthorizationError
}
clusterStateRegistry.RegisterFailedScaleUp(info.Group, reason, now)
return aerr return aerr
} }
clusterStateRegistry.RegisterOrUpdateScaleUp( clusterStateRegistry.RegisterOrUpdateScaleUp(

View File

@@ -18,6 +18,8 @@ package core
import ( import (
"fmt" "fmt"
"net/http"
"net/http/httptest"
"regexp" "regexp"
"strings" "strings"
"testing" "testing"
@@ -30,12 +32,14 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/core/utils" "k8s.io/autoscaler/cluster-autoscaler/core/utils"
"k8s.io/autoscaler/cluster-autoscaler/estimator" "k8s.io/autoscaler/cluster-autoscaler/estimator"
"k8s.io/autoscaler/cluster-autoscaler/metrics"
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test" . "k8s.io/autoscaler/cluster-autoscaler/utils/test"
"k8s.io/autoscaler/cluster-autoscaler/utils/units" "k8s.io/autoscaler/cluster-autoscaler/utils/units"
kube_record "k8s.io/client-go/tools/record" kube_record "k8s.io/client-go/tools/record"
"k8s.io/component-base/metrics/legacyregistry"
appsv1 "k8s.io/api/apps/v1" appsv1 "k8s.io/api/apps/v1"
apiv1 "k8s.io/api/core/v1" apiv1 "k8s.io/api/core/v1"
@@ -974,17 +978,33 @@ func TestCheckScaleUpDeltaWithinLimits(t *testing.T) {
} }
func TestAuthError(t *testing.T) { func TestAuthError(t *testing.T) {
metrics.RegisterAll()
context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{}, &fake.Clientset{}, nil, nil, nil) context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{}, &fake.Clientset{}, nil, nil, nil)
assert.NoError(t, err) assert.NoError(t, err)
nodeGroup := &mockprovider.NodeGroup{} nodeGroup := &mockprovider.NodeGroup{}
info := nodegroupset.ScaleUpInfo{Group: nodeGroup} info := nodegroupset.ScaleUpInfo{Group: nodeGroup}
nodeGroup.On("Id").Return("A") nodeGroup.On("Id").Return("A")
nodeGroup.On("IncreaseSize", 0).Return(errors.NewAutoscalerError(errors.AuthorizationError, "")) nodeGroup.On("IncreaseSize", 0).Return(errors.NewAutoscalerError(errors.AutoscalerErrorType("abcd"), ""))
clusterStateRegistry := clusterstate.NewClusterStateRegistry(nil, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff()) clusterStateRegistry := clusterstate.NewClusterStateRegistry(nil, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff())
aerr := executeScaleUp(&context, clusterStateRegistry, info, "", time.Now()) aerr := executeScaleUp(&context, clusterStateRegistry, info, "", time.Now())
assert.Error(t, aerr) assert.Error(t, aerr)
assert.Equal(t, errors.AuthorizationError, aerr.Type())
req, err := http.NewRequest("GET", "/", nil)
if err != nil {
t.Fatal(err)
}
rr := httptest.NewRecorder()
handler := http.HandlerFunc(legacyregistry.Handler().ServeHTTP)
handler.ServeHTTP(rr, req)
// Check that the status code is what we expect.
if status := rr.Code; status != http.StatusOK {
t.Errorf("handler returned wrong status code: got %v want %v",
status, http.StatusOK)
}
// Check that the failed scale up reason is set correctly.
assert.Contains(t, rr.Body.String(), "cluster_autoscaler_failed_scale_ups_total{reason=\"abcd\"} 1")
} }

View File

@@ -65,8 +65,6 @@ const (
APIError FailedScaleUpReason = "apiCallError" APIError FailedScaleUpReason = "apiCallError"
// Timeout was encountered when trying to scale-up // Timeout was encountered when trying to scale-up
Timeout FailedScaleUpReason = "timeout" Timeout FailedScaleUpReason = "timeout"
// AuthorizationError is an authorization error.
AuthorizationError FailedScaleUpReason = "authorizationError"
// autoscaledGroup is managed by CA // autoscaledGroup is managed by CA
autoscaledGroup NodeGroupType = "autoscaled" autoscaledGroup NodeGroupType = "autoscaled"

View File

@@ -61,8 +61,6 @@ const (
// NodeGroupDoesNotExistError signifies that a NodeGroup // NodeGroupDoesNotExistError signifies that a NodeGroup
// does not exist. // does not exist.
NodeGroupDoesNotExistError AutoscalerErrorType = "nodeGroupDoesNotExistError" NodeGroupDoesNotExistError AutoscalerErrorType = "nodeGroupDoesNotExistError"
// AuthorizationError signifies that an authorization error occurred.
AuthorizationError AutoscalerErrorType = "authorizationError"
) )
// NewAutoscalerError returns new autoscaler error with a message constructed from format string // NewAutoscalerError returns new autoscaler error with a message constructed from format string