Correctly classify errors for failed scale-ups
This commit is contained in:
parent
63259fb5dd
commit
a3bada3708
|
|
@ -287,7 +287,7 @@ func (csr *ClusterStateRegistry) backoffNodeGroup(nodeGroup cloudprovider.NodeGr
|
||||||
func (csr *ClusterStateRegistry) RegisterFailedScaleUp(nodeGroup cloudprovider.NodeGroup, reason metrics.FailedScaleUpReason, currentTime time.Time) {
|
func (csr *ClusterStateRegistry) RegisterFailedScaleUp(nodeGroup cloudprovider.NodeGroup, reason metrics.FailedScaleUpReason, currentTime time.Time) {
|
||||||
csr.Lock()
|
csr.Lock()
|
||||||
defer csr.Unlock()
|
defer csr.Unlock()
|
||||||
csr.registerFailedScaleUpNoLock(nodeGroup, reason, cloudprovider.OtherErrorClass, "cloudProviderError", currentTime)
|
csr.registerFailedScaleUpNoLock(nodeGroup, reason, cloudprovider.OtherErrorClass, string(reason), currentTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (csr *ClusterStateRegistry) registerFailedScaleUpNoLock(nodeGroup cloudprovider.NodeGroup, reason metrics.FailedScaleUpReason, errorClass cloudprovider.InstanceErrorClass, errorCode string, currentTime time.Time) {
|
func (csr *ClusterStateRegistry) registerFailedScaleUpNoLock(nodeGroup cloudprovider.NodeGroup, reason metrics.FailedScaleUpReason, errorClass cloudprovider.InstanceErrorClass, errorCode string, currentTime time.Time) {
|
||||||
|
|
|
||||||
|
|
@ -666,7 +666,7 @@ func executeScaleUp(context *context.AutoscalingContext, clusterStateRegistry *c
|
||||||
increase := info.NewSize - info.CurrentSize
|
increase := info.NewSize - info.CurrentSize
|
||||||
if err := info.Group.IncreaseSize(increase); err != nil {
|
if err := info.Group.IncreaseSize(increase); err != nil {
|
||||||
context.LogRecorder.Eventf(apiv1.EventTypeWarning, "FailedToScaleUpGroup", "Scale-up failed for group %s: %v", info.Group.Id(), err)
|
context.LogRecorder.Eventf(apiv1.EventTypeWarning, "FailedToScaleUpGroup", "Scale-up failed for group %s: %v", info.Group.Id(), err)
|
||||||
clusterStateRegistry.RegisterFailedScaleUp(info.Group, metrics.APIError, now)
|
clusterStateRegistry.RegisterFailedScaleUp(info.Group, metrics.CloudProviderError, now)
|
||||||
return errors.NewAutoscalerError(errors.CloudProviderError,
|
return errors.NewAutoscalerError(errors.CloudProviderError,
|
||||||
"failed to increase node group size: %v", err)
|
"failed to increase node group size: %v", err)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,8 @@ const (
|
||||||
// Unready node was removed
|
// Unready node was removed
|
||||||
Unready NodeScaleDownReason = "unready"
|
Unready NodeScaleDownReason = "unready"
|
||||||
|
|
||||||
|
// CloudProviderError caused scale-up to fail
|
||||||
|
CloudProviderError FailedScaleUpReason = "cloudProviderError"
|
||||||
// APIError caused scale-up to fail
|
// APIError caused scale-up to fail
|
||||||
APIError FailedScaleUpReason = "apiCallError"
|
APIError FailedScaleUpReason = "apiCallError"
|
||||||
// Timeout was encountered when trying to scale up
|
// Timeout was encountered when trying to scale up
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue