Merge pull request #1254 from losipiuk/lo/muzon2

NodeGroupManager.CreateNodeGroup can return extra created node groups.

commit 1fa94e35f0
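The gist of the change: NodeGroupManager.CreateNodeGroup now returns a CreateNodeGroupResult value instead of a bare cloudprovider.NodeGroup, so callers can learn about any additional node groups the manager had to create as a side effect. A minimal sketch of the new calling convention follows; handleNewNodeGroup and the register callback are invented names for illustration, only the NodeGroupManager interface, CreateNodeGroupResult type, and cloudprovider/context/errors types come from this patch:

	// Hypothetical caller of the new API (register is an invented callback).
	func handleNewNodeGroup(ctx *context.AutoscalingContext, manager nodegroups.NodeGroupManager,
		group cloudprovider.NodeGroup, register func(cloudprovider.NodeGroup)) errors.AutoscalerError {
		result, err := manager.CreateNodeGroup(ctx, group)
		if err != nil {
			return err
		}
		// The node group that was actually requested.
		register(result.MainCreatedNodeGroup)
		// Groups created as a side effect, e.g. per-zone siblings
		// in a multizonal deployment.
		for _, extra := range result.ExtraCreatedNodeGroups {
			register(extra)
		}
		return nil
	}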
@@ -26,6 +26,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/context"
 	"k8s.io/autoscaler/cluster-autoscaler/expander/random"
 	"k8s.io/autoscaler/cluster-autoscaler/metrics"
+	"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
@@ -92,11 +93,14 @@ type mockAutoprovisioningNodeGroupManager struct {
 	t *testing.T
 }
 
-func (p *mockAutoprovisioningNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (cloudprovider.NodeGroup, errors.AutoscalerError) {
+func (p *mockAutoprovisioningNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (nodegroups.CreateNodeGroupResult, errors.AutoscalerError) {
 	newNodeGroup, err := nodeGroup.Create()
 	assert.NoError(p.t, err)
 	metrics.RegisterNodeGroupCreation()
-	return newNodeGroup, nil
+	result := nodegroups.CreateNodeGroupResult{
+		MainCreatedNodeGroup: newNodeGroup,
+	}
+	return result, nil
 }
 
 func (p *mockAutoprovisioningNodeGroupManager) RemoveUnneededNodeGroups(context *context.AutoscalingContext) error {
@@ -455,7 +455,8 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 
 	if !bestOption.NodeGroup.Exist() {
 		oldId := bestOption.NodeGroup.Id()
-		bestOption.NodeGroup, err = processors.NodeGroupManager.CreateNodeGroup(context, bestOption.NodeGroup)
+		createNodeGroupResult, err := processors.NodeGroupManager.CreateNodeGroup(context, bestOption.NodeGroup)
+		bestOption.NodeGroup = createNodeGroupResult.MainCreatedNodeGroup
 		if err != nil {
 			return nil, err
 		}
@@ -466,6 +467,16 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 			nodeInfos[bestOption.NodeGroup.Id()] = nodeInfos[oldId]
 			delete(nodeInfos, oldId)
 		}
+
+		for _, nodeGroup := range createNodeGroupResult.ExtraCreatedNodeGroups {
+			nodeInfo, err := GetNodeInfoFromTemplate(nodeGroup, daemonSets, context.PredicateChecker)
+
+			if err != nil {
+				glog.Warningf("Cannot build node info for newly created extra node group %v; balancing similar node groups will not work; err=%v", nodeGroup.Id(), err)
+				continue
+			}
+			nodeInfos[nodeGroup.Id()] = nodeInfo
+		}
 	}
 
 	nodeInfo, found := nodeInfos[bestOption.NodeGroup.Id()]
@@ -266,25 +266,16 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
 
 		// No good template, trying to generate one. This is called only if there are no
 		// working nodes in the node groups. By default CA tries to use a real-world example.
-		baseNodeInfo, err := nodeGroup.TemplateNodeInfo()
+		nodeInfo, err := GetNodeInfoFromTemplate(nodeGroup, daemonsets, predicateChecker)
 		if err != nil {
 			if err == cloudprovider.ErrNotImplemented {
 				continue
 			} else {
 				glog.Errorf("Unable to build proper template node for %s: %v", id, err)
-				return map[string]*schedulercache.NodeInfo{}, errors.ToAutoscalerError(
-					errors.CloudProviderError, err)
+				return map[string]*schedulercache.NodeInfo{}, errors.ToAutoscalerError(errors.CloudProviderError, err)
 			}
 		}
-		pods := daemonset.GetDaemonSetPodsForNode(baseNodeInfo, daemonsets, predicateChecker)
-		pods = append(pods, baseNodeInfo.Pods()...)
-		fullNodeInfo := schedulercache.NewNodeInfo(pods...)
-		fullNodeInfo.SetNode(baseNodeInfo.Node())
-		sanitizedNodeInfo, typedErr := sanitizeNodeInfo(fullNodeInfo, id)
-		if typedErr != nil {
-			return map[string]*schedulercache.NodeInfo{}, typedErr
-		}
-		result[id] = sanitizedNodeInfo
+		result[id] = nodeInfo
 	}
 
 	// Last resort - unready/unschedulable nodes.
@@ -309,6 +300,25 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
 	return result, nil
 }
 
+// GetNodeInfoFromTemplate returns a NodeInfo object built based on the template returned by NodeGroup.TemplateNodeInfo().
+func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*extensionsv1.DaemonSet, predicateChecker *simulator.PredicateChecker) (*schedulercache.NodeInfo, errors.AutoscalerError) {
+	id := nodeGroup.Id()
+	baseNodeInfo, err := nodeGroup.TemplateNodeInfo()
+	if err != nil {
+		return nil, errors.ToAutoscalerError(errors.CloudProviderError, err)
+	}
+
+	pods := daemonset.GetDaemonSetPodsForNode(baseNodeInfo, daemonsets, predicateChecker)
+	pods = append(pods, baseNodeInfo.Pods()...)
+	fullNodeInfo := schedulercache.NewNodeInfo(pods...)
+	fullNodeInfo.SetNode(baseNodeInfo.Node())
+	sanitizedNodeInfo, typedErr := sanitizeNodeInfo(fullNodeInfo, id)
+	if typedErr != nil {
+		return nil, typedErr
+	}
+	return sanitizedNodeInfo, nil
+}
+
 // FilterOutNodesFromNotAutoscaledGroups return subset of input nodes for which cloud provider does not
 // return autoscaled node group.
 func FilterOutNodesFromNotAutoscaledGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.CloudProvider) ([]*apiv1.Node, errors.AutoscalerError) {
@@ -24,7 +24,7 @@ import (
 
 // NodeGroupManager is responsible for creating/deleting node groups.
 type NodeGroupManager interface {
-	CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (cloudprovider.NodeGroup, errors.AutoscalerError)
+	CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (CreateNodeGroupResult, errors.AutoscalerError)
 	RemoveUnneededNodeGroups(context *context.AutoscalingContext) error
 	CleanUp()
 }
@@ -35,9 +35,20 @@ type NodeGroupManager interface {
 type NoOpNodeGroupManager struct {
 }
 
+// CreateNodeGroupResult captures the result of a successful NodeGroupManager.CreateNodeGroup call.
+type CreateNodeGroupResult struct {
+	// Main created node group, matching the requested node group passed to the CreateNodeGroup call.
+	MainCreatedNodeGroup cloudprovider.NodeGroup
+
+	// List of extra node groups created by the CreateNodeGroup call. Non-empty if, due to manager-specific
+	// constraints, creating one node group requires creating other ones (e.g. a matching node group
+	// must exist in each zone for multizonal deployments).
+	ExtraCreatedNodeGroups []cloudprovider.NodeGroup
+}
+
 // CreateNodeGroup always returns internal error. It must not be called on NoOpNodeGroupManager.
-func (*NoOpNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (cloudprovider.NodeGroup, errors.AutoscalerError) {
-	return nil, errors.NewAutoscalerError(errors.InternalError, "not implemented")
+func (*NoOpNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (CreateNodeGroupResult, errors.AutoscalerError) {
+	return CreateNodeGroupResult{}, errors.NewAutoscalerError(errors.InternalError, "not implemented")
 }
 
 // RemoveUnneededNodeGroups does nothing in NoOpNodeGroupManager
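For comparison, an implementation that actually populates ExtraCreatedNodeGroups might look roughly like the sketch below; the multizonal type, its zones field, and the createInZone hook are invented for illustration and are not part of this patch (only CreateNodeGroup is shown, the other interface methods are omitted):

	// Hypothetical manager: a matching group must exist in every zone, so the
	// siblings are created too and reported back to the caller.
	type multizonalNodeGroupManager struct {
		zones        []string
		createInZone func(zone string, g cloudprovider.NodeGroup) (cloudprovider.NodeGroup, error) // invented hook
	}

	func (m *multizonalNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (CreateNodeGroupResult, errors.AutoscalerError) {
		main, err := nodeGroup.Create()
		if err != nil {
			return CreateNodeGroupResult{}, errors.ToAutoscalerError(errors.CloudProviderError, err)
		}
		result := CreateNodeGroupResult{MainCreatedNodeGroup: main}
		for _, zone := range m.zones {
			extra, err := m.createInZone(zone, nodeGroup)
			if err != nil {
				return result, errors.ToAutoscalerError(errors.CloudProviderError, err)
			}
			result.ExtraCreatedNodeGroups = append(result.ExtraCreatedNodeGroups, extra)
		}
		return result, nil
	}

ScaleUp then builds a node info for each entry in ExtraCreatedNodeGroups (second hunk above), so balancing of similar node groups keeps working for the sibling groups.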