Merge pull request #1254 from losipiuk/lo/muzon2

NodeGroupManager.CreateNodeGroup can return extra created node groups.
This commit is contained in:
k8s-ci-robot 2018-09-19 07:35:52 -07:00 committed by GitHub
commit 1fa94e35f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 54 additions and 18 deletions

View File

@ -26,6 +26,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/context"
"k8s.io/autoscaler/cluster-autoscaler/expander/random"
"k8s.io/autoscaler/cluster-autoscaler/metrics"
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
@ -92,11 +93,14 @@ type mockAutoprovisioningNodeGroupManager struct {
t *testing.T
}
func (p *mockAutoprovisioningNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (cloudprovider.NodeGroup, errors.AutoscalerError) {
func (p *mockAutoprovisioningNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (nodegroups.CreateNodeGroupResult, errors.AutoscalerError) {
newNodeGroup, err := nodeGroup.Create()
assert.NoError(p.t, err)
metrics.RegisterNodeGroupCreation()
return newNodeGroup, nil
result := nodegroups.CreateNodeGroupResult{
MainCreatedNodeGroup: newNodeGroup,
}
return result, nil
}
func (p *mockAutoprovisioningNodeGroupManager) RemoveUnneededNodeGroups(context *context.AutoscalingContext) error {

View File

@ -455,7 +455,8 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
if !bestOption.NodeGroup.Exist() {
oldId := bestOption.NodeGroup.Id()
bestOption.NodeGroup, err = processors.NodeGroupManager.CreateNodeGroup(context, bestOption.NodeGroup)
createNodeGroupResult, err := processors.NodeGroupManager.CreateNodeGroup(context, bestOption.NodeGroup)
bestOption.NodeGroup = createNodeGroupResult.MainCreatedNodeGroup
if err != nil {
return nil, err
}
@ -466,6 +467,16 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
nodeInfos[bestOption.NodeGroup.Id()] = nodeInfos[oldId]
delete(nodeInfos, oldId)
}
for _, nodeGroup := range createNodeGroupResult.ExtraCreatedNodeGroups {
nodeInfo, err := GetNodeInfoFromTemplate(nodeGroup, daemonSets, context.PredicateChecker)
if err != nil {
glog.Warning("Cannot build node info for newly created extra node group %v; balancing similar node groups will not work; err=%v", nodeGroup.Id(), err)
continue
}
nodeInfos[nodeGroup.Id()] = nodeInfo
}
}
nodeInfo, found := nodeInfos[bestOption.NodeGroup.Id()]

View File

@ -266,25 +266,16 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
// No good template, trying to generate one. This is called only if there are no
// working nodes in the node groups. By default CA tries to use a real-world example.
baseNodeInfo, err := nodeGroup.TemplateNodeInfo()
nodeInfo, err := GetNodeInfoFromTemplate(nodeGroup, daemonsets, predicateChecker)
if err != nil {
if err == cloudprovider.ErrNotImplemented {
continue
} else {
glog.Errorf("Unable to build proper template node for %s: %v", id, err)
return map[string]*schedulercache.NodeInfo{}, errors.ToAutoscalerError(
errors.CloudProviderError, err)
return map[string]*schedulercache.NodeInfo{}, errors.ToAutoscalerError(errors.CloudProviderError, err)
}
}
pods := daemonset.GetDaemonSetPodsForNode(baseNodeInfo, daemonsets, predicateChecker)
pods = append(pods, baseNodeInfo.Pods()...)
fullNodeInfo := schedulercache.NewNodeInfo(pods...)
fullNodeInfo.SetNode(baseNodeInfo.Node())
sanitizedNodeInfo, typedErr := sanitizeNodeInfo(fullNodeInfo, id)
if typedErr != nil {
return map[string]*schedulercache.NodeInfo{}, typedErr
}
result[id] = sanitizedNodeInfo
result[id] = nodeInfo
}
// Last resort - unready/unschedulable nodes.
@ -309,6 +300,25 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
return result, nil
}
// GetNodeInfoFromTemplate returns NodeInfo object built base on TemplateNodeInfo returned by NodeGroup.TemplateNodeInfo().
func GetNodeInfoFromTemplate(nodeGroup cloudprovider.NodeGroup, daemonsets []*extensionsv1.DaemonSet, predicateChecker *simulator.PredicateChecker) (*schedulercache.NodeInfo, errors.AutoscalerError) {
id := nodeGroup.Id()
baseNodeInfo, err := nodeGroup.TemplateNodeInfo()
if err != nil {
return nil, errors.ToAutoscalerError(errors.CloudProviderError, err)
}
pods := daemonset.GetDaemonSetPodsForNode(baseNodeInfo, daemonsets, predicateChecker)
pods = append(pods, baseNodeInfo.Pods()...)
fullNodeInfo := schedulercache.NewNodeInfo(pods...)
fullNodeInfo.SetNode(baseNodeInfo.Node())
sanitizedNodeInfo, typedErr := sanitizeNodeInfo(fullNodeInfo, id)
if typedErr != nil {
return nil, typedErr
}
return sanitizedNodeInfo, nil
}
// FilterOutNodesFromNotAutoscaledGroups return subset of input nodes for which cloud provider does not
// return autoscaled node group.
func FilterOutNodesFromNotAutoscaledGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.CloudProvider) ([]*apiv1.Node, errors.AutoscalerError) {

View File

@ -24,7 +24,7 @@ import (
// NodeGroupManager is responsible for creating/deleting node groups.
type NodeGroupManager interface {
CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (cloudprovider.NodeGroup, errors.AutoscalerError)
CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (CreateNodeGroupResult, errors.AutoscalerError)
RemoveUnneededNodeGroups(context *context.AutoscalingContext) error
CleanUp()
}
@ -35,9 +35,20 @@ type NodeGroupManager interface {
type NoOpNodeGroupManager struct {
}
// CreateNodeGroupResult result captures result of successful NodeGroupManager.CreateNodeGroup call.
type CreateNodeGroupResult struct {
// Main created node group, matching the requested node group passed to CreateNodeGroup call
MainCreatedNodeGroup cloudprovider.NodeGroup
// List of extra node groups created by CreateNodeGroup call. Non-empty if due manager specific
// constraints creating one node group requires creating other ones (e.g. matching node group
// must exist in each zone for multizonal deployments)
ExtraCreatedNodeGroups []cloudprovider.NodeGroup
}
// CreateNodeGroup always returns internal error. It must not be called on NoOpNodeGroupManager.
func (*NoOpNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (cloudprovider.NodeGroup, errors.AutoscalerError) {
return nil, errors.NewAutoscalerError(errors.InternalError, "not implemented")
func (*NoOpNodeGroupManager) CreateNodeGroup(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (CreateNodeGroupResult, errors.AutoscalerError) {
return CreateNodeGroupResult{}, errors.NewAutoscalerError(errors.InternalError, "not implemented")
}
// RemoveUnneededNodeGroups does nothing in NoOpNodeGroupManager