cluster-autoscaler/skip-node: unblock the cluster autoscaler when a single node group has a node error
Signed-off-by: Julien Balestra <julien.balestra@datadoghq.com>
This commit is contained in:
		
							parent
							
								
									7dbdb9be6b
								
							
						
					
					
						commit
						3441f616e1
					
				| 
						 | 
				
			
			@ -58,13 +58,14 @@ type GceCache struct {
 | 
			
		|||
	cacheMutex sync.Mutex
 | 
			
		||||
 | 
			
		||||
	// Cache content.
 | 
			
		||||
	migs                   map[GceRef]Mig
 | 
			
		||||
	instanceRefToMigRef    map[GceRef]GceRef
 | 
			
		||||
	resourceLimiter        *cloudprovider.ResourceLimiter
 | 
			
		||||
	machinesCache          map[MachineTypeKey]*gce.MachineType
 | 
			
		||||
	migTargetSizeCache     map[GceRef]int64
 | 
			
		||||
	migBaseNameCache       map[GceRef]string
 | 
			
		||||
	instanceTemplatesCache map[GceRef]*gce.InstanceTemplate
 | 
			
		||||
	migs                     map[GceRef]Mig
 | 
			
		||||
	instanceRefToMigRef      map[GceRef]GceRef
 | 
			
		||||
	instancesFromUnknownMigs map[GceRef]struct{}
 | 
			
		||||
	resourceLimiter          *cloudprovider.ResourceLimiter
 | 
			
		||||
	machinesCache            map[MachineTypeKey]*gce.MachineType
 | 
			
		||||
	migTargetSizeCache       map[GceRef]int64
 | 
			
		||||
	migBaseNameCache         map[GceRef]string
 | 
			
		||||
	instanceTemplatesCache   map[GceRef]*gce.InstanceTemplate
 | 
			
		||||
 | 
			
		||||
	// Service used to refresh cache.
 | 
			
		||||
	GceService AutoscalingGceClient
 | 
			
		||||
| 
						 | 
				
			
			@ -73,13 +74,14 @@ type GceCache struct {
 | 
			
		|||
// NewGceCache creates empty GceCache.
 | 
			
		||||
func NewGceCache(gceService AutoscalingGceClient) *GceCache {
 | 
			
		||||
	return &GceCache{
 | 
			
		||||
		migs:                   map[GceRef]Mig{},
 | 
			
		||||
		instanceRefToMigRef:    map[GceRef]GceRef{},
 | 
			
		||||
		machinesCache:          map[MachineTypeKey]*gce.MachineType{},
 | 
			
		||||
		migTargetSizeCache:     map[GceRef]int64{},
 | 
			
		||||
		migBaseNameCache:       map[GceRef]string{},
 | 
			
		||||
		instanceTemplatesCache: map[GceRef]*gce.InstanceTemplate{},
 | 
			
		||||
		GceService:             gceService,
 | 
			
		||||
		migs:                     map[GceRef]Mig{},
 | 
			
		||||
		instanceRefToMigRef:      map[GceRef]GceRef{},
 | 
			
		||||
		instancesFromUnknownMigs: map[GceRef]struct{}{},
 | 
			
		||||
		machinesCache:            map[MachineTypeKey]*gce.MachineType{},
 | 
			
		||||
		migTargetSizeCache:       map[GceRef]int64{},
 | 
			
		||||
		migBaseNameCache:         map[GceRef]string{},
 | 
			
		||||
		instanceTemplatesCache:   map[GceRef]*gce.InstanceTemplate{},
 | 
			
		||||
		GceService:               gceService,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -114,7 +116,7 @@ func (gc *GceCache) UnregisterMig(toBeRemoved Mig) bool {
 | 
			
		|||
	if found {
 | 
			
		||||
		klog.V(1).Infof("Unregistered Mig %s", toBeRemoved.GceRef().String())
 | 
			
		||||
		delete(gc.migs, toBeRemoved.GceRef())
 | 
			
		||||
		gc.removeInstancesForMig(toBeRemoved.GceRef())
 | 
			
		||||
		gc.removeInstancesForMigs(toBeRemoved.GceRef())
 | 
			
		||||
		return true
 | 
			
		||||
	}
 | 
			
		||||
	return false
 | 
			
		||||
| 
						 | 
				
			
			@ -157,6 +159,8 @@ func (gc *GceCache) GetMigForInstance(instanceRef GceRef) (Mig, error) {
 | 
			
		|||
			return nil, fmt.Errorf("instance %+v belongs to unregistered mig %+v", instanceRef, migRef)
 | 
			
		||||
		}
 | 
			
		||||
		return mig, nil
 | 
			
		||||
	} else if _, found := gc.instancesFromUnknownMigs[instanceRef]; found {
 | 
			
		||||
		return nil, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, migRef := range gc.getMigRefs() {
 | 
			
		||||
| 
						 | 
				
			
			@ -182,7 +186,9 @@ func (gc *GceCache) GetMigForInstance(instanceRef GceRef) (Mig, error) {
 | 
			
		|||
 | 
			
		||||
			migRef, found := gc.instanceRefToMigRef[instanceRef]
 | 
			
		||||
			if !found {
 | 
			
		||||
				return nil, fmt.Errorf("instance %+v belongs to unknown mig", instanceRef)
 | 
			
		||||
				klog.Warningf("instance %+v belongs to unknown mig", instanceRef)
 | 
			
		||||
				gc.instancesFromUnknownMigs[instanceRef] = struct{}{}
 | 
			
		||||
				return nil, nil
 | 
			
		||||
			}
 | 
			
		||||
			mig, found := gc.getMigNoLock(migRef)
 | 
			
		||||
			if !found {
 | 
			
		||||
| 
						 | 
				
			
			@ -195,10 +201,11 @@ func (gc *GceCache) GetMigForInstance(instanceRef GceRef) (Mig, error) {
 | 
			
		|||
	return nil, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (gc *GceCache) removeInstancesForMig(migRef GceRef) {
 | 
			
		||||
func (gc *GceCache) removeInstancesForMigs(migRef GceRef) {
 | 
			
		||||
	for instanceRef, instanceMigRef := range gc.instanceRefToMigRef {
 | 
			
		||||
		if migRef == instanceMigRef {
 | 
			
		||||
			delete(gc.instanceRefToMigRef, instanceRef)
 | 
			
		||||
			delete(gc.instancesFromUnknownMigs, instanceRef)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -219,7 +226,7 @@ func (gc *GceCache) regenerateInstanceCacheForMigNoLock(migRef GceRef) error {
 | 
			
		|||
	klog.V(4).Infof("Regenerating MIG information for %s", migRef.String())
 | 
			
		||||
 | 
			
		||||
	// cleanup old entries
 | 
			
		||||
	gc.removeInstancesForMig(migRef)
 | 
			
		||||
	gc.removeInstancesForMigs(migRef)
 | 
			
		||||
 | 
			
		||||
	instances, err := gc.GceService.FetchMigInstances(migRef)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
| 
						 | 
				
			
			@ -242,6 +249,7 @@ func (gc *GceCache) RegenerateInstancesCache() error {
 | 
			
		|||
	defer gc.cacheMutex.Unlock()
 | 
			
		||||
 | 
			
		||||
	gc.instanceRefToMigRef = make(map[GceRef]GceRef)
 | 
			
		||||
	gc.instancesFromUnknownMigs = make(map[GceRef]struct{})
 | 
			
		||||
	for _, migRef := range gc.getMigRefs() {
 | 
			
		||||
		err := gc.regenerateInstanceCacheForMigNoLock(migRef)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -194,6 +194,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 | 
			
		|||
	stateUpdateStart := time.Now()
 | 
			
		||||
	allNodes, readyNodes, typedErr := a.obtainNodeLists(a.CloudProvider)
 | 
			
		||||
	if typedErr != nil {
 | 
			
		||||
		klog.Errorf("Failed to get node list: %v", typedErr)
 | 
			
		||||
		return typedErr
 | 
			
		||||
	}
 | 
			
		||||
	if a.actOnEmptyCluster(allNodes, readyNodes, currentTime) {
 | 
			
		||||
| 
						 | 
				
			
			@ -202,7 +203,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 | 
			
		|||
 | 
			
		||||
	daemonsets, err := a.ListerRegistry.DaemonSetLister().List(labels.Everything())
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		klog.Errorf("Failed to get daemonset list")
 | 
			
		||||
		klog.Errorf("Failed to get daemonset list: %v", err)
 | 
			
		||||
		return errors.ToAutoscalerError(errors.ApiCallError, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -216,11 +217,13 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 | 
			
		|||
	nodeInfosForGroups, autoscalerError := getNodeInfosForGroups(
 | 
			
		||||
		readyNodes, a.nodeInfoCache, autoscalingContext.CloudProvider, autoscalingContext.ListerRegistry, daemonsets, autoscalingContext.PredicateChecker, a.ignoredTaints)
 | 
			
		||||
	if autoscalerError != nil {
 | 
			
		||||
		klog.Errorf("Failed to get node infos for groups: %v", autoscalerError)
 | 
			
		||||
		return autoscalerError.AddPrefix("failed to build node infos for node groups: ")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	typedErr = a.updateClusterState(allNodes, nodeInfosForGroups, currentTime)
 | 
			
		||||
	if typedErr != nil {
 | 
			
		||||
		klog.Errorf("Failed to update cluster state: %v", typedErr)
 | 
			
		||||
		return typedErr
 | 
			
		||||
	}
 | 
			
		||||
	metrics.UpdateDurationFromStart(metrics.UpdateState, stateUpdateStart)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue