fix scale up bug

This commit is contained in:
eric-higgins-ai 2025-02-21 11:01:34 -08:00
parent 0a34bf5d3a
commit 9c0357a6f2
1 changed files with 18 additions and 12 deletions

View File

@ -465,12 +465,14 @@ func (m *ociManagerImpl) GetExistingNodePoolSizeViaCompute(np NodePool) (int, er
case core.InstanceLifecycleStateStopped, core.InstanceLifecycleStateTerminated:
klog.V(4).Infof("skipping instance is in stopped/terminated state: %q", *item.Id)
case core.InstanceLifecycleStateCreatingImage, core.InstanceLifecycleStateStarting, core.InstanceLifecycleStateProvisioning, core.InstanceLifecycleStateMoving:
instances = append(instances, cloudprovider.Instance{
Id: *item.Id,
Status: &cloudprovider.InstanceStatus{
State: cloudprovider.InstanceCreating,
},
})
if *item.Id != "" {
instances = append(instances, cloudprovider.Instance{
Id: *item.Id,
Status: &cloudprovider.InstanceStatus{
State: cloudprovider.InstanceCreating,
},
})
}
// If an instance is running, it could either still be installing OKE software or already be a Ready node.
// We cannot tell which, but since we only need to know whether a node is stopped / terminated, it does not matter.
case core.InstanceLifecycleStateRunning:
@ -560,12 +562,16 @@ func (m *ociManagerImpl) GetNodePoolNodes(np NodePool) ([]cloudprovider.Instance
},
})
case oke.NodeLifecycleStateCreating, oke.NodeLifecycleStateUpdating:
instances = append(instances, cloudprovider.Instance{
Id: *node.Id,
Status: &cloudprovider.InstanceStatus{
State: cloudprovider.InstanceCreating,
},
})
// A node pool can fail to scale up if there's no capacity in the region. In that case, the API still returns the
// node, but the node does not actually exist and has an empty ID, so we don't want to tell the autoscaler about it.
if *node.Id != "" {
instances = append(instances, cloudprovider.Instance{
Id: *node.Id,
Status: &cloudprovider.InstanceStatus{
State: cloudprovider.InstanceCreating,
},
})
}
case oke.NodeLifecycleStateActive:
instances = append(instances, cloudprovider.Instance{
Id: *node.Id,