fix scale up bug: skip instances/nodes returned without IDs after a failed scale up

eric-higgins-ai 2025-02-21 11:01:34 -08:00
parent 0a34bf5d3a
commit 9c0357a6f2
1 changed file with 18 additions and 12 deletions


@@ -465,12 +465,14 @@ func (m *ociManagerImpl) GetExistingNodePoolSizeViaCompute(np NodePool) (int, er
 		case core.InstanceLifecycleStateStopped, core.InstanceLifecycleStateTerminated:
 			klog.V(4).Infof("skipping instance is in stopped/terminated state: %q", *item.Id)
 		case core.InstanceLifecycleStateCreatingImage, core.InstanceLifecycleStateStarting, core.InstanceLifecycleStateProvisioning, core.InstanceLifecycleStateMoving:
-			instances = append(instances, cloudprovider.Instance{
-				Id: *item.Id,
-				Status: &cloudprovider.InstanceStatus{
-					State: cloudprovider.InstanceCreating,
-				},
-			})
+			if *item.Id != "" {
+				instances = append(instances, cloudprovider.Instance{
+					Id: *item.Id,
+					Status: &cloudprovider.InstanceStatus{
+						State: cloudprovider.InstanceCreating,
+					},
+				})
+			}
 		// in case an instance is running, it could either be installing OKE software or become a Ready node.
 		// we do not know, but as we only need info if a node is stopped / terminated, we do not care
 		case core.InstanceLifecycleStateRunning:
@@ -560,12 +562,16 @@ func (m *ociManagerImpl) GetNodePoolNodes(np NodePool) ([]cloudprovider.Instance
 				},
 			})
 		case oke.NodeLifecycleStateCreating, oke.NodeLifecycleStateUpdating:
-			instances = append(instances, cloudprovider.Instance{
-				Id: *node.Id,
-				Status: &cloudprovider.InstanceStatus{
-					State: cloudprovider.InstanceCreating,
-				},
-			})
+			// A node pool can fail to scale up if there's no capacity in the region. In that case, the node pool will be
+			// returned by the API, but it will not actually exist or have an ID, so we don't want to tell the autoscaler about it.
+			if *node.Id != "" {
+				instances = append(instances, cloudprovider.Instance{
+					Id: *node.Id,
+					Status: &cloudprovider.InstanceStatus{
+						State: cloudprovider.InstanceCreating,
+					},
+				})
+			}
 		case oke.NodeLifecycleStateActive:
 			instances = append(instances, cloudprovider.Instance{
 				Id: *node.Id,
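
For context, here is a minimal standalone sketch of the guard this commit adds. It is not the provider's code: the types `instance`, `instanceStatus`, and `nodeStub`, the helper `instancesForNodes`, and the string lifecycle states are simplified, hypothetical stand-ins for `cloudprovider.Instance` and the OKE node objects. Only the empty-ID check mirrors the actual change: a node placeholder returned after a capacity-starved scale up has no ID and must not be reported to the autoscaler.

// Standalone sketch of the empty-ID guard, using simplified stand-in types.
// Assumptions: nodeStub, instance, instanceStatus, and instancesForNodes are
// illustrative only and do not exist in the cluster-autoscaler codebase.
package main

import "fmt"

// Minimal stand-in for cloudprovider.InstanceState.
type instanceState int

const instanceCreating instanceState = 1

// Minimal stand-ins for cloudprovider.InstanceStatus / cloudprovider.Instance.
type instanceStatus struct {
	State instanceState
}

type instance struct {
	Id     string
	Status *instanceStatus
}

// nodeStub mimics the relevant shape of a node returned by the OKE API:
// a pointer ID (which may point at an empty string) and a lifecycle state.
type nodeStub struct {
	Id             *string
	LifecycleState string
}

// instancesForNodes builds the instance list the autoscaler would see.
// Nodes in a creating/updating state without an ID (e.g. a scale up that
// failed for lack of capacity) are skipped, so the autoscaler is never told
// about phantom instances it can never resolve.
func instancesForNodes(nodes []nodeStub) []instance {
	var instances []instance
	for _, node := range nodes {
		switch node.LifecycleState {
		case "CREATING", "UPDATING":
			if node.Id != nil && *node.Id != "" {
				instances = append(instances, instance{
					Id:     *node.Id,
					Status: &instanceStatus{State: instanceCreating},
				})
			}
		case "ACTIVE":
			if node.Id != nil {
				instances = append(instances, instance{Id: *node.Id})
			}
		}
	}
	return instances
}

func main() {
	realID := "ocid1.instance.oc1..example"
	emptyID := ""
	nodes := []nodeStub{
		{Id: &realID, LifecycleState: "CREATING"},
		{Id: &emptyID, LifecycleState: "CREATING"}, // failed scale-up placeholder
	}
	// Only the node with a real ID survives the filter.
	fmt.Println(instancesForNodes(nodes))
}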