Merge pull request #8491 from gvnc/oci-auto-discovery-enhancement

Read minSize and maxSize values from node pool freeform tags for OCI auto-discovery, falling back to the node group's configured min/max
This commit is contained in:
Kubernetes Prow Robot 2025-09-12 09:14:14 -07:00 committed by GitHub
commit bf86702ba8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 128 additions and 8 deletions

View File

@ -40,6 +40,8 @@ const (
nodepoolTags = "nodepoolTags"
min = "min"
max = "max"
minSize = "minSize"
maxSize = "maxSize"
)
var (
@ -90,6 +92,9 @@ func CreateNodePoolManager(cloudConfigPath string, nodeGroupAutoDiscoveryList []
var err error
var configProvider common.ConfigurationProvider
// enable SDK to look up the IMDS endpoint to figure out the right realmDomain
common.EnableInstanceMetadataServiceLookup()
if os.Getenv(ipconsts.OciUseWorkloadIdentityEnvVar) == "true" {
klog.Info("using workload identity provider")
configProvider, err = auth.OkeWorkloadIdentityConfigurationProvider()
@ -214,14 +219,15 @@ func autoDiscoverNodeGroups(m *ociManagerImpl, okeClient okeClient, nodeGroup no
if validateNodepoolTags(nodeGroup.tags, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags) {
nodepool := &nodePool{}
nodepool.id = *nodePoolSummary.Id
nodepool.minSize = nodeGroup.minSize
nodepool.maxSize = nodeGroup.maxSize
// set minSize-maxSize from nodepool free form tags, or else use nodeGroupAutoDiscovery configuration
nodepool.minSize = getIntFromMap(nodePoolSummary.FreeformTags, minSize, nodeGroup.minSize)
nodepool.maxSize = getIntFromMap(nodePoolSummary.FreeformTags, maxSize, nodeGroup.maxSize)
nodepool.manager = nodeGroup.manager
nodepool.kubeClient = nodeGroup.kubeClient
m.staticNodePools[nodepool.id] = nodepool
klog.V(5).Infof("auto discovered nodepool in compartment : %s , nodepoolid: %s", nodeGroup.compartmentId, nodepool.id)
klog.V(4).Infof("auto discovered nodepool in compartment : %s , nodepoolid: %s ,minSize: %d, maxSize:%d", nodeGroup.compartmentId, nodepool.id, nodepool.minSize, nodepool.maxSize)
} else {
klog.Warningf("nodepool ignored as the tags do not satisfy the requirement : %s , %v, %v", *nodePoolSummary.Id, nodePoolSummary.FreeformTags, nodePoolSummary.DefinedTags)
}
@ -229,6 +235,18 @@ func autoDiscoverNodeGroups(m *ociManagerImpl, okeClient okeClient, nodeGroup no
return true, nil
}
// getIntFromMap looks up key in m and parses the associated value as a
// base-10 integer. defaultValue is returned when the key is absent or when
// the stored value cannot be parsed by strconv.Atoi.
func getIntFromMap(m map[string]string, key string, defaultValue int) int {
	if raw, found := m[key]; found {
		if parsed, parseErr := strconv.Atoi(raw); parseErr == nil {
			return parsed
		}
	}
	return defaultValue
}
func validateNodepoolTags(nodeGroupTags map[string]string, freeFormTags map[string]string, definedTags map[string]map[string]interface{}) bool {
if nodeGroupTags != nil {
for tagKey, tagValue := range nodeGroupTags {
@ -394,11 +412,35 @@ func (m *ociManagerImpl) TaintToPreventFurtherSchedulingOnRestart(nodes []*apiv1
func (m *ociManagerImpl) forceRefresh() error {
// auto discover node groups
if m.nodeGroups != nil {
// empty previous nodepool map to do an auto discovery
// create a copy of m.staticNodePools to use it in comparison
staticNodePoolsCopy := make(map[string]NodePool)
for k, v := range m.staticNodePools {
staticNodePoolsCopy[k] = v
}
// empty previous nodepool map to do a fresh auto discovery
m.staticNodePools = make(map[string]NodePool)
// run auto-discovery
for _, nodeGroup := range m.nodeGroups {
autoDiscoverNodeGroups(m, m.okeClient, nodeGroup)
}
// compare the new and previous nodepool list to log the updates
for nodepoolId, nodepool := range m.staticNodePools {
if _, ok := staticNodePoolsCopy[nodepoolId]; !ok {
klog.Infof("New nodepool discovered. [id: %s ,minSize: %d, maxSize:%d]", nodepool.Id(), nodepool.MinSize(), nodepool.MaxSize())
} else if staticNodePoolsCopy[nodepoolId].MinSize() != nodepool.MinSize() || staticNodePoolsCopy[nodepoolId].MaxSize() != nodepool.MaxSize() {
klog.Infof("Nodepool min/max sizes are updated. [id: %s ,minSize: %d, maxSize:%d]", nodepool.Id(), nodepool.MinSize(), nodepool.MaxSize())
}
}
// log if there are nodepools removed from the list
for k := range staticNodePoolsCopy {
if _, ok := m.staticNodePools[k]; !ok {
klog.Infof("Previously auto-discovered nodepool removed from the managed nodepool list. nodepoolid: %s", k)
}
}
}
// rebuild nodepool cache
err := m.nodePoolCache.rebuild(m.staticNodePools, maxGetNodepoolRetries)

View File

@ -6,10 +6,12 @@ package nodepools
import (
"context"
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts"
"net/http"
"reflect"
"testing"
"time"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@ -20,6 +22,10 @@ import (
oke "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/containerengine"
)
const (
// autoDiscoveryCompartment is the compartment ID that mockOKEClient.ListNodePools
// recognizes; requests for it receive the auto-discovery fixture node pools.
autoDiscoveryCompartment = "ocid1.compartment.oc1.test-region.test"
)
func TestNodePoolFromArgs(t *testing.T) {
value := `1:5:ocid`
nodePool, err := nodePoolFromArg(value)
@ -321,8 +327,15 @@ func TestBuildGenericLabels(t *testing.T) {
// mockOKEClient is a stateless test double for the OKE client; its methods
// return canned responses instead of calling the OCI API.
type mockOKEClient struct{}
func (c mockOKEClient) GetNodePool(context.Context, oke.GetNodePoolRequest) (oke.GetNodePoolResponse, error) {
return oke.GetNodePoolResponse{}, nil
// GetNodePool returns a stub node pool that echoes the requested node pool ID
// and reports a node configuration size of 1. It never returns an error.
func (c mockOKEClient) GetNodePool(ctx context.Context, req oke.GetNodePoolRequest) (oke.GetNodePoolResponse, error) {
	var resp oke.GetNodePoolResponse
	resp.NodePool.Id = req.NodePoolId
	resp.NodePool.NodeConfigDetails = &oke.NodePoolNodeConfigDetails{Size: common.Int(1)}
	return resp, nil
}
func (c mockOKEClient) UpdateNodePool(context.Context, oke.UpdateNodePoolRequest) (oke.UpdateNodePoolResponse, error) {
return oke.UpdateNodePoolResponse{}, nil
@ -336,7 +349,39 @@ func (c mockOKEClient) DeleteNode(context.Context, oke.DeleteNodeRequest) (oke.D
}, nil
}
func (c mockOKEClient) ListNodePools(context.Context, oke.ListNodePoolsRequest) (oke.ListNodePoolsResponse, error) {
// ListNodePools returns fixture data for the auto-discovery tests. Requests
// for autoDiscoveryCompartment receive two node pool summaries sharing the
// same defined tags: "node-pool-1" carries only the "ca-managed" freeform tag,
// while "node-pool-2" additionally carries "minSize" and "maxSize" freeform
// tags. Any other request receives an empty response. No error is returned.
func (c mockOKEClient) ListNodePools(ctx context.Context, req oke.ListNodePoolsRequest) (oke.ListNodePoolsResponse, error) {
	// Only the dedicated auto-discovery compartment has fixture node pools.
	if req.CompartmentId == nil || *req.CompartmentId != autoDiscoveryCompartment {
		return oke.ListNodePoolsResponse{}, nil
	}

	sharedDefinedTags := map[string]map[string]interface{}{
		"namespace": {
			"foo": "bar",
		},
	}
	summaries := []oke.NodePoolSummary{
		{
			Id: common.String("node-pool-1"),
			FreeformTags: map[string]string{
				"ca-managed": "true",
			},
			DefinedTags: sharedDefinedTags,
		},
		{
			Id: common.String("node-pool-2"),
			FreeformTags: map[string]string{
				"ca-managed": "true",
				"minSize":    "4",
				"maxSize":    "10",
			},
			DefinedTags: sharedDefinedTags,
		},
	}
	return oke.ListNodePoolsResponse{Items: summaries}, nil
}
@ -393,8 +438,41 @@ func TestRemoveInstance(t *testing.T) {
}
}
// TestNodeGroupAutoDiscovery seeds the manager with two previously known node
// pools, runs forceRefresh against mockOKEClient, and checks that the static
// node pool map is rebuilt from the auto-discovery results:
//   - "node-pool-1" has no size tags, so it takes the node group's min/max;
//   - "node-pool-2" takes its sizes from the minSize/maxSize freeform tags;
//   - "node-pool-3" is no longer listed by the client and must be dropped.
func TestNodeGroupAutoDiscovery(t *testing.T) {
	nodeGroupArg := fmt.Sprintf("clusterId:ocid1.cluster.oc1.test-region.test,compartmentId:%s,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5", autoDiscoveryCompartment)
	nodeGroup, err := nodeGroupFromArg(nodeGroupArg)
	if err != nil {
		// Fatal rather than Error: nodeGroup is dereferenced below.
		t.Fatalf("Error: %v", err)
	}

	nodePoolCache := newNodePoolCache(nil)
	nodePoolCache.okeClient = mockOKEClient{}

	cloudConfig := &ocicommon.CloudConfig{}
	cloudConfig.Global.RefreshInterval = 5 * time.Minute
	cloudConfig.Global.CompartmentID = autoDiscoveryCompartment

	manager := &ociManagerImpl{
		nodePoolCache:   nodePoolCache,
		nodeGroups:      []nodeGroupAutoDiscovery{*nodeGroup},
		okeClient:       mockOKEClient{},
		cfg:             cloudConfig,
		staticNodePools: map[string]NodePool{},
	}

	// test data to use as initial nodepools
	nodepool2 := &nodePool{
		id: "node-pool-2", minSize: 1, maxSize: 5,
	}
	manager.staticNodePools[nodepool2.id] = nodepool2
	nodepool3 := &nodePool{
		id: "node-pool-3", minSize: 2, maxSize: 5,
	}
	manager.staticNodePools[nodepool3.id] = nodepool3

	// The discovery result in staticNodePools is what this test pins; a cache
	// rebuild error is surfaced in the log instead of being dropped.
	if err := manager.forceRefresh(); err != nil {
		t.Logf("forceRefresh returned error: %v", err)
	}

	// Expected sizes: node group defaults (min:1,max:5) unless overridden by
	// the minSize/maxSize freeform tags returned by the mock client.
	expected := map[string]struct{ wantMin, wantMax int }{
		"node-pool-1": {wantMin: 1, wantMax: 5},
		"node-pool-2": {wantMin: 4, wantMax: 10},
	}
	if len(manager.staticNodePools) != len(expected) {
		t.Errorf("got %d static node pools, want %d", len(manager.staticNodePools), len(expected))
	}
	for id, want := range expected {
		got, ok := manager.staticNodePools[id]
		if !ok {
			t.Errorf("node pool %q was not auto-discovered", id)
			continue
		}
		if got.MinSize() != want.wantMin || got.MaxSize() != want.wantMax {
			t.Errorf("node pool %q: got min/max %d/%d, want %d/%d", id, got.MinSize(), got.MaxSize(), want.wantMin, want.wantMax)
		}
	}
	if _, ok := manager.staticNodePools["node-pool-3"]; ok {
		t.Errorf("node pool %q should have been removed after refresh", "node-pool-3")
	}
}
func TestNodeGroupFromArg(t *testing.T) {
var nodeGroupArg = "clusterId:ocid1.cluster.oc1.test-region.test,compartmentId:ocid1.compartment.oc1.test-region.test,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5"
var nodeGroupArg = fmt.Sprintf("clusterId:ocid1.cluster.oc1.test-region.test,compartmentId:%s,nodepoolTags:ca-managed=true&namespace.foo=bar,min:1,max:5", autoDiscoveryCompartment)
nodeGroupAutoDiscovery, err := nodeGroupFromArg(nodeGroupArg)
if err != nil {
t.Errorf("Error: #{err}")