cleanup unneeded files.

RainbowMango 2020-11-13 19:27:08 +08:00
parent 185673cba4
commit 7a1db397b4
5 changed files with 0 additions and 1083 deletions


@@ -48,10 +48,6 @@ type huaweicloudCloudProvider struct {
 resourceLimiter *cloudprovider.ResourceLimiter
 autoScalingGroup []AutoScalingGroup
 lock sync.RWMutex
-
- // Following to be refactored
- huaweiCloudManager *huaweicloudCloudManager
- nodeGroups []NodeGroup
 }
 
 func newCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) *huaweicloudCloudProvider {


@@ -1,285 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package huaweicloud
import (
"fmt"
"github.com/pkg/errors"
"gopkg.in/gcfg.v1"
"io"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud/huawei-cloud-sdk-go"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud/huawei-cloud-sdk-go/openstack"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud/huawei-cloud-sdk-go/openstack/cce/v3/clusters"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/version"
"k8s.io/apimachinery/pkg/util/sets"
"time"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
klog "k8s.io/klog/v2"
)
const (
clusterStatusAvailable = "Available"
clusterStatusUnavailable = "Unavailable"
// clusterStatusScalingUp = "ScalingUp"
clusterStatusScalingDown = "ScalingDown"
clusterStatusCreating = "Creating"
clusterStatusDeleting = "Deleting"
clusterStatusUpgrading = "Upgrading"
clusterStatusResizing = "Resizing"
clusterStatusEmpty = "Empty"
clusterStatusHibernation = "Hibernation"
waitForStatusTimeIncrement = 30 * time.Second
waitForUpdateStatusTimeout = 2 * time.Minute
waitForCompleteStatusTimout = 10 * time.Minute
deleteOperationSucceedCode = "Com.200"
deleteOperationSucceedMessage = ""
// deleteOperationFailure = "CCE_CM.0003"
)
// availableStatuses is a set of statuses that would prevent the cluster from successfully scaling.
var availableStatuses = sets.NewString(
clusterStatusUnavailable,
clusterStatusCreating,
clusterStatusDeleting,
clusterStatusUpgrading,
clusterStatusResizing,
clusterStatusEmpty,
clusterStatusHibernation,
)
type huaweicloudCloudManager struct {
clusterClient *huaweicloudsdk.ServiceClient
clusterName string // this is the id of the cluster
timeIncrement time.Duration
}
func buildManager(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (*huaweicloudCloudManager, error) {
// read from cloud-config file
var cfg CloudConfig
if configReader != nil {
if err := gcfg.ReadInto(&cfg, configReader); err != nil {
klog.Errorf("failed to read from cloud-config file: %v", err)
return nil, err
}
}
if cfg.Global.AccessKey == "" {
return nil, errors.New("couldn't find access key. An access key must be provided.")
}
if cfg.Global.SecretKey == "" {
return nil, errors.New("couldn't find secret key. A secret key must be provided.")
}
akskOptions := toAKSKOptions(cfg)
// create the authenticated provider client.
provider, authErr := openstack.AuthenticatedClient(akskOptions)
if authErr != nil {
fmt.Println("Failed to get the AuthenticatedClient for Huawei Cloud: ", authErr)
return nil, authErr
}
userAgent := huaweicloudsdk.UserAgent{}
userAgent.Prepend(fmt.Sprintf("cluster-autoscaler/%s", version.ClusterAutoscalerVersion))
userAgent.Prepend(fmt.Sprintf("cluster/%s", opts.ClusterName))
provider.UserAgent = userAgent
klog.V(5).Infof("Using user-agent %s", userAgent.Join())
// create Huawei CCE Client
clusterClient, clientErr := openstack.NewCCEV3(provider, huaweicloudsdk.EndpointOpts{})
if clientErr != nil {
fmt.Println("Failed to get the CCEV3 client: ", clientErr)
return nil, clientErr
}
manager := huaweicloudCloudManager{
clusterClient: clusterClient,
clusterName: opts.ClusterName,
timeIncrement: waitForStatusTimeIncrement,
}
// add more headers. Without this setting, CCE rejects requests with an unsupported content type error.
headerMap := make(map[string]string)
headerMap["Content-Type"] = "application/json;charset=utf-8"
clusterClient.MoreHeaders = headerMap
// make sure that the cluster exists
cceCluster, err := clusters.GetCCECluster(clusterClient, opts.ClusterName).Extract()
if err != nil {
return nil, fmt.Errorf("unable to access cluster (%s): %v", manager.clusterName, err)
}
if len(cceCluster.Metadata.Uid) == 0 {
return nil, fmt.Errorf("cluster: (%s): doesn't exist %v", manager.clusterName, err)
}
return &manager, nil
}
// nodeGroupSize gets the size of the node pool named nodeGroupName in the current cluster.
func (mgr *huaweicloudCloudManager) nodeGroupSize(nodeGroupName string) (int, error) {
nodePools, err := clusters.GetNodePools(mgr.clusterClient, mgr.clusterName).Extract()
if err != nil {
return 0, fmt.Errorf("could not retrieve node pools from CCE cluster: %v", err)
}
for _, nodePool := range nodePools.Items {
if nodePool.Metadata.Name == nodeGroupName {
return nodePool.NodePoolStatus.CurrentNode, nil
}
}
return 0, fmt.Errorf("could not find node pool with given name: %v", err)
}
/*
updateNodeCount updates the number of nodes in a given node pool of the current cluster.
Request body of updating a node pool's size:
{
"metadata": {
"name": {NODE POOL NAME}
"uid": {NODE POOL UID}
},
"spec": {
"initialNodeCount": {TARGET NODE POOL SIZE}
"autoscaling": {
"enable": true,
"minNodeCount": {MIN NODE COUNT}
"maxNodeCount": {MAX NODE COUNT}
}
}
}
*/
// Call CCE REST API to change the size of a node pool.
func (mgr *huaweicloudCloudManager) updateNodeCount(nodepool *NodeGroup, nodes int) error {
updateOpts := clusters.RequestBody{
Metadata: clusters.Metadata{
Name: nodepool.nodePoolName,
Uid: nodepool.nodePoolId,
},
Spec: clusters.Spec{
InitialNodeCount: nodes,
Autoscaling: clusters.Autoscaling{
Enable: true,
MinNodeCount: nodepool.minNodeCount,
MaxNodeCount: nodepool.maxNodeCount,
},
},
}
_, err := clusters.UpdateNodePool(mgr.clusterClient, mgr.clusterName, nodepool.nodePoolId, updateOpts).Extract()
if err != nil {
return fmt.Errorf("could not update the size of this node pool of current cluster: %v", err)
}
return nil
}
// getNodes is not implemented currently (it should return the unique ids of all nodes in a nodegroup).
func (mgr *huaweicloudCloudManager) getNodes(nodegroup string) ([]string, error) {
return []string{}, nil
}
// deleteNodes deletes the nodes with the given ids from the node pool,
// and afterwards sets the node pool size to updatedNodeCount.
func (mgr *huaweicloudCloudManager) deleteNodes(nodepool *NodeGroup, nodeIds []string, updatedNodeCount int) error {
// Step 1: delete nodes by their UIDs
err := mgr.deleteNodesHelper(mgr.clusterClient, mgr.clusterName, nodeIds)
if err != nil {
return fmt.Errorf("delete nodes failed: %v", err)
}
// Step 2: wait for the cluster to scale down
err = mgr.waitForClusterStatus(clusterStatusScalingDown, waitForUpdateStatusTimeout)
if err != nil {
fmt.Printf("cluster failed to reach %s status: %v. (May not be an error. There's a possibility that the cluster has already turned to Available status before this checking)", clusterStatusScalingDown, err)
}
// Step 3: wait for the cluster to be available
err = mgr.waitForClusterStatus(clusterStatusAvailable, waitForCompleteStatusTimout)
if err != nil {
return fmt.Errorf("cluster failed to reach %s status: %v", clusterStatusAvailable, err)
}
// Step 4: decrease the size of the node pool after deleting the nodes
err = mgr.updateNodeCount(nodepool, updatedNodeCount)
if err != nil {
return fmt.Errorf("failed to update the size of the node pool attached to current cluster: %v", err)
}
return nil
}
// deleteNodesHelper calls CCE REST API to remove a set of nodes from a cluster.
func (mgr *huaweicloudCloudManager) deleteNodesHelper(client *huaweicloudsdk.ServiceClient, clusterId string, nodeIds []string) error {
for _, nodeId := range nodeIds {
err := clusters.DeleteNode(client, clusterId, nodeId).ExtractErr()
if err != nil {
/*
Due to a CCE DELETE Node API response issue, we need special handling here.
The CCE issue: even when the DELETE operation succeeds, CCE still returns something like: {"ErrorCode":"Com.200","Message":""}
Example response of a real DELETE failure: {"ErrorCode":"CCE_CM.0003","Message":"Resource not found"}
*/
ue, ok := err.(*huaweicloudsdk.UnifiedError)
if ok && ue.ErrorCode() == deleteOperationSucceedCode && ue.Message() == deleteOperationSucceedMessage {
continue
}
return fmt.Errorf("delete nodes failed: %v", err)
}
}
return nil
}
// waitForClusterStatus keeps waiting until the cluster has reached a specified status or timeout occurs.
func (mgr *huaweicloudCloudManager) waitForClusterStatus(status string, timeout time.Duration) error {
klog.V(2).Infof("Waiting for cluster to reach %s status", status)
for start := time.Now(); time.Since(start) < timeout; time.Sleep(mgr.timeIncrement) {
currentStatus, err := mgr.getClusterStatus()
if err != nil {
return fmt.Errorf("cluster failed to reach %v status: %v", status, err)
}
if currentStatus == status {
klog.V(0).Infof("cluster has reached %s status", status)
return nil
}
}
return fmt.Errorf("waited for %v. timeout when waiting for cluster status %s", timeout, status)
}
// getClusterStatus returns the current status of the cce cluster.
func (mgr *huaweicloudCloudManager) getClusterStatus() (string, error) {
cluster, err := clusters.GetCCECluster(mgr.clusterClient, mgr.clusterName).Extract()
if err != nil {
return "", fmt.Errorf("could not get cluster: %v", err)
}
return cluster.Status.Phase, nil
}
// canUpdate returns true if the current cluster status is not one of the statuses that prevent the cluster from being updated.
func (mgr *huaweicloudCloudManager) canUpdate() (bool, string, error) {
clusterStatus, err := mgr.getClusterStatus()
if err != nil {
return false, "", fmt.Errorf("could not get cluster status: %v", err)
}
return !availableStatuses.Has(clusterStatus), clusterStatus, nil
}
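As context for the deleted manager above: buildManager reads an access key / secret key pair from the --cloud-config file via gcfg before authenticating against Huawei Cloud. The following is a minimal, self-contained sketch of that parsing; the section and key names (and the exampleCloudConfig struct) are assumptions for illustration, since the real CloudConfig type and its gcfg tags are defined in a file that is not part of this commit.

// Sketch only: the real provider's CloudConfig lives elsewhere; the gcfg tags
// and key names below are hypothetical.
package main

import (
	"fmt"
	"strings"

	gcfg "gopkg.in/gcfg.v1"
)

// exampleCloudConfig mirrors the shape implied by cfg.Global.AccessKey /
// cfg.Global.SecretKey in buildManager.
type exampleCloudConfig struct {
	Global struct {
		AccessKey string `gcfg:"access-key"`
		SecretKey string `gcfg:"secret-key"`
	}
}

func main() {
	// Hypothetical contents of the --cloud-config file.
	raw := `
[Global]
access-key = AKXXXXXXXXXXXXXXXXXX
secret-key = SKxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
`
	var cfg exampleCloudConfig
	if err := gcfg.ReadInto(&cfg, strings.NewReader(raw)); err != nil {
		fmt.Println("failed to read cloud-config:", err)
		return
	}
	// buildManager rejects configs where either credential is empty.
	if cfg.Global.AccessKey == "" || cfg.Global.SecretKey == "" {
		fmt.Println("access key and secret key must both be provided")
		return
	}
	fmt.Println("cloud-config parsed; credentials present")
}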


@@ -1,310 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package huaweicloud
import (
"fmt"
"github.com/stretchr/testify/assert"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud/huawei-cloud-sdk-go"
th "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud/huawei-cloud-sdk-go/testhelper"
"net/http"
"testing"
)
const (
clusterUUID = "25b884ab-623f-11ea-9981-0255ac101546"
)
var (
isTestingDeleteNodes = false
getRequestCount = 0
getClusterResponse = `
{
"kind": "Cluster",
"apiVersion": "v3",
"metadata": {
"name": "crc-test",
"uid": "%s",
"creationTimestamp": "2020-03-09 19:49:54.353283 +0000 UTC",
"updateTimestamp": "2020-03-23 18:33:42.788547 +0000 UTC"
},
"status": {
"phase": "%s"
}
}`
getClusterResponseSuccess = fmt.Sprintf(getClusterResponse, clusterUUID, clusterStatusAvailable)
getNodePoolsResponse = `
{
"kind": "List",
"apiVersion": "v3",
"items": [
{
"kind": "NodePool",
"apiVersion": "v3",
"metadata": {
"name": "%s",
"uid": "deb25e14-6558-11ea-97c2-0255ac101d4a"
},
"spec": {
"initialNodeCount": 1,
"autoscaling": {
"enable": true,
"maxNodeCount": 50
}
},
"status": {
"currentNode": %d,
"phase": ""
}
},
{
"kind": "NodePool",
"apiVersion": "v3",
"metadata": {
"name": "DefaultPool",
"uid": "DefaultPool"
},
"spec": {
"initialNodeCount": 1,
"autoscaling": {}
},
"status": {
"currentNode": 1,
"phase": ""
}
}
]
}`
getNodePoolsResponseSuccess = fmt.Sprintf(getNodePoolsResponse, nodePoolName, nodePoolNodeCount)
getNodePoolResponseSuccess = fmt.Sprintf(`
{
"kind": "NodePool",
"apiVersion": "v3",
"metadata": {
"name": "%s",
"uid": "%s"
},
"spec": {
"initialNodeCount": 1,
"autoscaling": {
"enable": true,
"maxNodeCount": 50
},
"nodeManagement": {}
},
"status": {
"currentNode": %d,
"phase": ""
}
}`, nodePoolName, nodePoolUID, nodePoolNodeCount)
deleteNodesResponseSuccess = `
{
"kind": "Node",
"apiVersion": "v3",
"metadata": {
"name": "crc-nodepool-tc3ei",
"uid": "c1b6ff0c-6ee1-11ea-befa-0255ac101d4c"
},
"status": {
"phase": "Active",
"jobID": "f5fb1835-6ee1-11ea-befa-0255ac101d4c",
"serverId": "4a2a7527-7d0d-4bc5-ba1b-379efed8da3c",
"privateIP": "192.168.0.15"
}
}`
)
// create fake service client
func createTestServiceClient() *huaweicloudsdk.ServiceClient {
return &huaweicloudsdk.ServiceClient{
ProviderClient: &huaweicloudsdk.ProviderClient{},
Endpoint: th.GetEndpoint() + "/huaweitest/",
}
}
// create fake manager
func createTestHuaweicloudManager() *huaweicloudCloudManager {
sc := createTestServiceClient()
return &huaweicloudCloudManager{
clusterClient: sc,
clusterName: clusterUUID,
timeIncrement: waitTimeStep,
}
}
// create routers
func register() {
// create router for getting cluster's status
th.Mux.HandleFunc("/huaweitest/clusters/"+clusterUUID, func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
if !isTestingDeleteNodes {
_, err := fmt.Fprint(w, getClusterResponseSuccess)
if err != nil {
fmt.Println("error handling request of getting cluster status available:", err)
}
} else {
getRequestCount += 1
if getRequestCount == 1 {
_, err := fmt.Fprintf(w, getClusterResponse, clusterUUID, clusterStatusScalingDown)
if err != nil {
fmt.Println("error handling request of getting cluster status scaling down when deleting nodes:", err)
}
} else {
_, err := fmt.Fprint(w, getClusterResponseSuccess)
if err != nil {
fmt.Println("error handling request of getting cluster status available when deleting nodes:", err)
}
}
}
})
// create router for getting all nodepools' status associated with a cluster
th.Mux.HandleFunc("/huaweitest/clusters/"+clusterUUID+"/nodepools", func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
if !isTestingDeleteNodes {
_, err := fmt.Fprint(w, getNodePoolsResponseSuccess)
if err != nil {
fmt.Println("error handling request of getting nodepools' status:", err)
}
} else {
_, err := fmt.Fprintf(w, getNodePoolsResponse, nodePoolName, nodePoolNodeCount+decreaseSize)
if err != nil {
fmt.Println("error handling request of getting nodepools' status after deleting node:", err)
}
}
})
// create router for getting status of a certain nodepool
th.Mux.HandleFunc("/huaweitest/clusters/"+clusterUUID+"/nodepools/"+nodePoolUID, func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
_, err := fmt.Fprint(w, getNodePoolResponseSuccess)
if err != nil {
fmt.Println("error handling request of getting/updating a nodepool's status:", err)
}
})
// create router for deleting a node
th.Mux.HandleFunc("/huaweitest/clusters/"+clusterUUID+"/nodes/"+nodeIdToBeDeleted, func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
_, err := fmt.Fprint(w, deleteNodesResponseSuccess)
if err != nil {
fmt.Println("error handling request of deleting a node:", err)
}
})
}
func Test_nodeGroupSize(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
nodeCount, err := manager.nodeGroupSize(nodePoolName)
assert.NoError(t, err)
assert.Equal(t, nodePoolNodeCount, nodeCount)
}
func Test_updateNodeCount(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
err := manager.updateNodeCount(ng, nodePoolNodeCount)
assert.NoError(t, err)
}
func Test_deleteNodesHelper(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
nodeIds := []string{nodeIdToBeDeleted}
err := manager.deleteNodesHelper(manager.clusterClient, clusterUUID, nodeIds)
assert.NoError(t, err)
}
func Test_deleteNodes(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
nodeIds := []string{nodeIdToBeDeleted}
isTestingDeleteNodes = true
getRequestCount = 0
err := manager.deleteNodes(ng, nodeIds, nodePoolNodeCount)
assert.NoError(t, err)
isTestingDeleteNodes = false
getRequestCount = 0
}
func Test_getClusterStatus(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
status, _ := manager.getClusterStatus()
assert.Equal(t, clusterStatusAvailable, status)
}
func Test_waitForClusterStatus(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
err := manager.waitForClusterStatus(clusterStatusAvailable, waitForCompleteStatusTimout)
assert.NoError(t, err)
}
func Test_canUpdate(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
canUpdate, status, _ := manager.canUpdate()
assert.Equal(t, true, canUpdate)
assert.Equal(t, clusterStatusAvailable, status)
}
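The tests above stand up fake CCE endpoints through the SDK's testhelper package (th.CreateServer, th.Mux). As a point of comparison, here is a small, self-contained sketch of the same fake-endpoint pattern using only the standard library's net/http/httptest; the route and the trimmed JSON payload are borrowed from the handlers registered in register(), and everything else is illustrative.

// Sketch only: shows the stdlib equivalent of the testhelper-based fake server.
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
)

func main() {
	const clusterID = "25b884ab-623f-11ea-9981-0255ac101546"

	mux := http.NewServeMux()
	// Fake the "get cluster" endpoint, mirroring the /clusters/{uid} route above.
	mux.HandleFunc("/huaweitest/clusters/"+clusterID, func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		fmt.Fprint(w, `{"kind":"Cluster","apiVersion":"v3","status":{"phase":"Available"}}`)
	})

	server := httptest.NewServer(mux)
	defer server.Close()

	// A client under test would be pointed at server.URL instead of the real CCE endpoint.
	resp, err := http.Get(server.URL + "/huaweitest/clusters/" + clusterID)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}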


@@ -1,314 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package huaweicloud
import (
"fmt"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
klog "k8s.io/klog/v2"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
"sync"
"time"
)
const updateWaitTime = 10 * time.Second
// NodeGroup contains configuration info and functions to control a set of nodes that have the same capacity and set of labels.
// It represents a homogeneous collection of nodes within a cluster that can be dynamically resized between a minimum and a maximum number of nodes.
type NodeGroup struct {
huaweiCloudManager *huaweicloudCloudManager
nodePoolName string
nodePoolId string
clusterName string
minNodeCount int
maxNodeCount int
targetSize *int
autoscalingEnabled bool
nodesToDelete []*apiv1.Node
deleteWaitTime time.Duration
timeIncrement time.Duration
nodePoolSizeTmp int
getNodePoolSizeTime time.Time
clusterUpdateMutex *sync.Mutex
}
// MaxSize returns maximum size of the node group.
func (ng *NodeGroup) MaxSize() int {
return ng.maxNodeCount
}
// MinSize returns minimum size of the node group.
func (ng *NodeGroup) MinSize() int {
return ng.minNodeCount
}
// TargetSize returns the current target size of the node group. It is possible that the
// number of nodes in Kubernetes is different at the moment but should be equal
// to Size() once everything stabilizes (new nodes finish startup and registration or
// removed nodes are deleted completely).
func (ng *NodeGroup) TargetSize() (int, error) {
return *ng.targetSize, nil
}
// waitForClusterStatus keeps waiting until the cluster has reached a specified status or timeout occurs.
func (ng *NodeGroup) waitForClusterStatus(status string, timeout time.Duration) error {
klog.V(2).Infof("Waiting for cluster %s status", status)
for start := time.Now(); time.Since(start) < timeout; time.Sleep(ng.timeIncrement) {
clusterStatus, err := ng.huaweiCloudManager.getClusterStatus()
if err != nil {
return fmt.Errorf("error waiting for %s status: %v", status, err)
}
if clusterStatus == status {
klog.V(0).Infof("Cluster has reached %s status", status)
return nil
}
}
return fmt.Errorf("timeout (%v) waiting for %s status", timeout, status)
}
// IncreaseSize increases the size of the node group. To delete a node you need
// to explicitly name it and use DeleteNode. This function should wait until
// node group size is updated.
func (ng *NodeGroup) IncreaseSize(delta int) error {
ng.clusterUpdateMutex.Lock()
defer ng.clusterUpdateMutex.Unlock()
if delta <= 0 {
return fmt.Errorf("delta for increasing size should be positive")
}
currentSize, err := ng.huaweiCloudManager.nodeGroupSize(ng.nodePoolName)
if err != nil {
return fmt.Errorf("failed to get the size of the node pool: %v", err)
}
if currentSize+delta > ng.MaxSize() {
return fmt.Errorf("failed to increase the size of the node pool. target size above maximum. target size:%d, maximum size:%d", currentSize+delta, ng.MaxSize())
}
canUpdate, status, err := ng.huaweiCloudManager.canUpdate()
if err != nil {
return fmt.Errorf("failed to get status of the node pool: %v", err)
}
if !canUpdate {
return fmt.Errorf("cluster is in %s status, cannot increase the size the cluster now", status)
}
klog.V(0).Infof("Increasing the size of the node pool by %d: %d->%d", delta, *ng.targetSize, *ng.targetSize+delta)
*ng.targetSize += delta
err = ng.huaweiCloudManager.updateNodeCount(ng, *ng.targetSize)
if err != nil {
return fmt.Errorf("failed to update the size of the node pool: %v", err)
}
// wait until the cluster becomes available
err = ng.waitForClusterStatus(clusterStatusAvailable, waitForCompleteStatusTimout)
if err != nil {
return fmt.Errorf("cluster failed to reach available status: %v", err)
}
// update internal cache
*ng.targetSize = currentSize + delta
return nil
}
// DeleteNodes deletes nodes from this node group. This function
// should wait until node group size is updated.
func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
// start the process of deleting nodes
ng.clusterUpdateMutex.Lock()
defer ng.clusterUpdateMutex.Unlock()
// check whether deleting the nodes will cause the size of the node pool to fall below the minimum size,
// and update ng.nodesToDelete (as well as ng.nodePoolSizeTmp and ng.getNodePoolSizeTime if necessary)
currentSize, err := checkAndUpdate(ng, nodes)
if err != nil {
return err
}
// get the unique ids of the nodes to be deleted
nodeIDs, finished, err := getNodeIDsToDelete(ng)
if finished { // nodes have been deleted by another goroutine
return nil
}
if err != nil {
return err
}
// call REST API to delete the nodes
err = ng.huaweiCloudManager.deleteNodes(ng, nodeIDs, currentSize-len(nodes))
if err != nil {
return fmt.Errorf("fail to delete the nodes: %v", err)
}
// wait until the cluster becomes available
err = ng.waitForClusterStatus(clusterStatusAvailable, waitForCompleteStatusTimout)
if err != nil {
return fmt.Errorf("cluster failed to reach available status: %v", err)
}
// update ng.targetSize
newSize, err := ng.huaweiCloudManager.nodeGroupSize(ng.nodePoolName)
if err != nil {
*ng.targetSize = currentSize - len(nodes)
return fmt.Errorf("failed to get node pool's size after deleting the nodes: %v", err)
}
*ng.targetSize = newSize
return nil
}
// checkAndUpdate checks whether deleting the nodes would cause the size of the node pool to fall below the minimum size,
// and updates ng.nodePoolSizeTmp, ng.getNodePoolSizeTime and ng.nodesToDelete if necessary.
func checkAndUpdate(ng *NodeGroup, nodes []*apiv1.Node) (int, error) {
// currentSize is used to evaluate whether it's valid to delete the nodes. If the time since the last update
// is within updateWaitTime, ng.nodePoolSizeTmp is used; otherwise, the latest size of the node pool is
// fetched and used as currentSize.
var currentSize int
var err error
if time.Since(ng.getNodePoolSizeTime) > updateWaitTime {
currentSize, err = ng.huaweiCloudManager.nodeGroupSize(ng.nodePoolName)
if err != nil {
return 0, fmt.Errorf("failed to get current node pool's size: %v", err)
}
// update ng.nodePoolSizeTmp and ng.getNodePoolSizeTime
ng.nodePoolSizeTmp = currentSize
ng.getNodePoolSizeTime = time.Now()
} else {
// use ng.nodePoolSizeTmp if the time since last update isn't longer than updateWaitTime
currentSize = ng.nodePoolSizeTmp
}
// evaluate whether it's valid to delete the nodes.
// make sure that deleting the nodes won't cause the size of the node pool to fall below the minimum size
if currentSize-len(ng.nodesToDelete)-len(nodes) < ng.MinSize() {
return 0, fmt.Errorf("cannot delete the nodes now since the size of the node pool isn't sufficient to retain minimum size")
}
// update ng.nodesToDelete
ng.nodesToDelete = append(ng.nodesToDelete, nodes...)
return currentSize, nil
}
// getNodeIDsToDelete checks whether there are still nodes waiting to be deleted. If there are none,
// it returns true, indicating that the deletion process has already finished;
// otherwise, it returns a slice of node IDs to be deleted.
func getNodeIDsToDelete(ng *NodeGroup) ([]string, bool, error) {
// check whether the nodes have already been deleted by another goroutine.
// If the current goroutine is not the first one to acquire ng.clusterUpdateMutex,
// the nodes may already have been deleted, which leaves ng.nodesToDelete empty.
if len(ng.nodesToDelete) == 0 {
return nil, true, nil
}
// check whether the cluster is available for deleting nodes
canUpdate, status, err := ng.huaweiCloudManager.canUpdate()
if err != nil {
return nil, false, fmt.Errorf("failed to check whether the cluster is available for updating: %v", err)
}
if !canUpdate {
return nil, false, fmt.Errorf("cluster is in %s status, cannot perform node deletion now", status)
}
nodeIDs := make([]string, 0)
for _, node := range ng.nodesToDelete {
// the node.Spec.ProviderID is the node's uid
klog.Infof("Delete node with node id: %s", node.Spec.ProviderID)
nodeIDs = append(nodeIDs, node.Spec.ProviderID)
}
ng.nodesToDelete = nil
return nodeIDs, false, nil
}
// DecreaseTargetSize decreases the target size of the node group. This function
// doesn't permit to delete any existing node and can be used only to reduce the
// request for new nodes that have not been yet fulfilled. Delta should be negative.
// It is assumed that cloud provider will not delete the existing nodes when there
// is an option to just decrease the target.
func (ng *NodeGroup) DecreaseTargetSize(delta int) error {
if delta >= 0 {
return fmt.Errorf("delta for decreasing target size should be negative")
}
klog.V(0).Infof("Target size is decreased by %d, %d->%d", delta, *ng.targetSize, *ng.targetSize+delta)
*ng.targetSize += delta
return ng.huaweiCloudManager.updateNodeCount(ng, *ng.targetSize)
}
// Id returns the node pool's name.
func (ng *NodeGroup) Id() string {
return ng.nodePoolName
}
// Debug returns a string containing the name, minimum size, maximum size and target size of the node pool.
func (ng *NodeGroup) Debug() string {
return fmt.Sprintf("%s min=%d max=%d target=%d", ng.nodePoolName, ng.minNodeCount, ng.maxNodeCount, *ng.targetSize)
}
// Nodes returns a list of all nodes that belong to this node group.
// It is required that Instance objects returned by this method have Id field set.
// Other fields are optional.
// This list should include also instances that might have not become a kubernetes node yet.
func (ng *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
nodes, err := ng.huaweiCloudManager.getNodes(ng.nodePoolName)
if err != nil {
return nil, fmt.Errorf("failed to get the nodes belong to the node pool: %v", err)
}
var instances []cloudprovider.Instance
for _, node := range nodes {
instances = append(instances, cloudprovider.Instance{Id: node})
}
return instances, nil
}
// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty
// (as if just started) node. This will be used in scale-up simulations to
// predict what would a new node look like if a node group was expanded. The returned
// NodeInfo is expected to have a fully populated Node object, with all of the labels,
// capacity and allocatable information as well as all pods that are started on
// the node by default, using manifest (most likely only kube-proxy). Not implemented
func (ng *NodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
return nil, cloudprovider.ErrNotImplemented
}
// Exist checks if the node group really exists on the cloud provider side. Currently always returns true.
func (ng *NodeGroup) Exist() bool {
return true
}
// Create creates the node group on the cloud provider side. Not implemented.
func (ng *NodeGroup) Create() (cloudprovider.NodeGroup, error) {
return nil, cloudprovider.ErrAlreadyExist
}
// Delete deletes the node group on the cloud provider side. Not implemented.
func (ng *NodeGroup) Delete() error {
return cloudprovider.ErrNotImplemented
}
// Autoprovisioned returns true if the node group is autoprovisioned. An autoprovisioned group
// was created by CA and can be deleted when scaled to 0. Currently always returns false.
func (ng *NodeGroup) Autoprovisioned() bool {
return false
}
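checkAndUpdate above reuses a cached node-pool size for up to updateWaitTime so that a burst of DeleteNodes calls does not query CCE once per call. The sketch below isolates that caching pattern in a self-contained form; sizeCache and its fetch callback are illustrative names, not part of the provider.

// Sketch only: a generic time-based size cache mirroring the
// nodePoolSizeTmp / getNodePoolSizeTime logic in checkAndUpdate.
package main

import (
	"fmt"
	"time"
)

const updateWaitTime = 10 * time.Second

type sizeCache struct {
	cachedSize int
	fetchedAt  time.Time
	fetch      func() (int, error) // e.g. wraps nodeGroupSize()
}

// current returns the cached size if it is fresh enough, otherwise it
// refreshes the cache via the fetch function.
func (c *sizeCache) current() (int, error) {
	if time.Since(c.fetchedAt) <= updateWaitTime {
		return c.cachedSize, nil
	}
	size, err := c.fetch()
	if err != nil {
		return 0, fmt.Errorf("failed to get current node pool's size: %v", err)
	}
	c.cachedSize = size
	c.fetchedAt = time.Now()
	return size, nil
}

func main() {
	calls := 0
	cache := &sizeCache{fetch: func() (int, error) {
		calls++ // stands in for a CCE API call
		return 5, nil
	}}

	// Two lookups within updateWaitTime: only the first one hits the "API".
	size1, _ := cache.current()
	size2, _ := cache.current()
	fmt.Println(size1, size2, "API calls:", calls) // 5 5 API calls: 1
}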


@@ -1,170 +0,0 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package huaweicloud
import (
"github.com/stretchr/testify/assert"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
th "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud/huawei-cloud-sdk-go/testhelper"
"sync"
"testing"
"time"
)
const (
waitTimeStep = 100 * time.Millisecond
nodePoolName = "crc-nodepool"
nodePoolUID = "deb25e14-6558-11ea-97c2-0255ac101d4a"
minNodeCount = 1
maxNodeCount = 10
increaseSize = 1
decreaseSize = -1
nodePoolNodeCount = 5
nodeIdToBeDeleted = "c1b6ff0c-6ee1-11ea-befa-0255ac101d4c"
nodeNameToBeDeleted = "nodeToBeDeleted"
)
func createTestNodeGroup(manager *huaweicloudCloudManager) *NodeGroup {
size := nodePoolNodeCount
return &NodeGroup{
huaweiCloudManager: manager,
clusterUpdateMutex: &sync.Mutex{},
nodePoolName: nodePoolName,
nodePoolId: nodePoolUID,
clusterName: clusterUUID,
autoscalingEnabled: true,
minNodeCount: minNodeCount,
maxNodeCount: maxNodeCount,
targetSize: &size,
timeIncrement: waitTimeStep,
}
}
func TestNodeGroup_MaxSize(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
size := ng.MaxSize()
assert.Equal(t, maxNodeCount, size)
}
func TestNodeGroup_MinSize(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
size := ng.MinSize()
assert.Equal(t, minNodeCount, size)
}
func TestNodeGroup_TargetSize(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
size, err := ng.TargetSize()
assert.NoError(t, err)
assert.Equal(t, nodePoolNodeCount, size)
}
func TestNodeGroup_waitForClusterStatus(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
err := ng.waitForClusterStatus(clusterStatusAvailable, waitForCompleteStatusTimout)
assert.NoError(t, err)
}
func TestNodeGroup_IncreaseSize(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
err := ng.IncreaseSize(increaseSize)
assert.NoError(t, err)
curSize, _ := ng.TargetSize()
assert.Equal(t, nodePoolNodeCount+increaseSize, curSize)
}
func TestNodeGroup_DeleteNodes(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
nodes := []*apiv1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNameToBeDeleted,
},
Spec: apiv1.NodeSpec{
ProviderID: nodeIdToBeDeleted,
},
},
}
isTestingDeleteNodes = true
getRequestCount = -1
err := ng.DeleteNodes(nodes)
assert.NoError(t, err)
curSize, _ := ng.TargetSize()
assert.Equal(t, nodePoolNodeCount+decreaseSize, curSize)
isTestingDeleteNodes = false
getRequestCount = 0
}
func TestNodeGroup_DecreaseTargetSize(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
register()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
err := ng.DecreaseTargetSize(decreaseSize)
assert.NoError(t, err)
curSize, _ := ng.TargetSize()
assert.Equal(t, nodePoolNodeCount+decreaseSize, curSize)
}
func TestNodeGroup_Id(t *testing.T) {
th.CreateServer()
defer th.ShutDownServer()
manager := createTestHuaweicloudManager()
ng := createTestNodeGroup(manager)
name := ng.Id()
assert.Equal(t, nodePoolName, name)
}