diff --git a/cluster-autoscaler/cloudprovider/builder/builder_exoscale.go b/cluster-autoscaler/cloudprovider/builder/builder_exoscale.go index bd247b675f..5454ecaa69 100644 --- a/cluster-autoscaler/cloudprovider/builder/builder_exoscale.go +++ b/cluster-autoscaler/cloudprovider/builder/builder_exoscale.go @@ -2,7 +2,7 @@ // +build exoscale /* -Copyright 2020 The Kubernetes Authors. +Copyright 2021 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/cluster-autoscaler/cloudprovider/exoscale/README.md b/cluster-autoscaler/cloudprovider/exoscale/README.md index 4e9c6861ef..fbe10b0892 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/README.md +++ b/cluster-autoscaler/cloudprovider/exoscale/README.md @@ -1,7 +1,7 @@ # Cluster Autoscaler for Exoscale The Cluster Autoscaler (CA) for Exoscale scales worker nodes running in -Exoscale Instance Pools. +Exoscale SKS Nodepools or Instance Pools. ## Configuration @@ -20,7 +20,8 @@ the CA *Deployment*. First, start by exporting the Exoscale API credentials (we recommend that you create dedicated API credentials using the [Exoscale IAM][exo-iam] service) to -provide to the CA in your shell: +provide to the CA in your shell, as well as the zone the target Kubernetes +cluster is located in: ```sh export EXOSCALE_API_KEY="EXOxxxxxxxxxxxxxxxxxxxxxxxx" @@ -33,13 +34,17 @@ Next, run the following command from the same shell: ``` ./examples/generate-secret.sh ``` -Finally, ensure that the `exoscale-secret` *Secret* has been created +Next, ensure that the `exoscale-api-credentials` *Secret* has been created successfully by running the following command: ``` -kubectl get secret --namespace kube-system exoscale-credentials +kubectl get secret --namespace kube-system exoscale-api-credentials ``` +Finally, an `EXOSCALE_ZONE` variable must be set to the target Kubernetes +cluster's zone, along with the API credentials, in the CA *Deployment* +environment.
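For reference, this is how the zone is set in the example *Deployment* manifests shipped in `examples/` (see the next section); the excerpt below shows the CA container spec, where `de-fra-1` is only a placeholder to replace with the zone your cluster actually runs in:

```yaml
containers:
  - name: cluster-autoscaler
    env:
      - name: EXOSCALE_ZONE
        value: de-fra-1 # Change this to match your actual Kubernetes cluster zone
```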
+ ### Deploying the Cluster Autoscaler diff --git a/cluster-autoscaler/cloudprovider/exoscale/examples/cluster-autoscaler-run-on-control-plane.yaml b/cluster-autoscaler/cloudprovider/exoscale/examples/cluster-autoscaler-run-on-control-plane.yaml index 0ed916f761..b903ac1810 100644 --- a/cluster-autoscaler/cloudprovider/exoscale/examples/cluster-autoscaler-run-on-control-plane.yaml +++ b/cluster-autoscaler/cloudprovider/exoscale/examples/cluster-autoscaler-run-on-control-plane.yaml @@ -143,11 +143,10 @@ spec: operator: "Equal" value: "true" key: node-role.kubernetes.io/master - nodeSelector: - kubernetes.io/role: master containers: - - image: k8s.gcr.io/autoscaling/cluster-autoscaler:latest - name: cluster-autoscaler + - name: cluster-autoscaler + image: exoscale/cluster-autoscaler:latest + imagePullPolicy: "Always" resources: limits: cpu: 100m @@ -156,40 +155,30 @@ spec: cpu: 100m memory: 300Mi command: - - ./cluster-autoscaler + - /cluster-autoscaler - --cloud-provider=exoscale - --stderrthreshold=info - #- --scale-down-delay-after-add=1m # For development - #- --scale-down-unneeded-time=1m # For development - #- --unremovable-node-recheck-timeout=1m # For development + #- --scale-down-delay-after-add=30s + #- --scale-down-unneeded-time=30s + #- --unremovable-node-recheck-timeout=30s env: - name: EXOSCALE_API_KEY valueFrom: secretKeyRef: key: api-key - name: exoscale-credentials + name: exoscale-api-credentials - name: EXOSCALE_API_SECRET valueFrom: secretKeyRef: key: api-secret - name: exoscale-credentials - - name: EXOSCALE_API_ENDPOINT - valueFrom: - secretKeyRef: - key: api-endpoint - name: exoscale-credentials + name: exoscale-api-credentials + - name: EXOSCALE_ZONE + value: de-fra-1 # Change this to match your actual Kubernetes cluster zone volumeMounts: - name: ssl-certs mountPath: /etc/ssl/certs/ca-certificates.crt readOnly: true - - name: cloud-config - mountPath: /config - readOnly: true - imagePullPolicy: "Always" volumes: - name: ssl-certs hostPath: path: "/etc/ssl/certs/ca-certificates.crt" - - name: cloud-config - secret: - secretName: cluster-autoscaler-cloud-config diff --git a/cluster-autoscaler/cloudprovider/exoscale/examples/generate-secret.sh b/cluster-autoscaler/cloudprovider/exoscale/examples/generate-secret.sh index 0761b8f492..b4d9345227 100755 --- a/cluster-autoscaler/cloudprovider/exoscale/examples/generate-secret.sh +++ b/cluster-autoscaler/cloudprovider/exoscale/examples/generate-secret.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Copyright 2020 The Kubernetes Authors. +# Copyright 2021 The Kubernetes Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,11 +17,10 @@ cat < n.MaxSize() { - return fmt.Errorf("size increase is too large. current: %d desired: %d max: %d", - n.instancePool.Size, targetSize, n.MaxSize()) - } - - ctx := context.Background() - - klog.V(4).Infof("Scaling Instance Pool %s to %d", n.instancePool.ID, targetSize) - - _, err := n.manager.client.RequestWithContext(ctx, egoscale.ScaleInstancePool{ - ID: n.instancePool.ID, - ZoneID: n.instancePool.ZoneID, - Size: targetSize, - }) - if err != nil { - return err - } - - if err := n.waitInstancePoolRunning(ctx); err != nil { - return err - } - - n.instancePool.Size = targetSize - - return nil -} - -// DeleteNodes deletes nodes from this node group. Error is returned either on -// failure or if the given node doesn't belong to this node group. This function -// should wait until node group size is updated. 
Implementation required. -func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error { - var instanceIDs []egoscale.UUID - for _, node := range nodes { - nodeID := node.Spec.ProviderID - - uuid, err := egoscale.ParseUUID(toNodeID(nodeID)) - if err != nil { - return err - } - - instanceIDs = append(instanceIDs, *uuid) - } - - ctx := context.Background() - - n.Lock() - defer n.Unlock() - - if err := n.waitInstancePoolRunning(ctx); err != nil { - return err - } - - klog.V(4).Infof("Evicting Instance Pool %s members: %v", n.instancePool.ID, instanceIDs) - - err := n.manager.client.BooleanRequest(egoscale.EvictInstancePoolMembers{ - ID: n.instancePool.ID, - ZoneID: n.instancePool.ZoneID, - MemberIDs: instanceIDs, - }) - if err != nil { - return err - } - - if err := n.waitInstancePoolRunning(ctx); err != nil { - return err - } - - n.instancePool.Size = n.instancePool.Size - len(instanceIDs) - - return nil -} - -// DecreaseTargetSize decreases the target size of the node group. This function -// doesn't permit to delete any existing node and can be used only to reduce the -// request for new nodes that have not been yet fulfilled. Delta should be negative. -// It is assumed that cloud provider will not delete the existing nodes when there -// is an option to just decrease the target. Implementation required. -func (n *NodeGroup) DecreaseTargetSize(_ int) error { - // Exoscale Instance Pools don't support down-sizing without deleting members, - // so it is not possible to implement it according to the documented behavior. - return nil -} - -// Id returns an unique identifier of the node group. -func (n *NodeGroup) Id() string { - return n.id -} - -// Debug returns a string containing all information regarding this node group. -func (n *NodeGroup) Debug() string { - return fmt.Sprintf("Node group ID: %s (min:%d max:%d)", n.Id(), n.MinSize(), n.MaxSize()) -} - -// Nodes returns a list of all nodes that belong to this node group. -// It is required that Instance objects returned by this method have Id field set. -// Other fields are optional. -// This list should include also instances that might have not become a kubernetes node yet. -func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) { - if n.instancePool == nil { - return nil, errors.New("instance pool instance is not created") - } - - instances := make([]cloudprovider.Instance, 0, len(n.instancePool.VirtualMachines)) - for _, vm := range n.instancePool.VirtualMachines { - instances = append(instances, toInstance(vm)) - } - - return instances, nil -} - -// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty -// (as if just started) node. This will be used in scale-up simulations to -// predict what would a new node look like if a node group was expanded. The returned -// NodeInfo is expected to have a fully populated Node object, with all of the labels, -// capacity and allocatable information as well as all pods that are started on -// the node by default, using manifest (most likely only kube-proxy). Implementation optional. -func (n *NodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { - return nil, cloudprovider.ErrNotImplemented -} - -// Exist checks if the node group really exists on the cloud provider side. Allows to tell the -// theoretical node group from the real one. Implementation required. -func (n *NodeGroup) Exist() bool { - return n.instancePool != nil -} - -// Create creates the node group on the cloud provider side. Implementation optional. 
-func (n *NodeGroup) Create() (cloudprovider.NodeGroup, error) { - return nil, cloudprovider.ErrNotImplemented -} - -// Delete deletes the node group on the cloud provider side. -// This will be executed only for autoprovisioned node groups, once their size drops to 0. -// Implementation optional. -func (n *NodeGroup) Delete() error { - return cloudprovider.ErrNotImplemented -} - -// Autoprovisioned returns true if the node group is autoprovisioned. An autoprovisioned group -// was created by CA and can be deleted when scaled to 0. -func (n *NodeGroup) Autoprovisioned() bool { - return false -} - -// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular -// NodeGroup. Returning a nil will result in using default options. -func (n *NodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { - return nil, cloudprovider.ErrNotImplemented -} - -// toInstance converts the given egoscale.VirtualMachine to a -// cloudprovider.Instance -func toInstance(vm egoscale.VirtualMachine) cloudprovider.Instance { - return cloudprovider.Instance{ - Id: toProviderID(vm.ID.String()), - Status: toInstanceStatus(egoscale.VirtualMachineState(vm.State)), - } -} - -// toInstanceStatus converts the given egoscale.VirtualMachineState to a -// cloudprovider.InstanceStatus -func toInstanceStatus(vmState egoscale.VirtualMachineState) *cloudprovider.InstanceStatus { - if vmState == "" { - return nil - } - - st := &cloudprovider.InstanceStatus{} - switch vmState { - case egoscale.VirtualMachineStarting: - st.State = cloudprovider.InstanceCreating - case egoscale.VirtualMachineRunning: - st.State = cloudprovider.InstanceRunning - case egoscale.VirtualMachineStopping: - st.State = cloudprovider.InstanceDeleting - default: - st.ErrorInfo = &cloudprovider.InstanceErrorInfo{ - ErrorClass: cloudprovider.OtherErrorClass, - ErrorCode: "no-code-exoscale", - ErrorMessage: "error", - } - } - - return st -} - -func (n *NodeGroup) waitInstancePoolRunning(ctx context.Context) error { - err := n.poller( - ctx, - egoscale.GetInstancePool{ID: n.instancePool.ID, ZoneID: n.instancePool.ZoneID}, - func(i interface{}, err error) (bool, error) { - if err != nil { - return false, err - } - - if i.(*egoscale.GetInstancePoolResponse).InstancePools[0].State == - egoscale.InstancePoolRunning { - return true, nil - } - - return false, nil - }, - ) - - return err -} - -func (n *NodeGroup) poller(ctx context.Context, req egoscale.Command, callback func(interface{}, error) (bool, error)) error { - timeout := time.Minute * 10 - c, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - - for t := time.Tick(time.Second * 10); ; { // nolint: staticcheck - g, err := n.manager.client.RequestWithContext(c, req) - ok, err := callback(g, err) - if err != nil { - return err - } - if ok { - return nil - } - - select { - case <-c.Done(): - return fmt.Errorf("context timeout after: %v", timeout) - case <-t: - continue - } - } -} diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go new file mode 100644 index 0000000000..ec2fa647f8 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool.go @@ -0,0 +1,222 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exoscale + +import ( + "context" + "errors" + "fmt" + "sync" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2" + "k8s.io/autoscaler/cluster-autoscaler/config" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +// instancePoolNodeGroup implements cloudprovider.NodeGroup interface for Exoscale Instance Pools. +type instancePoolNodeGroup struct { + instancePool *egoscale.InstancePool + + m *Manager + + sync.Mutex +} + +var errNoInstancePool = errors.New("not an Instance Pool member") + +// MaxSize returns maximum size of the node group. +func (n *instancePoolNodeGroup) MaxSize() int { + limit, err := n.m.computeInstanceQuota() + if err != nil { + return 0 + } + + return limit +} + +// MinSize returns minimum size of the node group. +func (n *instancePoolNodeGroup) MinSize() int { + return 1 +} + +// TargetSize returns the current target size of the node group. It is possible that the +// number of nodes in Kubernetes is different at the moment but should be equal +// to Size() once everything stabilizes (new nodes finish startup and registration or +// removed nodes are deleted completely). Implementation required. +func (n *instancePoolNodeGroup) TargetSize() (int, error) { + return int(*n.instancePool.Size), nil +} + +// IncreaseSize increases the size of the node group. To delete a node you need +// to explicitly name it and use DeleteNode. This function should wait until +// node group size is updated. Implementation required. +func (n *instancePoolNodeGroup) IncreaseSize(delta int) error { + if delta <= 0 { + return fmt.Errorf("delta must be positive, have: %d", delta) + } + + targetSize := *n.instancePool.Size + int64(delta) + + if targetSize > int64(n.MaxSize()) { + return fmt.Errorf("size increase is too large (current: %d desired: %d max: %d)", + *n.instancePool.Size, targetSize, n.MaxSize()) + } + + infof("scaling Instance Pool %s to size %d", *n.instancePool.ID, targetSize) + + if err := n.m.client.ScaleInstancePool(n.m.ctx, n.m.zone, n.instancePool, targetSize); err != nil { + return err + } + + if err := n.waitUntilRunning(n.m.ctx); err != nil { + return err + } + + n.instancePool.Size = &targetSize + + return nil +} + +// DeleteNodes deletes nodes from this node group. Error is returned either on +// failure or if the given node doesn't belong to this node group. This function +// should wait until node group size is updated. Implementation required. 
+func (n *instancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { + n.Lock() + defer n.Unlock() + + if err := n.waitUntilRunning(n.m.ctx); err != nil { + return err + } + + instanceIDs := make([]string, len(nodes)) + for i, node := range nodes { + instanceIDs[i] = toNodeID(node.Spec.ProviderID) + } + + infof("evicting Instance Pool %s members: %v", *n.instancePool.ID, instanceIDs) + + if err := n.m.client.EvictInstancePoolMembers(n.m.ctx, n.m.zone, n.instancePool, instanceIDs); err != nil { + errorf("unable to evict instances from Instance Pool %s: %v", *n.instancePool.ID, err) + return err + } + + if err := n.waitUntilRunning(n.m.ctx); err != nil { + return err + } + + newSize := *n.instancePool.Size - int64(len(instanceIDs)) + n.instancePool.Size = &newSize + + return nil +} + +// DecreaseTargetSize decreases the target size of the node group. This function +// doesn't permit to delete any existing node and can be used only to reduce the +// request for new nodes that have not been yet fulfilled. Delta should be negative. +// It is assumed that cloud provider will not delete the existing nodes when there +// is an option to just decrease the target. Implementation required. +func (n *instancePoolNodeGroup) DecreaseTargetSize(_ int) error { + // Exoscale Instance Pools don't support down-sizing without deleting members, + // so it is not possible to implement it according to the documented behavior. + return nil +} + +// Id returns an unique identifier of the node group. +func (n *instancePoolNodeGroup) Id() string { + return *n.instancePool.ID +} + +// Debug returns a string containing all information regarding this node group. +func (n *instancePoolNodeGroup) Debug() string { + return fmt.Sprintf("Node group ID: %s (min:%d max:%d)", n.Id(), n.MinSize(), n.MaxSize()) +} + +// Nodes returns a list of all nodes that belong to this node group. +// It is required that Instance objects returned by this method have Id field set. +// Other fields are optional. +// This list should include also instances that might have not become a kubernetes node yet. +func (n *instancePoolNodeGroup) Nodes() ([]cloudprovider.Instance, error) { + nodes := make([]cloudprovider.Instance, len(*n.instancePool.InstanceIDs)) + for i, id := range *n.instancePool.InstanceIDs { + instance, err := n.m.client.GetInstance(n.m.ctx, n.m.zone, id) + if err != nil { + errorf("unable to retrieve Compute instance %s: %v", id, err) + return nil, err + } + nodes[i] = toInstance(instance) + } + + return nodes, nil +} + +// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty +// (as if just started) node. This will be used in scale-up simulations to +// predict what would a new node look like if a node group was expanded. The returned +// NodeInfo is expected to have a fully populated Node object, with all of the labels, +// capacity and allocatable information as well as all pods that are started on +// the node by default, using manifest (most likely only kube-proxy). Implementation optional. +func (n *instancePoolNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Exist checks if the node group really exists on the cloud provider side. Allows to tell the +// theoretical node group from the real one. Implementation required. +func (n *instancePoolNodeGroup) Exist() bool { + return n.instancePool != nil +} + +// Create creates the node group on the cloud provider side. Implementation optional. 
+func (n *instancePoolNodeGroup) Create() (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Delete deletes the node group on the cloud provider side. +// This will be executed only for autoprovisioned node groups, once their size drops to 0. +// Implementation optional. +func (n *instancePoolNodeGroup) Delete() error { + return cloudprovider.ErrNotImplemented +} + +// Autoprovisioned returns true if the node group is autoprovisioned. An autoprovisioned group +// was created by CA and can be deleted when scaled to 0. +func (n *instancePoolNodeGroup) Autoprovisioned() bool { + return false +} + +// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular +// instancePoolNodeGroup. Returning a nil will result in using default options. +func (n *instancePoolNodeGroup) GetOptions(_ config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} + +func (n *instancePoolNodeGroup) waitUntilRunning(ctx context.Context) error { + return pollCmd(ctx, func() (bool, error) { + instancePool, err := n.m.client.GetInstancePool(ctx, n.m.zone, n.Id()) + if err != nil { + errorf("unable to retrieve Instance Pool %s: %s", n.Id(), err) + return false, err + } + + if *instancePool.State == "running" { + return true, nil + } + + return false, nil + }) +} diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool_test.go new file mode 100644 index 0000000000..1aa19511b9 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_instance_pool_test.go @@ -0,0 +1,202 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exoscale + +import ( + "github.com/stretchr/testify/mock" + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2" +) + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_MaxSize() { + ts.p.manager.client.(*exoscaleClientMock). + On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, "instance"). 
+ Return( + &egoscale.Quota{ + Resource: &testComputeInstanceQuotaName, + Usage: &testComputeInstanceQuotaUsage, + Limit: &testComputeInstanceQuotaLimit, + }, + nil, + ) + + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + }, + m: ts.p.manager, + } + + ts.Require().Equal(int(testComputeInstanceQuotaLimit), nodeGroup.MaxSize()) +} + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_MinSize() { + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + }, + m: ts.p.manager, + } + + ts.Require().Equal(1, nodeGroup.MinSize()) +} + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_TargetSize() { + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + }, + m: ts.p.manager, + } + + actual, err := nodeGroup.TargetSize() + ts.Require().NoError(err) + ts.Require().Equal(int(testInstancePoolSize), actual) +} + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_IncreaseSize() { + ts.p.manager.client.(*exoscaleClientMock). + On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, "instance"). + Return( + &egoscale.Quota{ + Resource: &testComputeInstanceQuotaName, + Usage: &testComputeInstanceQuotaUsage, + Limit: &testComputeInstanceQuotaLimit, + }, + nil, + ) + + ts.p.manager.client.(*exoscaleClientMock). + On("ScaleInstancePool", ts.p.manager.ctx, ts.p.manager.zone, mock.Anything, mock.Anything). + Return(nil) + + ts.p.manager.client.(*exoscaleClientMock). + On("GetInstancePool", ts.p.manager.ctx, ts.p.manager.zone, testInstancePoolID). + Return(&egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + State: &testInstancePoolState, + }, nil) + + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + }, + m: ts.p.manager, + } + + ts.Require().NoError(nodeGroup.IncreaseSize(int(testInstancePoolSize + 1))) + + // Test size increase failure if beyond current limits: + ts.Require().Error(nodeGroup.IncreaseSize(1000)) +} + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_DeleteNodes() { + ts.p.manager.client.(*exoscaleClientMock). + On( + "EvictInstancePoolMembers", + ts.p.manager.ctx, + ts.p.manager.zone, + mock.Anything, + mock.Anything, + mock.Anything, + ). + Return(nil) + + ts.p.manager.client.(*exoscaleClientMock). + On("GetInstancePool", ts.p.manager.ctx, ts.p.manager.zone, testInstancePoolID). 
+ Return(&egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + State: &testInstancePoolState, + }, nil) + + node := &apiv1.Node{ + Spec: apiv1.NodeSpec{ + ProviderID: toProviderID(testInstanceID), + }, + } + + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + }, + m: ts.p.manager, + } + + ts.Require().NoError(nodeGroup.DeleteNodes([]*apiv1.Node{node})) +} + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_Id() { + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + }, + m: ts.p.manager, + } + + ts.Require().Equal(testInstancePoolID, nodeGroup.Id()) +} + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_Nodes() { + ts.p.manager.client.(*exoscaleClientMock). + On("GetInstance", ts.p.manager.ctx, ts.p.manager.zone, testInstanceID). + Return(&egoscale.Instance{ + ID: &testInstanceID, + State: &testInstanceState, + }, nil) + + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + InstanceIDs: &[]string{testInstanceID}, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + }, + m: ts.p.manager, + } + + instances, err := nodeGroup.Nodes() + ts.Require().NoError(err) + ts.Require().Len(instances, 1) + ts.Require().Equal(testInstanceID, toNodeID(instances[0].Id)) + ts.Require().Equal(cloudprovider.InstanceRunning, instances[0].Status.State) +} + +func (ts *cloudProviderTestSuite) TestInstancePoolNodeGroup_Exist() { + nodeGroup := &instancePoolNodeGroup{ + instancePool: &egoscale.InstancePool{ + ID: &testInstancePoolID, + }, + m: ts.p.manager, + } + + ts.Require().True(nodeGroup.Exist()) +} diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go new file mode 100644 index 0000000000..91b6631b85 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool.go @@ -0,0 +1,237 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exoscale + +import ( + "context" + "fmt" + "sync" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2" + "k8s.io/autoscaler/cluster-autoscaler/config" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +// sksNodepoolNodeGroup implements cloudprovider.NodeGroup interface for Exoscale SKS Nodepools. +type sksNodepoolNodeGroup struct { + sksNodepool *egoscale.SKSNodepool + sksCluster *egoscale.SKSCluster + + m *Manager + + sync.Mutex +} + +// MaxSize returns maximum size of the node group. 
+func (n *sksNodepoolNodeGroup) MaxSize() int { + limit, err := n.m.computeInstanceQuota() + if err != nil { + return 0 + } + + return limit +} + +// MinSize returns minimum size of the node group. +func (n *sksNodepoolNodeGroup) MinSize() int { + return 1 +} + +// TargetSize returns the current target size of the node group. It is possible that the +// number of nodes in Kubernetes is different at the moment but should be equal +// to Size() once everything stabilizes (new nodes finish startup and registration or +// removed nodes are deleted completely). Implementation required. +func (n *sksNodepoolNodeGroup) TargetSize() (int, error) { + return int(*n.sksNodepool.Size), nil +} + +// IncreaseSize increases the size of the node group. To delete a node you need +// to explicitly name it and use DeleteNode. This function should wait until +// node group size is updated. Implementation required. +func (n *sksNodepoolNodeGroup) IncreaseSize(delta int) error { + if delta <= 0 { + return fmt.Errorf("delta must be positive, have: %d", delta) + } + + targetSize := *n.sksNodepool.Size + int64(delta) + + if targetSize > int64(n.MaxSize()) { + return fmt.Errorf("size increase is too large (current: %d desired: %d max: %d)", + *n.sksNodepool.Size, targetSize, n.MaxSize()) + } + + infof("scaling SKS Nodepool %s to size %d", *n.sksNodepool.ID, targetSize) + + if err := n.m.client.ScaleSKSNodepool(n.m.ctx, n.m.zone, n.sksCluster, n.sksNodepool, targetSize); err != nil { + errorf("unable to scale SKS Nodepool %s: %v", *n.sksNodepool.ID, err) + return err + } + + if err := n.waitUntilRunning(n.m.ctx); err != nil { + return err + } + + n.sksNodepool.Size = &targetSize + + return nil +} + +// DeleteNodes deletes nodes from this node group. Error is returned either on +// failure or if the given node doesn't belong to this node group. This function +// should wait until node group size is updated. Implementation required. +func (n *sksNodepoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { + n.Lock() + defer n.Unlock() + + if err := n.waitUntilRunning(n.m.ctx); err != nil { + return err + } + + instanceIDs := make([]string, len(nodes)) + for i, node := range nodes { + instanceIDs[i] = toNodeID(node.Spec.ProviderID) + } + + infof("evicting SKS Nodepool %s members: %v", *n.sksNodepool.ID, instanceIDs) + + if err := n.m.client.EvictSKSNodepoolMembers( + n.m.ctx, + n.m.zone, + n.sksCluster, + n.sksNodepool, + instanceIDs, + ); err != nil { + errorf("unable to evict instances from SKS Nodepool %s: %v", *n.sksNodepool.ID, err) + return err + } + + if err := n.waitUntilRunning(n.m.ctx); err != nil { + return err + } + + newSize := *n.sksNodepool.Size - int64(len(instanceIDs)) + n.sksNodepool.Size = &newSize + + return nil +} + +// DecreaseTargetSize decreases the target size of the node group. This function +// doesn't permit to delete any existing node and can be used only to reduce the +// request for new nodes that have not been yet fulfilled. Delta should be negative. +// It is assumed that cloud provider will not delete the existing nodes when there +// is an option to just decrease the target. Implementation required. +func (n *sksNodepoolNodeGroup) DecreaseTargetSize(_ int) error { + // Exoscale Instance Pools don't support down-sizing without deleting members, + // so it is not possible to implement it according to the documented behavior. + return nil +} + +// Id returns an unique identifier of the node group. 
+func (n *sksNodepoolNodeGroup) Id() string { + return *n.sksNodepool.InstancePoolID +} + +// Debug returns a string containing all information regarding this node group. +func (n *sksNodepoolNodeGroup) Debug() string { + return fmt.Sprintf("Node group ID: %s (min:%d max:%d)", n.Id(), n.MinSize(), n.MaxSize()) +} + +// Nodes returns a list of all nodes that belong to this node group. +// It is required that Instance objects returned by this method have Id field set. +// Other fields are optional. +// This list should include also instances that might have not become a kubernetes node yet. +func (n *sksNodepoolNodeGroup) Nodes() ([]cloudprovider.Instance, error) { + instancePool, err := n.m.client.GetInstancePool(n.m.ctx, n.m.zone, *n.sksNodepool.InstancePoolID) + if err != nil { + errorf( + "unable to retrieve Instance Pool %s managed by SKS Nodepool %s", + *n.sksNodepool.InstancePoolID, + *n.sksNodepool.ID, + ) + return nil, err + } + + nodes := make([]cloudprovider.Instance, len(*instancePool.InstanceIDs)) + for i, id := range *instancePool.InstanceIDs { + instance, err := n.m.client.GetInstance(n.m.ctx, n.m.zone, id) + if err != nil { + errorf("unable to retrieve Compute instance %s: %v", id, err) + return nil, err + } + nodes[i] = toInstance(instance) + } + + return nodes, nil +} + +// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty +// (as if just started) node. This will be used in scale-up simulations to +// predict what would a new node look like if a node group was expanded. The returned +// NodeInfo is expected to have a fully populated Node object, with all of the labels, +// capacity and allocatable information as well as all pods that are started on +// the node by default, using manifest (most likely only kube-proxy). Implementation optional. +func (n *sksNodepoolNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Exist checks if the node group really exists on the cloud provider side. Allows to tell the +// theoretical node group from the real one. Implementation required. +func (n *sksNodepoolNodeGroup) Exist() bool { + return n.sksNodepool != nil +} + +// Create creates the node group on the cloud provider side. Implementation optional. +func (n *sksNodepoolNodeGroup) Create() (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// Delete deletes the node group on the cloud provider side. +// This will be executed only for autoprovisioned node groups, once their size drops to 0. +// Implementation optional. +func (n *sksNodepoolNodeGroup) Delete() error { + return cloudprovider.ErrNotImplemented +} + +// Autoprovisioned returns true if the node group is autoprovisioned. An autoprovisioned group +// was created by CA and can be deleted when scaled to 0. +func (n *sksNodepoolNodeGroup) Autoprovisioned() bool { + return false +} + +// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular +// sksNodepoolNodeGroup. Returning a nil will result in using default options. 
+func (n *sksNodepoolNodeGroup) GetOptions(_ config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} + +func (n *sksNodepoolNodeGroup) waitUntilRunning(ctx context.Context) error { + return pollCmd(ctx, func() (bool, error) { + instancePool, err := n.m.client.GetInstancePool(ctx, n.m.zone, n.Id()) + if err != nil { + errorf("unable to retrieve Instance Pool %s: %s", n.Id(), err) + return false, err + } + + if *instancePool.State == "running" { + return true, nil + } + + return false, nil + }) +} diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go new file mode 100644 index 0000000000..9cc0d10134 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_sks_nodepool_test.go @@ -0,0 +1,254 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exoscale + +import ( + "github.com/stretchr/testify/mock" + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2" +) + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_MaxSize() { + ts.p.manager.client.(*exoscaleClientMock). + On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, "instance"). + Return( + &egoscale.Quota{ + Resource: &testComputeInstanceQuotaName, + Usage: &testComputeInstanceQuotaUsage, + Limit: &testComputeInstanceQuotaLimit, + }, + nil, + ) + + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + Name: &testSKSNodepoolName, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + ts.Require().Equal(int(testComputeInstanceQuotaLimit), nodeGroup.MaxSize()) +} + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_MinSize() { + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + Name: &testSKSNodepoolName, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + ts.Require().Equal(1, nodeGroup.MinSize()) +} + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_TargetSize() { + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + Name: &testSKSNodepoolName, + Size: &testSKSNodepoolSize, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + actual, err := nodeGroup.TargetSize() + ts.Require().NoError(err) + ts.Require().Equal(int(testInstancePoolSize), actual) +} + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_IncreaseSize() { + ts.p.manager.client.(*exoscaleClientMock). + On("GetQuota", ts.p.manager.ctx, ts.p.manager.zone, "instance"). 
+ Return( + &egoscale.Quota{ + Resource: &testComputeInstanceQuotaName, + Usage: &testComputeInstanceQuotaUsage, + Limit: &testComputeInstanceQuotaLimit, + }, + nil, + ) + + ts.p.manager.client.(*exoscaleClientMock). + On( + "ScaleSKSNodepool", + ts.p.manager.ctx, + ts.p.manager.zone, + mock.Anything, + mock.Anything, + mock.Anything, + ). + Return(nil) + + ts.p.manager.client.(*exoscaleClientMock). + On("GetInstancePool", ts.p.manager.ctx, ts.p.manager.zone, testInstancePoolID). + Return(&egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + State: &testInstancePoolState, + }, nil) + + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + InstancePoolID: &testInstancePoolID, + Name: &testSKSNodepoolName, + Size: &testSKSNodepoolSize, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + ts.Require().NoError(nodeGroup.IncreaseSize(int(testInstancePoolSize + 1))) + + // Test size increase failure if beyond current limits: + ts.Require().Error(nodeGroup.IncreaseSize(1000)) +} + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_DeleteNodes() { + ts.p.manager.client.(*exoscaleClientMock). + On( + "EvictSKSNodepoolMembers", + ts.p.manager.ctx, + ts.p.manager.zone, + mock.Anything, + mock.Anything, + mock.Anything, + mock.Anything, + ). + Return(nil) + + ts.p.manager.client.(*exoscaleClientMock). + On("GetInstancePool", ts.p.manager.ctx, ts.p.manager.zone, testInstancePoolID). + Return(&egoscale.InstancePool{ + ID: &testInstancePoolID, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + State: &testInstancePoolState, + }, nil) + + node := &apiv1.Node{ + Spec: apiv1.NodeSpec{ + ProviderID: toProviderID(testInstanceID), + }, + } + + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + InstancePoolID: &testInstancePoolID, + Name: &testSKSNodepoolName, + Size: &testSKSNodepoolSize, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + ts.Require().NoError(nodeGroup.DeleteNodes([]*apiv1.Node{node})) +} + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Id() { + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + InstancePoolID: &testInstancePoolID, + Name: &testSKSNodepoolName, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + ts.Require().Equal(testInstancePoolID, nodeGroup.Id()) +} + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Nodes() { + ts.p.manager.client.(*exoscaleClientMock). + On("GetInstancePool", ts.p.manager.ctx, ts.p.manager.zone, testInstancePoolID). + Return(&egoscale.InstancePool{ + ID: &testInstancePoolID, + InstanceIDs: &[]string{testInstanceID}, + Name: &testInstancePoolName, + Size: &testInstancePoolSize, + State: &testInstancePoolState, + }, nil) + + ts.p.manager.client.(*exoscaleClientMock). + On("GetInstance", ts.p.manager.ctx, ts.p.manager.zone, testInstanceID). 
+ Return(&egoscale.Instance{ + ID: &testInstanceID, + State: &testInstanceState, + }, nil) + + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + InstancePoolID: &testInstancePoolID, + Name: &testSKSNodepoolName, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + instances, err := nodeGroup.Nodes() + ts.Require().NoError(err) + ts.Require().Len(instances, 1) + ts.Require().Equal(testInstanceID, toNodeID(instances[0].Id)) + ts.Require().Equal(cloudprovider.InstanceRunning, instances[0].Status.State) +} + +func (ts *cloudProviderTestSuite) TestSKSNodepoolNodeGroup_Exist() { + nodeGroup := &sksNodepoolNodeGroup{ + sksNodepool: &egoscale.SKSNodepool{ + ID: &testSKSNodepoolID, + Name: &testSKSNodepoolName, + }, + sksCluster: &egoscale.SKSCluster{ + ID: &testSKSClusterID, + Name: &testSKSClusterName, + }, + m: ts.p.manager, + } + + ts.Require().True(nodeGroup.Exist()) +} diff --git a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_test.go b/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_test.go deleted file mode 100644 index 7a9bdcd1b9..0000000000 --- a/cluster-autoscaler/cloudprovider/exoscale/exoscale_node_group_test.go +++ /dev/null @@ -1,165 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package exoscale - -import ( - "os" - "testing" - - "github.com/stretchr/testify/assert" - apiv1 "k8s.io/api/core/v1" - "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" - "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale" -) - -func testSetupNodeGroup(url string) (*NodeGroup, error) { - os.Setenv("EXOSCALE_API_KEY", "KEY") - os.Setenv("EXOSCALE_API_SECRET", "SECRET") - os.Setenv("EXOSCALE_API_ENDPOINT", url) - - manager, err := newManager() - if err != nil { - return nil, err - } - - nodeGroup := &NodeGroup{ - id: testMockInstancePool1ID, - manager: manager, - instancePool: &egoscale.InstancePool{ - ID: egoscale.MustParseUUID(testMockInstancePool1ID), - Size: 1, - ZoneID: egoscale.MustParseUUID(testMockGetZoneID), - VirtualMachines: []egoscale.VirtualMachine{ - { - ID: egoscale.MustParseUUID(testMockInstance1ID), - State: string(egoscale.VirtualMachineRunning), - }, - }, - }, - } - - return nodeGroup, nil -} - -func TestNodeGroup_MaxSize(t *testing.T) { - ts := newTestServer( - testHTTPResponse{200, testMockResourceLimit}, - ) - - nodeGroup, err := testSetupNodeGroup(ts.URL) - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - assert.Equal(t, testMockResourceLimitMax, nodeGroup.MaxSize()) -} - -func TestNodeGroup_MinSize(t *testing.T) { - nodeGroup, err := testSetupNodeGroup("url") - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - assert.Equal(t, 1, nodeGroup.MinSize()) -} - -func TestNodeGroup_TargetSize(t *testing.T) { - nodeGroup, err := testSetupNodeGroup("url") - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - - target, err := nodeGroup.TargetSize() - assert.NoError(t, err) - assert.Equal(t, nodeGroup.instancePool.Size, target) -} - -func TestNodeGroup_IncreaseSize(t *testing.T) { - ts := newTestServer( - testHTTPResponse{200, testMockResourceLimit}, - testHTTPResponse{200, testMockBooleanResponse("scaleinstancepoolresponse")}, - testHTTPResponse{200, testMockInstancePool1}, - ) - - nodeGroup, err := testSetupNodeGroup(ts.URL) - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - - err = nodeGroup.IncreaseSize(2) - assert.NoError(t, err) -} - -func TestNodeGroup_IncreaseSizeFailure(t *testing.T) { - ts := newTestServer( - testHTTPResponse{200, testMockResourceLimit}, - testHTTPResponse{200, testMockBooleanResponse("scaleinstancepoolresponse")}, - testHTTPResponse{200, testMockInstancePool1}, - ) - - nodeGroup, err := testSetupNodeGroup(ts.URL) - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - - err = nodeGroup.IncreaseSize(testMockResourceLimitMax + 1) - assert.Error(t, err) -} - -func TestNodeGroup_DeleteNodes(t *testing.T) { - ts := newTestServer( - testHTTPResponse{200, testMockInstancePool1}, - testHTTPResponse{200, testMockBooleanResponse("evictinstancepoolmembersresponse")}, - testHTTPResponse{200, testMockInstancePool1}, - ) - - nodeGroup, err := testSetupNodeGroup(ts.URL) - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - - node := &apiv1.Node{ - Spec: apiv1.NodeSpec{ - ProviderID: toProviderID(testMockInstance1ID), - }, - } - - err = nodeGroup.DeleteNodes([]*apiv1.Node{node}) - assert.NoError(t, err) -} - -func TestNodeGroup_Id(t *testing.T) { - nodeGroup, err := testSetupNodeGroup("url") - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - - id := nodeGroup.Id() - assert.Equal(t, testMockInstancePool1ID, id) -} - -func TestNodeGroup_Nodes(t *testing.T) { - nodeGroup, err := testSetupNodeGroup("url") - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - - instances, err := 
nodeGroup.Nodes() - assert.NoError(t, err) - assert.Equal(t, 1, len(instances)) - assert.Equal(t, testMockInstance1ID, toNodeID(instances[0].Id)) - assert.Equal(t, cloudprovider.InstanceRunning, instances[0].Status.State) -} - -func TestNodeGroup_Exist(t *testing.T) { - nodeGroup, err := testSetupNodeGroup("url") - assert.NoError(t, err) - assert.NotNil(t, nodeGroup) - - exist := nodeGroup.Exist() - assert.True(t, exist) -} diff --git a/cluster-autoscaler/cloudprovider/exoscale/log.go b/cluster-autoscaler/cloudprovider/exoscale/log.go new file mode 100644 index 0000000000..aa5b6e0ba4 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/exoscale/log.go @@ -0,0 +1,37 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exoscale + +import ( + "k8s.io/klog/v2" +) + +func fatalf(format string, args ...interface{}) { + klog.Fatalf("exoscale-provider: "+format, args...) +} + +func errorf(format string, args ...interface{}) { + klog.Errorf("exoscale-provider: "+format, args...) +} + +func infof(format string, args ...interface{}) { + klog.Infof("exoscale-provider: "+format, args...) +} + +func debugf(format string, args ...interface{}) { + klog.V(3).Infof("exoscale-provider: "+format, args...) +} diff --git a/cluster-autoscaler/cloudprovider/exoscale/request_test.go b/cluster-autoscaler/cloudprovider/exoscale/request_test.go deleted file mode 100644 index 4eee2fc300..0000000000 --- a/cluster-autoscaler/cloudprovider/exoscale/request_test.go +++ /dev/null @@ -1,431 +0,0 @@ -/* -Copyright 2020 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package exoscale - -import ( - "fmt" - "net/http" - "net/http/httptest" -) - -var testMockInstancePool1ID = "baca3aee-e609-4287-833f-573f6459ffe1" - -var testMockInstancePool1 = fmt.Sprintf(` -{"getinstancepoolresponse": { - "count": 1, - "instancepool": [ - { - "id": %q, - "keypair": "test", - "name": "k8s-nodes1", - "rootdisksize": 50, - "securitygroupids": [ - "5cbdfbb8-31ea-4791-962f-8a9719da8758" - ], - "serviceofferingid": "5e5fb3c6-e076-429d-9b6c-b71f7b27760b", - "size": 1, - "state": "running", - "templateid": "d860ceb8-684d-47e0-a6ad-970a0eec95d3", - "virtualmachines": [ - { - "affinitygroup": [], - "cpunumber": 2, - "cpuspeed": 2198, - "cpuused": "0.04%%", - "created": "2020-08-25T10:04:51+0200", - "diskioread": 843, - "diskiowrite": 2113, - "diskkbsread": 96120, - "diskkbswrite": 673840, - "displayname": "pool-1a11c-dbmaa", - "id": "10e48003-3ac5-4b90-b9fb-c1c7c5a597ff", - "keypair": "pierre", - "lock": { - "calls": [ - "scaleVirtualMachine", - "updateDefaultNicForVirtualMachine", - "expungeVirtualMachine", - "restoreVirtualMachine", - "recoverVirtualMachine", - "updateVirtualMachine", - "changeServiceForVirtualMachine" - ] - }, - "manager": "instancepool", - "managerid": "1a11c398-cab1-6c91-3b94-a0561c92ce3c", - "memory": 4096, - "name": "pool-1a11c-dbmaa", - "networkkbsread": 13, - "networkkbswrite": 8, - "nic": [ - { - "broadcasturi": "vlan://untagged", - "gateway": "89.145.160.1", - "id": "353054ab-83fe-45c6-b515-47cacacde7e6", - "ipaddress": "89.145.160.58", - "isdefault": true, - "macaddress": "06:5a:b2:00:00:3f", - "netmask": "255.255.252.0", - "networkid": "71d5d5a8-f8b8-4331-82f5-d6f1d18ffbca", - "networkname": "defaultGuestNetwork", - "traffictype": "Guest", - "type": "Shared" - } - ], - "oscategoryid": "9594477e-ea0e-4c63-a642-25cbd6747493", - "oscategoryname": "Ubuntu", - "ostypeid": "bf3c2b62-1b0d-4432-8160-19ac837a777a", - "passwordenabled": true, - "rootdeviceid": 0, - "rootdevicetype": "ROOT", - "securitygroup": [ - { - "account": "exoscale-2", - "description": "Default Security Group", - "id": "5cbdfbb8-31ea-4791-962f-8a9719da8758", - "name": "default" - } - ], - "serviceofferingid": "5e5fb3c6-e076-429d-9b6c-b71f7b27760b", - "serviceofferingname": "Medium", - "state": "Running", - "tags": [], - "templatedisplaytext": "Linux Ubuntu 20.04 LTS 64-bit 2020-08-11-e15f6a", - "templateid": "d860ceb8-684d-47e0-a6ad-970a0eec95d3", - "templatename": "Linux Ubuntu 20.04 LTS 64-bit", - "zoneid": "de88c980-78f6-467c-a431-71bcc88e437f", - "zonename": "de-fra-1" - } - ], - "zoneid": "de88c980-78f6-467c-a431-71bcc88e437f" - } - ] - }}`, testMockInstancePool1ID) - -var testMockInstancePool2ID = "b0520c25-66c6-440d-a533-43881a15a679" - -var testMockInstancePool2 = fmt.Sprintf(` - {"getinstancepoolresponse": { - "count": 1, - "instancepool": [ - { - "id": %q, - "keypair": "test", - "name": "k8s-nodes2", - "rootdisksize": 50, - "securitygroupids": [ - "5cbdfbb8-31ea-4791-962f-8a9719da8758" - ], - "serviceofferingid": "5e5fb3c6-e076-429d-9b6c-b71f7b27760b", - "size": 1, - "state": "running", - "templateid": "d860ceb8-684d-47e0-a6ad-970a0eec95d3", - "virtualmachines": [ - { - "affinitygroup": [], - "cpunumber": 2, - "cpuspeed": 2198, - "cpuused": "0.04%%", - "created": "2020-08-25T10:04:51+0200", - "diskioread": 843, - "diskiowrite": 2113, - "diskkbsread": 96120, - "diskkbswrite": 673840, - "displayname": "pool-1a11c-dbmaa", - "id": "10e48003-3ac5-4b90-b9fb-c1c7c5a597ff", - "keypair": "pierre", - "lock": { - "calls": [ - "scaleVirtualMachine", - 
"updateDefaultNicForVirtualMachine", - "expungeVirtualMachine", - "restoreVirtualMachine", - "recoverVirtualMachine", - "updateVirtualMachine", - "changeServiceForVirtualMachine" - ] - }, - "manager": "instancepool", - "managerid": "1a11c398-cab1-6c91-3b94-a0561c92ce3c", - "memory": 4096, - "name": "pool-1a11c-dbmaa", - "networkkbsread": 13, - "networkkbswrite": 8, - "nic": [ - { - "broadcasturi": "vlan://untagged", - "gateway": "89.145.160.1", - "id": "353054ab-83fe-45c6-b515-47cacacde7e6", - "ipaddress": "89.145.160.58", - "isdefault": true, - "macaddress": "06:5a:b2:00:00:3f", - "netmask": "255.255.252.0", - "networkid": "71d5d5a8-f8b8-4331-82f5-d6f1d18ffbca", - "networkname": "defaultGuestNetwork", - "traffictype": "Guest", - "type": "Shared" - } - ], - "oscategoryid": "9594477e-ea0e-4c63-a642-25cbd6747493", - "oscategoryname": "Ubuntu", - "ostypeid": "bf3c2b62-1b0d-4432-8160-19ac837a777a", - "passwordenabled": true, - "rootdeviceid": 0, - "rootdevicetype": "ROOT", - "securitygroup": [ - { - "account": "exoscale-2", - "description": "Default Security Group", - "id": "5cbdfbb8-31ea-4791-962f-8a9719da8758", - "name": "default" - } - ], - "serviceofferingid": "5e5fb3c6-e076-429d-9b6c-b71f7b27760b", - "serviceofferingname": "Medium", - "state": "Running", - "tags": [], - "templatedisplaytext": "Linux Ubuntu 20.04 LTS 64-bit 2020-08-11-e15f6a", - "templateid": "d860ceb8-684d-47e0-a6ad-970a0eec95d3", - "templatename": "Linux Ubuntu 20.04 LTS 64-bit", - "zoneid": "de88c980-78f6-467c-a431-71bcc88e437f", - "zonename": "de-fra-1" - } - ], - "zoneid": "de88c980-78f6-467c-a431-71bcc88e437f" - } - ] - }}`, testMockInstancePool2ID) - -var testMockGetZoneID = "de88c980-78f6-467c-a431-71bcc88e437f" -var testMockGetZoneName = "de-fra-1" - -var testMockGetZone = fmt.Sprintf(` -{"listzonesresponse": { - "count": 1, - "zone": [ - { - "allocationstate": "Enabled", - "id": %q, - "localstorageenabled": true, - "name": %q, - "networktype": "Basic", - "securitygroupsenabled": true, - "tags": [], - "zonetoken": "c4bdb9f2-c28d-36a3-bbc5-f91fc69527e6" - } - ] - }}`, testMockGetZoneID, testMockGetZoneName) - -var testMockResourceLimitMax = 50 - -var testMockResourceLimit = fmt.Sprintf(` -{"listresourcelimitsresponse": { - "count": 1, - "resourcelimit": [ - { - "max": %d, - "resourcetype": "0", - "resourcetypename": "user_vm" - } - ] - }}`, testMockResourceLimitMax) - -var testMockInstance1ID = "7ce1c7a6-d9ca-45b5-91bd-2688dbce7ab0" - -var testMockInstance1 = fmt.Sprintf(` -{"listvirtualmachinesresponse": { - "count": 1, - "virtualmachine": [ - { - "affinitygroup": [], - "cpunumber": 2, - "cpuspeed": 2198, - "created": "2020-08-25T10:04:51+0200", - "displayname": "pool-1a11c-dbmaa", - "hypervisor": "KVM", - "id": %q, - "keypair": "pierre", - "manager": "instancepool", - "managerid": "baca3aee-e609-4287-833f-573f6459ffe1", - "memory": 4096, - "name": "pool-1a11c-dbmaa", - "nic": [ - { - "broadcasturi": "vlan://untagged", - "gateway": "89.145.160.1", - "id": "353054ab-83fe-45c6-b515-47cacacde7e6", - "ipaddress": "89.145.160.58", - "isdefault": true, - "macaddress": "06:5a:b2:00:00:3f", - "netmask": "255.255.252.0", - "networkid": "71d5d5a8-f8b8-4331-82f5-d6f1d18ffbca", - "networkname": "defaultGuestNetwork", - "traffictype": "Guest", - "type": "Shared" - } - ], - "oscategoryid": "9594477e-ea0e-4c63-a642-25cbd6747493", - "oscategoryname": "Ubuntu", - "ostypeid": "bf3c2b62-1b0d-4432-8160-19ac837a777a", - "passwordenabled": true, - "rootdeviceid": 0, - "rootdevicetype": "ROOT", - "securitygroup": [ - { - "account": 
"exoscale-2", - "description": "Default Security Group", - "id": "5cbdfbb8-31ea-4791-962f-8a9719da8758", - "name": "default" - } - ], - "serviceofferingid": "5e5fb3c6-e076-429d-9b6c-b71f7b27760b", - "serviceofferingname": "Medium", - "state": "Running", - "tags": [], - "templatedisplaytext": "Linux Ubuntu 20.04 LTS 64-bit 2020-08-11-e15f6a", - "templateid": "d860ceb8-684d-47e0-a6ad-970a0eec95d3", - "templatename": "Linux Ubuntu 20.04 LTS 64-bit", - "zoneid": "de88c980-78f6-467c-a431-71bcc88e437f", - "zonename": "de-fra-1" - } - ] - }}`, testMockInstance1ID) - -var testMockInstance2ID = "25775367-fac5-451f-b14d-7eb1869abe2c" - -var testMockInstance2 = fmt.Sprintf(` -{"listvirtualmachinesresponse": { - "count": 1, - "virtualmachine": [ - { - "affinitygroup": [], - "cpunumber": 2, - "cpuspeed": 2198, - "created": "2020-08-25T10:04:51+0200", - "displayname": "pool-1a11c-dbmaa", - "hypervisor": "KVM", - "id": %q, - "keypair": "pierre", - "manager": "instancepool", - "managerid": "b0520c25-66c6-440d-a533-43881a15a679", - "memory": 4096, - "name": "pool-1a11c-dbmaa", - "nic": [ - { - "broadcasturi": "vlan://untagged", - "gateway": "89.145.160.1", - "id": "353054ab-83fe-45c6-b515-47cacacde7e6", - "ipaddress": "89.145.160.58", - "isdefault": true, - "macaddress": "06:5a:b2:00:00:3f", - "netmask": "255.255.252.0", - "networkid": "71d5d5a8-f8b8-4331-82f5-d6f1d18ffbca", - "networkname": "defaultGuestNetwork", - "traffictype": "Guest", - "type": "Shared" - } - ], - "oscategoryid": "9594477e-ea0e-4c63-a642-25cbd6747493", - "oscategoryname": "Ubuntu", - "ostypeid": "bf3c2b62-1b0d-4432-8160-19ac837a777a", - "passwordenabled": true, - "rootdeviceid": 0, - "rootdevicetype": "ROOT", - "securitygroup": [ - { - "account": "exoscale-2", - "description": "Default Security Group", - "id": "5cbdfbb8-31ea-4791-962f-8a9719da8758", - "name": "default" - } - ], - "serviceofferingid": "5e5fb3c6-e076-429d-9b6c-b71f7b27760b", - "serviceofferingname": "Medium", - "state": "Running", - "tags": [], - "templatedisplaytext": "Linux Ubuntu 20.04 LTS 64-bit 2020-08-11-e15f6a", - "templateid": "d860ceb8-684d-47e0-a6ad-970a0eec95d3", - "templatename": "Linux Ubuntu 20.04 LTS 64-bit", - "zoneid": "de88c980-78f6-467c-a431-71bcc88e437f", - "zonename": "de-fra-1" - } - ] - }}`, testMockInstance2ID) - -func testMockBooleanResponse(cmd string) string { - return fmt.Sprintf(` -{%q: { - "success": true -}}`, cmd) -} - -func testMockAPICloudProviderTest() string { - ts := newTestServer( - testHTTPResponse{200, testMockInstance1}, - testHTTPResponse{200, testMockGetZone}, - testHTTPResponse{200, testMockInstancePool1}, - testHTTPResponse{200, testMockInstancePool1}, - testHTTPResponse{200, testMockInstance2}, - testHTTPResponse{200, testMockGetZone}, - testHTTPResponse{200, testMockInstancePool2}, - testHTTPResponse{200, testMockInstancePool1}, - testHTTPResponse{200, testMockInstancePool2}, - ) - - return ts.URL -} - -type testHTTPResponse struct { - code int - body string -} - -type testServer struct { - *httptest.Server - lastResponse int - responses []testHTTPResponse -} - -func newTestServer(responses ...testHTTPResponse) *testServer { - mux := http.NewServeMux() - - ts := &testServer{ - httptest.NewServer(mux), - 0, - responses, - } - - mux.Handle("/", ts) - - return ts -} - -func (ts *testServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { - i := ts.lastResponse - if i >= len(ts.responses) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(500) - w.Write([]byte("{}")) // nolint: errcheck - return - } - 
diff --git a/cluster-autoscaler/cloudprovider/exoscale/util.go b/cluster-autoscaler/cloudprovider/exoscale/util.go
new file mode 100644
index 0000000000..b27777eeaf
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/exoscale/util.go
@@ -0,0 +1,95 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package exoscale
+
+import (
+    "context"
+    "fmt"
+    "strings"
+    "time"
+
+    "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+    egoscale "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale/internal/github.com/exoscale/egoscale/v2"
+)
+
+// toProviderID returns a provider ID from the given node ID.
+func toProviderID(nodeID string) string {
+    return fmt.Sprintf("%s%s", exoscaleProviderIDPrefix, nodeID)
+}
+
+// toNodeID returns a node or Compute instance ID from the given provider ID.
+func toNodeID(providerID string) string {
+    return strings.TrimPrefix(providerID, exoscaleProviderIDPrefix)
+}
+
+// toInstance converts the given egoscale.Instance to a cloudprovider.Instance.
+func toInstance(instance *egoscale.Instance) cloudprovider.Instance {
+    return cloudprovider.Instance{
+        Id:     toProviderID(*instance.ID),
+        Status: toInstanceStatus(*instance.State),
+    }
+}
+
+// toInstanceStatus converts the given Exoscale API Compute instance status to a cloudprovider.InstanceStatus.
+func toInstanceStatus(state string) *cloudprovider.InstanceStatus {
+    if state == "" {
+        return nil
+    }
+
+    switch state {
+    case "starting":
+        return &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating}
+
+    case "running":
+        return &cloudprovider.InstanceStatus{State: cloudprovider.InstanceRunning}
+
+    case "stopping":
+        return &cloudprovider.InstanceStatus{State: cloudprovider.InstanceDeleting}
+
+    default:
+        return &cloudprovider.InstanceStatus{ErrorInfo: &cloudprovider.InstanceErrorInfo{
+            ErrorClass:   cloudprovider.OtherErrorClass,
+            ErrorCode:    "no-code-exoscale",
+            ErrorMessage: "error",
+        }}
+    }
+}
+
+// pollCmd executes the specified callback function until either it returns true or a non-nil error,
+// or until the context times out.
+func pollCmd(ctx context.Context, callback func() (bool, error)) error {
+    timeout := time.Minute * 10
+    c, cancel := context.WithTimeout(ctx, timeout)
+    defer cancel()
+
+    for t := time.Tick(time.Second * 10); ; { // nolint: staticcheck
+        ok, err := callback()
+        if err != nil {
+            return err
+        }
+        if ok {
+            return nil
+        }
+
+        select {
+        case <-c.Done():
+            return fmt.Errorf("context timeout after: %v", timeout)
+        case <-t:
+            continue
+        }
+    }
+}
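The `pollCmd` helper above is a generic polling loop that callers can use to wait for asynchronous Exoscale operations. As a rough, self-contained sketch of the same contract (not the patch's implementation; the timeout, interval, and callback here are shortened and made up so the example finishes quickly):

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// poll mirrors the contract of the pollCmd helper added above: it invokes the
// callback on a fixed interval until it returns true, returns an error, or the
// context deadline expires. Timeout and interval are illustrative only.
func poll(ctx context.Context, callback func() (bool, error)) error {
	c, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	ticker := time.NewTicker(200 * time.Millisecond)
	defer ticker.Stop()

	for {
		ok, err := callback()
		if err != nil {
			return err // abort polling on callback errors
		}
		if ok {
			return nil // condition reached
		}

		select {
		case <-c.Done():
			return errors.New("timed out waiting for condition")
		case <-ticker.C:
		}
	}
}

func main() {
	attempts := 0

	// The callback stands in for an Exoscale API status check (e.g. "has the
	// Instance Pool scaling operation finished?"); here it succeeds on the
	// third attempt.
	err := poll(context.Background(), func() (bool, error) {
		attempts++
		return attempts >= 3, nil
	})
	fmt.Println("polling finished after", attempts, "attempts, err =", err)
}
```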
diff --git a/cluster-autoscaler/cloudprovider/exoscale/vendor_internal.sh b/cluster-autoscaler/cloudprovider/exoscale/vendor_internal.sh
index 41bddd6f87..325021725e 100755
--- a/cluster-autoscaler/cloudprovider/exoscale/vendor_internal.sh
+++ b/cluster-autoscaler/cloudprovider/exoscale/vendor_internal.sh
@@ -6,7 +6,7 @@
 # The following modules have been vendored manually and are not managed
 # by this script:
 # - github.com/gofrs/uuid
-# - github.com/deepmap/oapi-codegen/{runtime,types}
+# - github.com/deepmap/oapi-codegen
 # - k8s.io/klog
 
 if [[ $# -ne 1 ]]; then
@@ -17,8 +17,8 @@ fi
 EGOSCALE_DIR=$(readlink -f "$1")
 
 rm -rf ./internal/github.com/exoscale/egoscale/*
-cp -rf $EGOSCALE_DIR/* ./internal/github.com/exoscale/egoscale/
-rm -rf ./internal/github.com/exoscale/egoscale/{*_test.go,doc.go,api/v2/*_test.go,internal/v2/*_test.go,internal/v2/mock.go,go.*,gopher.png,*.md,admin,cmd,generate,test,website}
+cp -rf $EGOSCALE_DIR/{v2,version} ./internal/github.com/exoscale/egoscale/
+rm -rf ./internal/github.com/exoscale/egoscale/v2/{*_test.go,doc.go,*_test.go,api/*_test.go,oapi/*_test.go,oapi/test.go,oapi/mock.go}
 
 find ./internal -name '*.go' | while read f; do
   sed -i -r \
@@ -30,7 +30,7 @@ find ./internal -name '*.go' | while read f; do
     "$f"
 
   cat <<EOF > "$f.tmp"
 /*
-Copyright 2020 The Kubernetes Authors.
+Copyright 2021 The Kubernetes Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.