diff --git a/cluster-autoscaler/README.md b/cluster-autoscaler/README.md index 66f31e7628..3291c29809 100644 --- a/cluster-autoscaler/README.md +++ b/cluster-autoscaler/README.md @@ -18,6 +18,7 @@ You should also take a look at the notes and "gotchas" for your specific cloud p * [AWS](./cloudprovider/aws/README.md) * [BaiduCloud](./cloudprovider/baiducloud/README.md) * [Brightbox](./cloudprovider/brightbox/README.md) +* [CherryServers](./cloudprovider/cherryservers/README.md) * [CloudStack](./cloudprovider/cloudstack/README.md) * [HuaweiCloud](./cloudprovider/huaweicloud/README.md) * [Hetzner](./cloudprovider/hetzner/README.md) @@ -161,6 +162,7 @@ Supported cloud providers: * Azure https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/azure/README.md * Alibaba Cloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/alicloud/README.md * Brightbox https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/brightbox/README.md +* CherryServers https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/cherryservers/README.md * OpenStack Magnum https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/magnum/README.md * DigitalOcean https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/digitalocean/README.md * CloudStack https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/cloudstack/README.md diff --git a/cluster-autoscaler/cloudprovider/builder/builder_all.go b/cluster-autoscaler/cloudprovider/builder/builder_all.go index 62d5ec69fa..a2631aa527 100644 --- a/cluster-autoscaler/cloudprovider/builder/builder_all.go +++ b/cluster-autoscaler/cloudprovider/builder/builder_all.go @@ -27,6 +27,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/baiducloud" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/bizflycloud" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/brightbox" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/cherryservers" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/cloudstack" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/clusterapi" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/digitalocean" @@ -51,6 +52,7 @@ var AvailableCloudProviders = []string{ cloudprovider.AzureProviderName, cloudprovider.GceProviderName, cloudprovider.AlicloudProviderName, + cloudprovider.CherryServersProviderName, cloudprovider.CloudStackProviderName, cloudprovider.BaiducloudProviderName, cloudprovider.MagnumProviderName, @@ -85,6 +87,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro return azure.BuildAzure(opts, do, rl) case cloudprovider.AlicloudProviderName: return alicloud.BuildAlicloud(opts, do, rl) + case cloudprovider.CherryServersProviderName: + return cherryservers.BuildCherry(opts, do, rl) case cloudprovider.CloudStackProviderName: return cloudstack.BuildCloudStack(opts, do, rl) case cloudprovider.BaiducloudProviderName: diff --git a/cluster-autoscaler/cloudprovider/builder/builder_cherry.go b/cluster-autoscaler/cloudprovider/builder/builder_cherry.go new file mode 100644 index 0000000000..4f16b8b903 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/builder/builder_cherry.go @@ -0,0 +1,43 @@ +//go:build cherry +// +build cherry + +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package builder
+
+import (
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+	cherry "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/cherryservers"
+	"k8s.io/autoscaler/cluster-autoscaler/config"
+)
+
+// AvailableCloudProviders supported by the cloud provider builder.
+var AvailableCloudProviders = []string{
+	cherry.ProviderName,
+}
+
+// DefaultCloudProvider for the Cherry-only build is Cherry.
+const DefaultCloudProvider = cherry.ProviderName
+
+func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
+	switch opts.CloudProviderName {
+	case cherry.ProviderName:
+		return cherry.BuildCherry(opts, do, rl)
+	}
+
+	return nil
+}
diff --git a/cluster-autoscaler/cloudprovider/cherryservers/OWNERS b/cluster-autoscaler/cloudprovider/cherryservers/OWNERS
new file mode 100644
index 0000000000..9b0c1bddeb
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/cherryservers/OWNERS
@@ -0,0 +1,10 @@
+approvers:
+- deitch
+#- zalmarge
+#- ArturasRa
+#- Andrius521
+reviewers:
+- deitch
+#- zalmarge
+#- ArturasRa
+#- Andrius521
diff --git a/cluster-autoscaler/cloudprovider/cherryservers/README.md b/cluster-autoscaler/cloudprovider/cherryservers/README.md
new file mode 100644
index 0000000000..98998650d0
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/cherryservers/README.md
@@ -0,0 +1,167 @@
+# Cluster Autoscaler for Cherry Servers
+
+The cluster autoscaler for [Cherry Servers](https://cherryservers.com) worker nodes performs
+autoscaling within any specified nodepools. It will run as a `Deployment` in
+your cluster. The nodepools are specified using tags on Cherry Servers.
+
+This README covers the steps required to get the cluster autoscaler up and running.
+
+## Permissions and credentials
+
+The autoscaler needs a `ServiceAccount` with permissions for Kubernetes and
+requires credentials, specifically API tokens, for interacting with Cherry Servers.
+
+An example `ServiceAccount` is given in [examples/cluster-autoscaler-svcaccount.yaml](examples/cluster-autoscaler-svcaccount.yaml).
+
+The credentials for authenticating with Cherry Servers are stored in a secret and
+provided as an environment variable to the container; see [examples/cluster-autoscaler-secret.yaml](examples/cluster-autoscaler-secret.yaml).
+In that file you can modify the following fields:
+
+| Secret                          | Key                     | Value                                                                       |
+|---------------------------------|-------------------------|-----------------------------------------------------------------------------|
+| cluster-autoscaler-cherry       | authtoken               | Your Cherry Servers API token. It must be base64 encoded.                  |
+| cluster-autoscaler-cloud-config | Global/project-id       | Your Cherry Servers project id                                              |
+| cluster-autoscaler-cloud-config | Global/api-server       | The ip:port for your cluster's k8s api (e.g.
K8S_MASTER_PUBLIC_IP:6443) | +| cluster-autoscaler-cloud-config | Global/region | The Cherry Servers region for the servers in your nodepool (eg: EU-Nord-1) | +| cluster-autoscaler-cloud-config | Global/plan | The Cherry Servers plan ID for new nodes in the nodepool (eg: `103`) | +| cluster-autoscaler-cloud-config | Global/os | The OS image to use for new nodes, e.g. `CentOS 6 64bit`. If you change this also update cloudinit. | +| cluster-autoscaler-cloud-config | Global/cloudinit | The base64 encoded user data submitted when provisioning servers. In the example file, the default value has been tested with Ubuntu 18.04 to install Docker & kubelet and then to bootstrap the node into the cluster using kubeadm. The kubeadm, kubelet, kubectl are pinned to version 1.17.4. For a different base OS or bootstrap method, this needs to be customized accordingly| +| cluster-autoscaler-cloud-config | Global/reservation | The values "require" or "prefer" will request the next available hardware reservation for new servers in selected region & plan. If no hardware reservations match, "require" will trigger a failure, while "prefer" will launch on-demand servers instead (default: none) | +| cluster-autoscaler-cloud-config | Global/hostname-pattern | The pattern for the names of new Cherry Servers servers (default: "k8s-{{.ClusterName}}-{{.NodeGroup}}-{{.RandString8}}" ) | + +You can always update the secret with more nodepool definitions (with different plans etc.) as shown in the example, but you should always provide a default nodepool configuration. + +## Configure nodepool and cluster names using Cherry Servers tags + +The Cherry Servers API does not yet have native support for groups or pools of servers. So we use tags to specify them. Each Cherry Servers server that's a member of the "cluster1" cluster should have the tag k8s-cluster-cluster1. The servers that are members of the "pool1" nodepool should also have the tag k8s-nodepool-pool1. Once you have a Kubernetes cluster running on Cherry Servers, use the Cherry Servers Portal, API or CLI to tag the nodes accordingly. + +## Autoscaler deployment + +The yaml files in [examples](./examples) can be used. You will need to change several of the files +to match your cluster: + +* [cluster-autoscaler-rbac.yaml](./examples/cluster-autoscaler-rbac.yaml) unchanged +* [cluster-autoscaler-svcaccount.yaml](./examples/cluster-autoscaler-svcaccount.yaml) unchanged +* [cluster-autoscaler-secret.yaml](./examples/cluster-autoscaler-secret.yaml) requires entering the correct tokens, project ID, plan type, etc. for your cluster; see the file comments +* [cluster-autoscaler-deployment.yaml](./examples/cluster-autoscaler-deployment.yaml) requires setting the arguments passed to the autoscaler to match your cluster. + +| Argument | Usage | +|-----------------------|------------------------------------------------------------------------------------------------------------| +| --cluster-name | The name of your Kubernetes cluster. It should correspond to the tags that have been applied to the nodes. | +| --nodes | Of the form `min:max:NodepoolName`. For multiple nodepools you can add the same argument multiple times. E.g. for pool1, pool2 you would add `--nodes=0:10:pool1` and `--nodes=0:10:pool2`. In addition, each node provisioned by the autoscaler will have a label with key: `pool` and with value: `NodepoolName`. These labels can be useful when there is a need to target specific nodepools. 
|
+| --expander=random     | This is an optional argument which allows the cluster-autoscaler to take into account various algorithms when scaling with multiple nodepools, see [expanders](../../FAQ.md#what-are-expanders). |
+
+## Target Specific Nodepools
+
+If you want to target one or more specific nodepools, e.g. for a deployment, you can add a `nodeAffinity` with the key `pool` and the name of the nodepool you want to target as the value. This functionality is not backwards compatible, which means that nodes provisioned with older cluster-autoscaler images won't have the `pool` key, but you can overcome this limitation by manually adding the correct labels. Here are some examples:
+
+Target a nodepool with a specific name:
+```
+affinity:
+  nodeAffinity:
+    requiredDuringSchedulingIgnoredDuringExecution:
+      nodeSelectorTerms:
+      - matchExpressions:
+        - key: pool
+          operator: In
+          values:
+          - pool3
+```
+Target a nodepool with a specific Cherry Servers instance type:
+```
+affinity:
+  nodeAffinity:
+    requiredDuringSchedulingIgnoredDuringExecution:
+      nodeSelectorTerms:
+      - matchExpressions:
+        - key: beta.kubernetes.io/instance-type
+          operator: In
+          values:
+          - 103
+```
+
+## CCM and Controller node labels
+
+### CCM
+
+By default, the autoscaler assumes that you have a recent version of the
+[Cherry Servers CCM](https://github.com/cherryservers/cloud-provider-cherry)
+installed in your cluster.
+
+## Notes
+
+The autoscaler will not remove nodes which have non-default kube-system pods.
+This prevents the node that the autoscaler is running on from being scaled down.
+If you are deploying the autoscaler into a cluster which already has more than one node,
+it is best to deploy it onto any node which already has non-default kube-system pods,
+to minimise the number of nodes which cannot be removed when scaling. For this reason, in
+the provided example, the autoscaler pod has a node affinity which forces it to deploy on
+the control plane (previously referred to as master) node.
+
+## Development
+
+### Testing
+
+The Cherry Servers cluster-autoscaler includes a series of tests, which are executed
+against a mock backend server included in the package; they will **not** run
+against the real Cherry Servers API.
+
+If you want to execute them against the real Cherry Servers API, set the
+environment variable:
+
+```sh
+CHERRY_USE_PRODUCTION_API=true
+```
+
+### Running Locally
+
+To run the CherryServers cluster-autoscaler locally:
+
+1. Save the desired cloud-config to a local file, e.g. `/tmp/cloud-config`. The contents of the file can be extracted from the value in [examples/cluster-autoscaler-secret.yaml](./examples/cluster-autoscaler-secret.yaml), secret named `cluster-autoscaler-cloud-config`, key `cloud-config`.
+1. Export the following environment variables:
+   * `BOOTSTRAP_TOKEN_ID`: the bootstrap token ID, i.e. the leading 6 characters of the entire bootstrap token, before the `.`
+   * `BOOTSTRAP_TOKEN_SECRET`: the bootstrap token secret, i.e. the trailing 16 characters of the entire bootstrap token, after the `.`
+   * `CHERRY_AUTH_TOKEN`: your CherryServers authentication token
+   * `KUBECONFIG`: a kubeconfig file with permissions to your cluster
+   * `CLUSTER_NAME`: the name for your cluster, e.g. `cluster1`
+   * `CLOUD_CONFIG`: the path to your cloud-config file, e.g. `/tmp/cloud-config`
+1. Run the autoscaler per the command line below (see the sketch that follows for example values).
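+
+For reference, here is a minimal sketch of that local setup. It is illustrative only: the cloud-config keys follow the `gcfg` field tags of `ConfigNodepool` in `cherry_manager_rest.go`, and every ID, address and token below is a placeholder.
+
+```sh
+# Hypothetical values -- substitute your own project ID, endpoint, plan and tokens.
+cat > /tmp/cloud-config <<'EOF'
+[global]
+project-id=12345
+api-server-endpoint=203.0.113.10:6443
+region=EU-Nord-1
+plan=103
+os=ubuntu_18_04
+hostname-pattern=k8s-{{.ClusterName}}-{{.NodeGroup}}-{{.RandString8}}
+cloudinit=<base64-encoded user data>
+EOF
+
+export BOOTSTRAP_TOKEN_ID=abcdef                  # leading 6 characters of the bootstrap token
+export BOOTSTRAP_TOKEN_SECRET=0123456789abcdef    # trailing 16 characters of the bootstrap token
+export CHERRY_AUTH_TOKEN="<your Cherry Servers API token>"
+export KUBECONFIG=$HOME/.kube/config
+export CLUSTER_NAME=cluster1
+export CLOUD_CONFIG=/tmp/cloud-config
+```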
+
+The command-line format is:
+
+```
+cluster-autoscaler --alsologtostderr --cluster-name=$CLUSTER_NAME --cloud-config=$CLOUD_CONFIG \
+    --cloud-provider=cherryservers \
+    --nodes=0:10:pool1 \
+    --nodes=0:10:pool2 \
+    --scale-down-unneeded-time=1m0s --scale-down-delay-after-add=1m0s --scale-down-unready-time=1m0s \
+    --kubeconfig=$KUBECONFIG \
+    --v=2
+```
+
+You can set `--nodes=` as many times as you like. The format for each `--nodes=` is:
+
+```
+--nodes=<min>:<max>:<poolname>
+```
+
+* `<min>` and `<max>` must be integers, and `<max>` must be greater than `<min>`
+* `<poolname>` must be a pool that exists in the `cloud-config`
+
+If the poolname is not found in the `cloud-config`, the `default` pool definition is used.
+
+You can also make changes and run it directly, replacing the command with `go run`,
+but this must be run from the `cluster-autoscaler` directory, i.e. not within the specific
+cloudprovider implementation:
+
+```
+go run . --alsologtostderr --cluster-name=$CLUSTER_NAME --cloud-config=$CLOUD_CONFIG \
+    --cloud-provider=cherryservers \
+    --nodes=0:10:pool1 \
+    --nodes=0:10:pool2 \
+    --scale-down-unneeded-time=1m0s --scale-down-delay-after-add=1m0s --scale-down-unready-time=1m0s \
+    --kubeconfig=$KUBECONFIG \
+    --v=2
+```
diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_cloud_provider.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_cloud_provider.go
new file mode 100644
index 0000000000..b3f6b8888c
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_cloud_provider.go
@@ -0,0 +1,213 @@
+/*
+Copyright 2022 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package cherryservers
+
+import (
+	"io"
+	"os"
+	"regexp"
+
+	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+	"k8s.io/autoscaler/cluster-autoscaler/config"
+	"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
+	klog "k8s.io/klog/v2"
+)
+
+const (
+	// ProviderName is the cloud provider name for Cherry Servers
+	ProviderName = "cherryservers"
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "cherryservers.com/gpu"
+	// DefaultControllerNodeLabelKey is the default label used to identify master/controller nodes.
+	DefaultControllerNodeLabelKey = "node-role.kubernetes.io/master"
+	// ControllerNodeIdentifierEnv is the name of the environment variable that overrides the controller node label key.
+	ControllerNodeIdentifierEnv = "CHERRY_CONTROLLER_NODE_IDENTIFIER_LABEL"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{}
+)
+
+// cherryCloudProvider implements the CloudProvider interface from the cluster-autoscaler/cloudprovider module.
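+// It wires together the cherryManager used to talk to the Cherry Servers API, the configured
+// node groups, and the controller node label; nodes carrying that label are ignored when
+// mapping nodes to node groups.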
+type cherryCloudProvider struct { + cherryManager cherryManager + resourceLimiter *cloudprovider.ResourceLimiter + nodeGroups []cherryNodeGroup + controllerNodeLabel string +} + +func buildCherryCloudProvider(cherryManager cherryManager, resourceLimiter *cloudprovider.ResourceLimiter) (*cherryCloudProvider, error) { + controllerNodeLabel := os.Getenv(ControllerNodeIdentifierEnv) + if controllerNodeLabel == "" { + klog.V(3).Infof("env %s not set, using default: %s", ControllerNodeIdentifierEnv, DefaultControllerNodeLabelKey) + controllerNodeLabel = DefaultControllerNodeLabelKey + } + + ccp := &cherryCloudProvider{ + cherryManager: cherryManager, + resourceLimiter: resourceLimiter, + nodeGroups: []cherryNodeGroup{}, + controllerNodeLabel: controllerNodeLabel, + } + return ccp, nil +} + +// Name returns the name of the cloud provider. +func (ccp *cherryCloudProvider) Name() string { + return ProviderName +} + +// GPULabel returns the label added to nodes with GPU resource. +func (ccp *cherryCloudProvider) GPULabel() string { + return GPULabel +} + +// GetAvailableGPUTypes return all available GPU types cloud provider supports +func (ccp *cherryCloudProvider) GetAvailableGPUTypes() map[string]struct{} { + return availableGPUTypes +} + +// NodeGroups returns all node groups managed by this cloud provider. +func (ccp *cherryCloudProvider) NodeGroups() []cloudprovider.NodeGroup { + groups := make([]cloudprovider.NodeGroup, len(ccp.nodeGroups)) + for i := range ccp.nodeGroups { + groups[i] = &ccp.nodeGroups[i] + } + return groups +} + +// AddNodeGroup appends a node group to the list of node groups managed by this cloud provider. +func (ccp *cherryCloudProvider) AddNodeGroup(group cherryNodeGroup) { + ccp.nodeGroups = append(ccp.nodeGroups, group) +} + +// NodeGroupForNode returns the node group that a given node belongs to. +// +// Since only a single node group is currently supported, the first node group is always returned. +func (ccp *cherryCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { + // ignore control plane nodes + if _, found := node.ObjectMeta.Labels[ccp.controllerNodeLabel]; found { + return nil, nil + } + nodeGroupId, err := ccp.cherryManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID) + if err != nil { + return nil, err + } + if nodeGroupId == "" { + return nil, nil + } + for i, nodeGroup := range ccp.nodeGroups { + if nodeGroup.Id() == nodeGroupId { + return &(ccp.nodeGroups[i]), nil + } + } + return nil, nil +} + +// Pricing returns pricing model for this cloud provider or error if not available. +func (ccp *cherryCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetAvailableMachineTypes is not implemented. +func (ccp *cherryCloudProvider) GetAvailableMachineTypes() ([]string, error) { + return []string{}, nil +} + +// NewNodeGroup is not implemented. +func (ccp *cherryCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, + taints []apiv1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetResourceLimiter returns resource constraints for the cloud provider +func (ccp *cherryCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) { + return ccp.resourceLimiter, nil +} + +// Refresh is called before every autoscaler main loop. +// +// Currently only prints debug information. 
+func (ccp *cherryCloudProvider) Refresh() error { + for _, nodegroup := range ccp.nodeGroups { + klog.V(3).Info(nodegroup.Debug()) + } + return nil +} + +// Cleanup currently does nothing. +func (ccp *cherryCloudProvider) Cleanup() error { + return nil +} + +// BuildCherry is called by the autoscaler to build a Cherry Servers cloud provider. +// +// The cherryManager is created here, and the node groups are created +// based on the specs provided via the command line parameters. +func BuildCherry(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { + var config io.ReadCloser + + if opts.CloudConfig != "" { + var err error + config, err = os.Open(opts.CloudConfig) + if err != nil { + klog.Fatalf("Couldn't open cloud provider configuration %s: %#v", opts.CloudConfig, err) + } + defer config.Close() + } + + manager, err := createCherryManager(config, do, opts) + if err != nil { + klog.Fatalf("Failed to create cherry manager: %v", err) + } + + provider, err := buildCherryCloudProvider(manager, rl) + if err != nil { + klog.Fatalf("Failed to create cherry cloud provider: %v", err) + } + + if len(do.NodeGroupSpecs) == 0 { + klog.Fatalf("Must specify at least one node group with --nodes=::,...") + } + + validNodepoolName := regexp.MustCompile(`^[a-z0-9A-Z]+[a-z0-9A-Z\-\.\_]*[a-z0-9A-Z]+$|^[a-z0-9A-Z]{1}$`) + + for _, nodegroupSpec := range do.NodeGroupSpecs { + spec, err := dynamic.SpecFromString(nodegroupSpec, scaleToZeroSupported) + if err != nil { + klog.Fatalf("Could not parse node group spec %s: %v", nodegroupSpec, err) + } + + if !validNodepoolName.MatchString(spec.Name) || len(spec.Name) > 63 { + klog.Fatalf("Invalid nodepool name: %s\nMust be a valid kubernetes label value", spec.Name) + } + + targetSize, err := manager.nodeGroupSize(spec.Name) + if err != nil { + klog.Fatalf("Could not set current nodes in node group: %v", err) + } + ng := newCherryNodeGroup(manager, spec.Name, spec.MinSize, spec.MaxSize, targetSize, waitForStatusTimeStep, deleteNodesBatchingDelay) + + provider.AddNodeGroup(ng) + } + + return provider +} diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager.go new file mode 100644 index 0000000000..1cfd02cfa5 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager.go @@ -0,0 +1,68 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cherryservers + +import ( + "fmt" + "io" + "os" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +const ( + defaultManager = "rest" +) + +// NodeRef stores the name, machineID and providerID of a node. +type NodeRef struct { + Name string + MachineID string + ProviderID string + IPs []string +} + +// cherryManager is an interface for the basic interactions with the cluster. 
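+// Implementations are selected via the CHERRY_MANAGER environment variable in createCherryManager;
+// the default (and currently only) implementation is the REST-based manager in cherry_manager_rest.go.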
+type cherryManager interface { + nodeGroupSize(nodegroup string) (int, error) + createNodes(nodegroup string, nodes int) error + getNodes(nodegroup string) ([]string, error) + getNodeNames(nodegroup string) ([]string, error) + deleteNodes(nodegroup string, nodes []NodeRef, updatedNodeCount int) error + templateNodeInfo(nodegroup string) (*schedulerframework.NodeInfo, error) + NodeGroupForNode(labels map[string]string, nodeId string) (string, error) +} + +// createCherryManager creates the desired implementation of cherryManager. +// Currently reads the environment variable CHERRY_MANAGER to find which to create, +// and falls back to a default if the variable is not found. +func createCherryManager(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (cherryManager, error) { + // For now get manager from env var, can consider adding flag later + manager, ok := os.LookupEnv("CHERRY_MANAGER") + if !ok { + manager = defaultManager + } + + switch manager { + case "rest": + return createCherryManagerRest(configReader, discoverOpts, opts) + } + + return nil, fmt.Errorf("cherry manager does not exist: %s", manager) +} diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager_rest.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager_rest.go new file mode 100644 index 0000000000..5a1f577be4 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager_rest.go @@ -0,0 +1,684 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package cherryservers + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "math/rand" + "net/http" + "net/http/httputil" + "net/url" + "os" + "path" + "strconv" + "strings" + "text/template" + "time" + + "gopkg.in/gcfg.v1" + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/autoscaler/cluster-autoscaler/version" + klog "k8s.io/klog/v2" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +const ( + userAgent = "kubernetes/cluster-autoscaler/" + version.ClusterAutoscalerVersion + expectedAPIContentTypePrefix = "application/json" + cherryPrefix = "cherryservers://" + baseURL = "https://api.cherryservers.com/v1/" +) + +type instanceType struct { + InstanceName string + CPU int64 + MemoryMb int64 + GPU int64 +} + +type cherryManagerNodePool struct { + clusterName string + projectID int + apiServerEndpoint string + region string + plan int + os string + cloudinit string + hostnamePattern string + waitTimeStep time.Duration +} + +type cherryManagerRest struct { + authToken string + baseURL *url.URL + nodePools map[string]*cherryManagerNodePool + plans map[int]*Plan + planUpdate time.Time +} + +// ConfigNodepool options only include the project-id for now +type ConfigNodepool struct { + ClusterName string `gcfg:"cluster-name"` + ProjectID int `gcfg:"project-id"` + APIServerEndpoint string `gcfg:"api-server-endpoint"` + Region string `gcfg:"region"` + Plan string `gcfg:"plan"` + OS string `gcfg:"os"` + CloudInit string `gcfg:"cloudinit"` + HostnamePattern string `gcfg:"hostname-pattern"` +} + +// ConfigFile is used to read and store information from the cloud configuration file +type ConfigFile struct { + DefaultNodegroupdef ConfigNodepool `gcfg:"global"` + Nodegroupdef map[string]*ConfigNodepool `gcfg:"nodegroupdef"` +} + +// CloudInitTemplateData represents the variables that can be used in cloudinit templates +type CloudInitTemplateData struct { + BootstrapTokenID string + BootstrapTokenSecret string + APIServerEndpoint string + NodeGroup string +} + +// HostnameTemplateData represents the template variables used to construct host names for new nodes +type HostnameTemplateData struct { + ClusterName string + NodeGroup string + RandString8 string +} + +// ErrorResponse is the http response used on errors +type ErrorResponse struct { + Response *http.Response + Errors []string `json:"errors"` + SingleError string `json:"error"` +} + +var multipliers = map[string]int64{ + "KB": 1024, + "MB": 1024 * 1024, + "GB": 1024 * 1024 * 1024, + "TB": 1024 * 1024 * 1024 * 1024, +} + +// Error implements the error interface +func (r *ErrorResponse) Error() string { + return fmt.Sprintf("%v %v: %d %v %v", + r.Response.Request.Method, r.Response.Request.URL, r.Response.StatusCode, strings.Join(r.Errors, ", "), r.SingleError) +} + +// Find returns the smallest index i at which x == a[i], +// or len(a) if there is no such index. +func Find(a []string, x string) int { + for i, n := range a { + if x == n { + return i + } + } + return len(a) +} + +// Contains tells whether a contains x. 
+func Contains(a []string, x string) bool { + for _, n := range a { + if x == n { + return true + } + } + return false +} + +// createCherryManagerRest sets up the client and returns +// an cherryManagerRest. +func createCherryManagerRest(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (*cherryManagerRest, error) { + // Initialize ConfigFile instance + cfg := ConfigFile{ + DefaultNodegroupdef: ConfigNodepool{}, + Nodegroupdef: map[string]*ConfigNodepool{}, + } + + if configReader != nil { + if err := gcfg.ReadInto(&cfg, configReader); err != nil { + klog.Errorf("Couldn't read config: %v", err) + return nil, err + } + } + + var manager cherryManagerRest + manager.nodePools = make(map[string]*cherryManagerNodePool) + + if _, ok := cfg.Nodegroupdef["default"]; !ok { + cfg.Nodegroupdef["default"] = &cfg.DefaultNodegroupdef + } + + if *cfg.Nodegroupdef["default"] == (ConfigNodepool{}) { + klog.Fatalf("No \"default\" or [Global] nodepool definition was found") + } + + cherryAuthToken := os.Getenv("CHERRY_AUTH_TOKEN") + if len(cherryAuthToken) == 0 { + klog.Fatalf("CHERRY_AUTH_TOKEN is required and missing") + } + + manager.authToken = cherryAuthToken + base, err := url.Parse(baseURL) + if err != nil { + return nil, fmt.Errorf("invalid baseURL %s: %v", baseURL, err) + } + + manager.baseURL = base + + projectID := cfg.Nodegroupdef["default"].ProjectID + apiServerEndpoint := cfg.Nodegroupdef["default"].APIServerEndpoint + + for key, nodepool := range cfg.Nodegroupdef { + if opts.ClusterName == "" && nodepool.ClusterName == "" { + klog.Fatalf("The cluster-name parameter must be set") + } else if opts.ClusterName != "" && nodepool.ClusterName == "" { + nodepool.ClusterName = opts.ClusterName + } + + plan, err := strconv.ParseInt(nodepool.Plan, 10, 32) + if err != nil { + return nil, fmt.Errorf("invalid plan %s for nodepool %s, must be integer: %v", nodepool.Plan, key, err) + } + manager.nodePools[key] = &cherryManagerNodePool{ + projectID: projectID, + apiServerEndpoint: apiServerEndpoint, + clusterName: nodepool.ClusterName, + region: nodepool.Region, + plan: int(plan), + os: nodepool.OS, + cloudinit: nodepool.CloudInit, + hostnamePattern: nodepool.HostnamePattern, + } + } + + return &manager, nil +} + +func (mgr *cherryManagerRest) request(ctx context.Context, method, pathUrl string, jsonData []byte) ([]byte, error) { + u, err := url.Parse(pathUrl) + if err != nil { + return nil, fmt.Errorf("invalid request path %s: %v", pathUrl, err) + } + reqUrl := mgr.baseURL.ResolveReference(u) + + req, err := http.NewRequestWithContext(ctx, method, reqUrl.String(), bytes.NewBuffer(jsonData)) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", mgr.authToken)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("User-Agent", userAgent) + dump, _ := httputil.DumpRequestOut(req, true) + klog.V(2).Infof("%s", string(dump)) + + client := &http.Client{} + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to perform request: %w", err) + } + + defer func() { + if err := resp.Body.Close(); err != nil { + klog.Errorf("failed to close response body: %v", err) + } + }() + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response body: %w", err) + } + + ct := resp.Header.Get("Content-Type") + if !strings.HasPrefix(ct, expectedAPIContentTypePrefix) { + errorResponse := 
&ErrorResponse{Response: resp} + errorResponse.SingleError = fmt.Sprintf("Unexpected Content-Type: %s with status: %s", ct, resp.Status) + return nil, errorResponse + } + + // If the response is good return early + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return body, nil + } + + errorResponse := &ErrorResponse{Response: resp} + + if len(body) > 0 { + if err := json.Unmarshal(body, errorResponse); err != nil { + return nil, fmt.Errorf("failed to unmarshal response body: %w", err) + } + } + + return nil, errorResponse +} + +func (mgr *cherryManagerRest) listCherryPlans(ctx context.Context) (Plans, error) { + req := "plans" + + result, err := mgr.request(ctx, "GET", req, []byte(``)) + if err != nil { + return nil, err + } + + var plans Plans + if err := json.Unmarshal(result, &plans); err != nil { + return nil, fmt.Errorf("failed to unmarshal response body: %w", err) + } + + return plans, nil +} + +func (mgr *cherryManagerRest) listCherryServers(ctx context.Context) ([]Server, error) { + pool := mgr.getNodePoolDefinition("default") + req := path.Join("projects", fmt.Sprintf("%d", pool.projectID), "servers") + + result, err := mgr.request(ctx, "GET", req, []byte(``)) + if err != nil { + return nil, err + } + + var servers []Server + if err := json.Unmarshal(result, &servers); err != nil { + return nil, fmt.Errorf("failed to unmarshal response body: %w", err) + } + + return servers, nil +} + +func (mgr *cherryManagerRest) getCherryServer(ctx context.Context, id string) (*Server, error) { + req := path.Join("servers", id) + + result, err := mgr.request(ctx, "GET", req, []byte(``)) + if err != nil { + return nil, err + } + + var server Server + if err := json.Unmarshal(result, &server); err != nil { + return nil, fmt.Errorf("failed to unmarshal response body: %w", err) + } + + return &server, nil +} + +func (mgr *cherryManagerRest) NodeGroupForNode(labels map[string]string, nodeId string) (string, error) { + if nodegroup, ok := labels["pool"]; ok { + return nodegroup, nil + } + + trimmedNodeId := strings.TrimPrefix(nodeId, cherryPrefix) + + server, err := mgr.getCherryServer(context.TODO(), trimmedNodeId) + if err != nil { + return "", fmt.Errorf("Could not find group for node: %s %s", nodeId, err) + } + for k, v := range server.Tags { + if k == "k8s-nodepool" { + return v, nil + } + } + return "", nil +} + +// nodeGroupSize gets the current size of the nodegroup as reported by Cherry Servers tags. +func (mgr *cherryManagerRest) nodeGroupSize(nodegroup string) (int, error) { + servers, err := mgr.listCherryServers(context.TODO()) + if err != nil { + return 0, fmt.Errorf("failed to list servers: %w", err) + } + + // Get the count of servers tagged as nodegroup members + count := 0 + for _, s := range servers { + clusterName, ok := s.Tags["k8s-cluster"] + if !ok || clusterName != mgr.getNodePoolDefinition(nodegroup).clusterName { + continue + } + nodepoolName, ok := s.Tags["k8s-nodepool"] + if !ok || nodegroup != nodepoolName { + continue + } + count++ + } + klog.V(3).Infof("Nodegroup %s: %d/%d", nodegroup, count, len(servers)) + return count, nil +} + +func randString8() string { + n := 8 + rand.Seed(time.Now().UnixNano()) + letterRunes := []rune("acdefghijklmnopqrstuvwxyz") + b := make([]rune, n) + for i := range b { + b[i] = letterRunes[rand.Intn(len(letterRunes))] + } + return string(b) +} + +// createNode creates a cluster node by creating a server with the appropriate userdata to add it to the cluster. 
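+// The cloudinit userdata is a Go template rendered with the bootstrap token, API server endpoint
+// and nodegroup name; the hostname is rendered from the nodepool's hostname-pattern with the
+// cluster name, nodegroup name and a random 8-character suffix.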
+func (mgr *cherryManagerRest) createNode(ctx context.Context, cloudinit, nodegroup string) error { + udvars := CloudInitTemplateData{ + BootstrapTokenID: os.Getenv("BOOTSTRAP_TOKEN_ID"), + BootstrapTokenSecret: os.Getenv("BOOTSTRAP_TOKEN_SECRET"), + APIServerEndpoint: mgr.getNodePoolDefinition(nodegroup).apiServerEndpoint, + NodeGroup: nodegroup, + } + + ud, err := renderTemplate(cloudinit, udvars) + if err != nil { + return fmt.Errorf("failed to create userdata from template: %w", err) + } + + hnvars := HostnameTemplateData{ + ClusterName: mgr.getNodePoolDefinition(nodegroup).clusterName, + NodeGroup: nodegroup, + RandString8: randString8(), + } + hn, err := renderTemplate(mgr.getNodePoolDefinition(nodegroup).hostnamePattern, hnvars) + if err != nil { + return fmt.Errorf("failed to create hostname from template: %w", err) + } + + cr := &CreateServer{ + Hostname: hn, + Region: mgr.getNodePoolDefinition(nodegroup).region, + PlanID: mgr.getNodePoolDefinition(nodegroup).plan, + Image: mgr.getNodePoolDefinition(nodegroup).os, + ProjectID: mgr.getNodePoolDefinition(nodegroup).projectID, + UserData: base64.StdEncoding.EncodeToString([]byte(ud)), + Tags: &map[string]string{"k8s-cluster": mgr.getNodePoolDefinition(nodegroup).clusterName, "k8s-nodepool": nodegroup}, + } + + if err := mgr.createServerRequest(ctx, cr, nodegroup); err != nil { + return fmt.Errorf("failed to create server: %w", err) + } + + klog.Infof("Created new node on Cherry Servers.") + + return nil +} + +// createNodes provisions new nodes at Cherry Servers and bootstraps them in the cluster. +func (mgr *cherryManagerRest) createNodes(nodegroup string, nodes int) error { + klog.Infof("Updating node count to %d for nodegroup %s", nodes, nodegroup) + + cloudinit, err := base64.StdEncoding.DecodeString(mgr.getNodePoolDefinition(nodegroup).cloudinit) + if err != nil { + err = fmt.Errorf("could not decode cloudinit script: %w", err) + klog.Fatal(err) + return err + } + + errList := make([]error, 0, nodes) + for i := 0; i < nodes; i++ { + errList = append(errList, mgr.createNode(context.TODO(), string(cloudinit), nodegroup)) + } + + return utilerrors.NewAggregate(errList) +} + +func (mgr *cherryManagerRest) createServerRequest(ctx context.Context, cr *CreateServer, nodegroup string) error { + req := path.Join("projects", fmt.Sprintf("%d", cr.ProjectID), "servers") + + jsonValue, err := json.Marshal(cr) + if err != nil { + return fmt.Errorf("failed to marshal create request: %w", err) + } + + klog.Infof("Creating new node") + if _, err := mgr.request(ctx, "POST", req, jsonValue); err != nil { + return err + } + + return nil +} + +// getNodes should return ProviderIDs for all nodes in the node group, +// used to find any nodes which are unregistered in kubernetes. 
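+// The returned IDs use the provider-ID form "cherryservers://<server-id>".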
+func (mgr *cherryManagerRest) getNodes(nodegroup string) ([]string, error) { + // Get node ProviderIDs by getting server IDs from Cherry Servers + servers, err := mgr.listCherryServers(context.TODO()) + if err != nil { + return nil, fmt.Errorf("failed to list servers: %w", err) + } + + nodes := []string{} + + for _, s := range servers { + clusterName, ok := s.Tags["k8s-cluster"] + if !ok || clusterName != mgr.getNodePoolDefinition(nodegroup).clusterName { + continue + } + nodepoolName, ok := s.Tags["k8s-nodepool"] + if !ok || nodegroup != nodepoolName { + continue + } + nodes = append(nodes, fmt.Sprintf("%s%d", cherryPrefix, s.ID)) + } + + return nodes, nil +} + +// getNodeNames should return Names for all nodes in the node group, +// used to find any nodes which are unregistered in kubernetes. +func (mgr *cherryManagerRest) getNodeNames(nodegroup string) ([]string, error) { + servers, err := mgr.listCherryServers(context.TODO()) + if err != nil { + return nil, fmt.Errorf("failed to list servers: %w", err) + } + + nodes := []string{} + + for _, s := range servers { + clusterName, ok := s.Tags["k8s-cluster"] + if !ok || clusterName != mgr.getNodePoolDefinition(nodegroup).clusterName { + continue + } + nodepoolName, ok := s.Tags["k8s-nodepool"] + if !ok || nodegroup != nodepoolName { + continue + } + nodes = append(nodes, s.Hostname) + } + + return nodes, nil +} + +func (mgr *cherryManagerRest) deleteServer(ctx context.Context, nodegroup string, id int) error { + req := path.Join("servers", fmt.Sprintf("%d", id)) + + result, err := mgr.request(context.TODO(), "DELETE", req, []byte("")) + if err != nil { + return err + } + + klog.Infof("Deleted server %s: %v", id, result) + return nil + +} + +// deleteNodes deletes nodes by passing a comma separated list of names or IPs +func (mgr *cherryManagerRest) deleteNodes(nodegroup string, nodes []NodeRef, updatedNodeCount int) error { + klog.Infof("Deleting %d nodes from nodegroup %s", len(nodes), nodegroup) + klog.V(2).Infof("Deleting nodes %v", nodes) + + ctx := context.TODO() + + errList := make([]error, 0, len(nodes)) + + servers, err := mgr.listCherryServers(ctx) + if err != nil { + return fmt.Errorf("failed to list servers: %w", err) + } + klog.V(2).Infof("total servers found: %d", len(servers)) + + for _, n := range nodes { + fakeNode := false + + if n.Name == n.ProviderID { + klog.Infof("Fake Node: %s", n.Name) + fakeNode = true + } else { + klog.Infof("Node %s - %s - %s", n.Name, n.MachineID, n.IPs) + } + + // Get the count of servers tagged as nodegroup + for _, s := range servers { + klog.V(2).Infof("Checking server %v", s) + clusterName, ok := s.Tags["k8s-cluster"] + if !ok || clusterName != mgr.getNodePoolDefinition(nodegroup).clusterName { + continue + } + nodepoolName, ok := s.Tags["k8s-nodepool"] + if !ok || nodegroup != nodepoolName { + continue + } + klog.V(2).Infof("nodegroup match %s %s", s.Hostname, n.Name) + + trimmedProviderID := strings.TrimPrefix(n.ProviderID, cherryPrefix) + nodeID, err := strconv.ParseInt(trimmedProviderID, 10, 32) + if err != nil { + errList = append(errList, fmt.Errorf("invalid node ID is not integer for %s", n.Name)) + } + + switch { + case s.Hostname == n.Name: + klog.V(1).Infof("Matching Cherry Server %s - %s", s.Hostname, s.ID) + errList = append(errList, mgr.deleteServer(ctx, nodegroup, s.ID)) + case fakeNode && int(nodeID) == s.ID: + klog.V(1).Infof("Fake Node %s", s.ID) + errList = append(errList, mgr.deleteServer(ctx, nodegroup, s.ID)) + } + } + } + + return utilerrors.NewAggregate(errList) +} + 
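+// Note: a server is treated as a member of a nodegroup when its tags contain
+// "k8s-cluster": <cluster name> and "k8s-nodepool": <nodegroup name>, matching the tags set
+// by createNode above.
+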
+// BuildGenericLabels builds basic labels for Cherry Servers nodes +func BuildGenericLabels(nodegroup string, plan *Plan) map[string]string { + result := make(map[string]string) + + //result[kubeletapis.LabelArch] = "amd64" + //result[kubeletapis.LabelOS] = "linux" + result[apiv1.LabelInstanceType] = plan.Name + //result[apiv1.LabelZoneRegion] = "" + //result[apiv1.LabelZoneFailureDomain] = "0" + //result[apiv1.LabelHostname] = "" + result["pool"] = nodegroup + + return result +} + +// templateNodeInfo returns a NodeInfo with a node template based on the Cherry Servers plan +// that is used to create nodes in a given node group. +func (mgr *cherryManagerRest) templateNodeInfo(nodegroup string) (*schedulerframework.NodeInfo, error) { + node := apiv1.Node{} + nodeName := fmt.Sprintf("%s-asg-%d", nodegroup, rand.Int63()) + node.ObjectMeta = metav1.ObjectMeta{ + Name: nodeName, + SelfLink: fmt.Sprintf("/api/v1/nodes/%s", nodeName), + Labels: map[string]string{}, + } + node.Status = apiv1.NodeStatus{ + Capacity: apiv1.ResourceList{}, + } + + // check if we need to update our plans + if time.Since(mgr.planUpdate) > time.Hour*1 { + plans, err := mgr.listCherryPlans(context.TODO()) + if err != nil { + return nil, fmt.Errorf("unable to update cherry plans: %v", err) + } + mgr.plans = map[int]*Plan{} + for _, plan := range plans { + mgr.plans[plan.ID] = &plan + } + } + planID := mgr.getNodePoolDefinition(nodegroup).plan + cherryPlan, ok := mgr.plans[planID] + if !ok { + klog.V(5).Infof("no plan found for planID %d", planID) + return nil, fmt.Errorf("cherry plan %q not supported", mgr.getNodePoolDefinition(nodegroup).plan) + } + var ( + memoryMultiplier int64 + ) + if memoryMultiplier, ok = multipliers[cherryPlan.Specs.Memory.Unit]; !ok { + memoryMultiplier = 1 + } + node.Status.Capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI) + node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(int64(cherryPlan.Specs.Cpus.Cores), resource.DecimalSI) + node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(0, resource.DecimalSI) + node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(int64(cherryPlan.Specs.Memory.Total)*memoryMultiplier, resource.DecimalSI) + + node.Status.Allocatable = node.Status.Capacity + node.Status.Conditions = cloudprovider.BuildReadyConditions() + + // GenericLabels + node.Labels = cloudprovider.JoinStringMaps(node.Labels, BuildGenericLabels(nodegroup, cherryPlan)) + + nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(nodegroup)) + nodeInfo.SetNode(&node) + return nodeInfo, nil +} + +func (mgr *cherryManagerRest) getNodePoolDefinition(nodegroup string) *cherryManagerNodePool { + NodePoolDefinition, ok := mgr.nodePools[nodegroup] + if !ok { + NodePoolDefinition, ok = mgr.nodePools["default"] + if !ok { + klog.Fatalf("No default cloud-config was found") + } + klog.V(1).Infof("No cloud-config was found for %s, using default", nodegroup) + } + + return NodePoolDefinition +} + +func renderTemplate(str string, vars interface{}) (string, error) { + tmpl, err := template.New("tmpl").Parse(str) + if err != nil { + return "", fmt.Errorf("failed to parse template %q, %w", str, err) + } + + var tmplBytes bytes.Buffer + + if err := tmpl.Execute(&tmplBytes, vars); err != nil { + return "", fmt.Errorf("failed to execute template: %w", err) + } + + return tmplBytes.String(), nil +} diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager_rest_test.go 
b/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager_rest_test.go new file mode 100644 index 0000000000..6c160b60a4 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_manager_rest_test.go @@ -0,0 +1,119 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cherryservers + +import ( + "context" + "fmt" + "net/url" + "os" + "strings" + "testing" + + . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +// API call responses contain only the minimum information required by the cluster-autoscaler +const listCherryServersResponse = ` +[{"id":1000,"name":"server-1000","hostname":"k8s-cluster2-pool3-gndxdmmw","state":"active","tags":{"k8s-cluster":"cluster2","k8s-nodepool":"pool3"}},{"id":1001,"name":"server-1001","hostname":"k8s-cluster2-master","state":"active","tags":{"k8s-cluster":"cluster2"}}] +` + +const listCherryServersResponseAfterIncreasePool3 = ` +[{"id":2000,"name":"server-2000","hostname":"k8s-cluster2-pool3-xpnrwgdf","state":"active","tags":{"k8s-cluster":"cluster2","k8s-nodepool":"pool3"}},{"id":1000,"name":"server-1000","hostname":"k8s-cluster2-pool3-gndxdmmw","state":"active","tags":{"k8s-cluster":"cluster2","k8s-nodepool":"pool3"}},{"id":1001,"name":"server-1001","hostname":"k8s-cluster2-master","state":"active","tags":{"k8s-cluster":"cluster2"}}] +` + +const listCherryServersResponseAfterIncreasePool2 = ` +[{"id":3000,"name":"server-3001","hostname":"k8s-cluster2-pool2-jssxcyzz","state":"active","tags":{"k8s-cluster":"cluster2","k8s-nodepool":"pool2"}},{"id":2000,"name":"server-2000","hostname":"k8s-cluster2-pool3-xpnrwgdf","state":"active","tags":{"k8s-cluster":"cluster2","k8s-nodepool":"pool3"}},{"id":1000,"name":"server-1000","hostname":"k8s-cluster2-pool3-gndxdmmw","state":"active","tags":{"k8s-cluster":"cluster2","k8s-nodepool":"pool3"}},{"id":1001,"name":"server-1001","hostname":"k8s-cluster2-master","state":"active","tags":{"k8s-cluster":"cluster2"}}] +` + +const cloudinitDefault = 
"IyEvYmluL2Jhc2gKZXhwb3J0IERFQklBTl9GUk9OVEVORD1ub25pbnRlcmFjdGl2ZQphcHQtZ2V0IHVwZGF0ZSAmJiBhcHQtZ2V0IGluc3RhbGwgLXkgYXB0LXRyYW5zcG9ydC1odHRwcyBjYS1jZXJ0aWZpY2F0ZXMgY3VybCBzb2Z0d2FyZS1wcm9wZXJ0aWVzLWNvbW1vbgpjdXJsIC1mc1NMIGh0dHBzOi8vZG93bmxvYWQuZG9ja2VyLmNvbS9saW51eC91YnVudHUvZ3BnIHwgYXB0LWtleSBhZGQgLQpjdXJsIC1zIGh0dHBzOi8vcGFja2FnZXMuY2xvdWQuZ29vZ2xlLmNvbS9hcHQvZG9jL2FwdC1rZXkuZ3BnIHwgYXB0LWtleSBhZGQgLQpjYXQgPDxFT0YgPi9ldGMvYXB0L3NvdXJjZXMubGlzdC5kL2t1YmVybmV0ZXMubGlzdApkZWIgaHR0cHM6Ly9hcHQua3ViZXJuZXRlcy5pby8ga3ViZXJuZXRlcy14ZW5pYWwgbWFpbgpFT0YKYWRkLWFwdC1yZXBvc2l0b3J5ICAgImRlYiBbYXJjaD1hbWQ2NF0gaHR0cHM6Ly9kb3dubG9hZC5kb2NrZXIuY29tL2xpbnV4L3VidW50dSAgICQobHNiX3JlbGVhc2UgLWNzKSAgIHN0YWJsZSIKYXB0LWdldCB1cGRhdGUKYXB0LWdldCB1cGdyYWRlIC15CmFwdC1nZXQgaW5zdGFsbCAteSBrdWJlbGV0PTEuMTcuNC0wMCBrdWJlYWRtPTEuMTcuNC0wMCBrdWJlY3RsPTEuMTcuNC0wMAphcHQtbWFyayBob2xkIGt1YmVsZXQga3ViZWFkbSBrdWJlY3RsCmN1cmwgLWZzU0wgaHR0cHM6Ly9kb3dubG9hZC5kb2NrZXIuY29tL2xpbnV4L3VidW50dS9ncGcgfCBhcHQta2V5IGFkZCAtCmFkZC1hcHQtcmVwb3NpdG9yeSAiZGViIFthcmNoPWFtZDY0XSBodHRwczovL2Rvd25sb2FkLmRvY2tlci5jb20vbGludXgvdWJ1bnR1IGJpb25pYyBzdGFibGUiCmFwdCB1cGRhdGUKYXB0IGluc3RhbGwgLXkgZG9ja2VyLWNlPTE4LjA2LjJ+Y2V+My0wfnVidW50dQpjYXQgPiAvZXRjL2RvY2tlci9kYWVtb24uanNvbiA8PEVPRgp7CiAgImV4ZWMtb3B0cyI6IFsibmF0aXZlLmNncm91cGRyaXZlcj1zeXN0ZW1kIl0sCiAgImxvZy1kcml2ZXIiOiAianNvbi1maWxlIiwKICAibG9nLW9wdHMiOiB7CiAgICAibWF4LXNpemUiOiAiMTAwbSIKICB9LAogICJzdG9yYWdlLWRyaXZlciI6ICJvdmVybGF5MiIKfQpFT0YKbWtkaXIgLXAgL2V0Yy9zeXN0ZW1kL3N5c3RlbS9kb2NrZXIuc2VydmljZS5kCnN5c3RlbWN0bCBkYWVtb24tcmVsb2FkCnN5c3RlbWN0bCByZXN0YXJ0IGRvY2tlcgpzd2Fwb2ZmIC1hCm12IC9ldGMvZnN0YWIgL2V0Yy9mc3RhYi5vbGQgJiYgZ3JlcCAtdiBzd2FwIC9ldGMvZnN0YWIub2xkID4gL2V0Yy9mc3RhYgpjYXQgPDxFT0YgfCB0ZWUgL2V0Yy9kZWZhdWx0L2t1YmVsZXQKS1VCRUxFVF9FWFRSQV9BUkdTPS0tY2xvdWQtcHJvdmlkZXI9ZXh0ZXJuYWwgLS1ub2RlLWxhYmVscz1wb29sPXt7Lk5vZGVHcm91cH19CkVPRgprdWJlYWRtIGpvaW4gLS1kaXNjb3ZlcnktdG9rZW4tdW5zYWZlLXNraXAtY2EtdmVyaWZpY2F0aW9uIC0tdG9rZW4ge3suQm9vdHN0cmFwVG9rZW5JRH19Lnt7LkJvb3RzdHJhcFRva2VuU2VjcmV0fX0ge3suQVBJU2VydmVyRW5kcG9pbnR9fQo=" + +var useRealEndpoint bool + +func init() { + useRealEndpoint = strings.TrimSpace(os.Getenv("CHERRY_USE_PRODUCTION_API")) == "true" +} + +// newTestCherryManagerRest creates a cherryManagerRest with two nodepools. +// If the url is provided, uses that as the Cherry Servers API endpoint, otherwise +// uses the system default. 
+func newTestCherryManagerRest(t *testing.T, serverUrl string) *cherryManagerRest { + poolUrl := baseURL + if serverUrl != "" { + poolUrl = serverUrl + } + u, err := url.Parse(poolUrl) + if err != nil { + t.Fatalf("invalid request path %s: %v", poolUrl, err) + } + manager := &cherryManagerRest{ + baseURL: u, + nodePools: map[string]*cherryManagerNodePool{ + "default": { + clusterName: "cluster2", + projectID: 10001, + apiServerEndpoint: "147.75.102.15:6443", + region: "EU-Nord-1", + plan: 116, + os: "ubuntu_18_04", + cloudinit: cloudinitDefault, + hostnamePattern: "k8s-{{.ClusterName}}-{{.NodeGroup}}-{{.RandString8}}", + }, + "pool2": { + clusterName: "cluster2", + projectID: 10001, + apiServerEndpoint: "147.75.102.15:6443", + region: "EU-Nord-1", + plan: 116, + os: "ubuntu_18_04", + cloudinit: cloudinitDefault, + hostnamePattern: "k8s-{{.ClusterName}}-{{.NodeGroup}}-{{.RandString8}}", + }, + }, + } + return manager +} +func TestListCherryServers(t *testing.T) { + server := NewHttpServerMock(MockFieldContentType, MockFieldResponse) + defer server.Close() + + var m *cherryManagerRest + // Set up a mock Cherry Servers API + if useRealEndpoint { + // If auth token set in env, hit the actual Cherry Servers API + m = newTestCherryManagerRest(t, "") + } else { + m = newTestCherryManagerRest(t, server.URL) + t.Logf("server URL: %v", server.URL) + t.Logf("default cherryManager baseURL: %v", m.baseURL) + // should get called 2 times: once for listCherryServers() below, and once + // as part of nodeGroupSize() + server.On("handle", fmt.Sprintf("/projects/%d/servers", m.nodePools["default"].projectID)).Return("application/json", listCherryServersResponse).Times(2) + } + + _, err := m.listCherryServers(context.TODO()) + assert.NoError(t, err) + + c, err := m.nodeGroupSize("pool3") + assert.NoError(t, err) + assert.Equal(t, int(1), c) // One server in nodepool + + mock.AssertExpectationsForObjects(t, server) +} diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group.go new file mode 100644 index 0000000000..744bd52b66 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group.go @@ -0,0 +1,311 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cherryservers + +import ( + "fmt" + "sync" + "time" + + apiv1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + klog "k8s.io/klog/v2" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +const ( + waitForStatusTimeStep = 30 * time.Second + waitForUpdateStatusTimeout = 2 * time.Minute + waitForCompleteStatusTimout = 10 * time.Minute + scaleToZeroSupported = true + + // Time that the goroutine that first acquires clusterUpdateMutex + // in deleteNodes should wait for other synchronous calls to deleteNodes. 
+ deleteNodesBatchingDelay = 2 * time.Second +) + +// cherryNodeGroup implements NodeGroup interface from cluster-autoscaler/cloudprovider. +// +// Represents a homogeneous collection of nodes within a cluster, +// which can be dynamically resized between a minimum and maximum +// number of nodes. +type cherryNodeGroup struct { + cherryManager cherryManager + id string + + clusterUpdateMutex *sync.Mutex + + minSize int + maxSize int + // Stored as a pointer so that when autoscaler copies the nodegroup it can still update the target size + targetSize int + + nodesToDelete []*apiv1.Node + nodesToDeleteMutex *sync.Mutex + + waitTimeStep time.Duration + deleteBatchingDelay time.Duration + + // Used so that only one DeleteNodes goroutine has to get the node group size at the start of the deletion + deleteNodesCachedSize int + deleteNodesCachedSizeAt time.Time +} + +func newCherryNodeGroup(manager cherryManager, name string, minSize, maxSize, targetSize int, wait, deleteBatching time.Duration) cherryNodeGroup { + ng := cherryNodeGroup{ + cherryManager: manager, + id: name, + clusterUpdateMutex: &sync.Mutex{}, + nodesToDeleteMutex: &sync.Mutex{}, + minSize: minSize, + maxSize: maxSize, + targetSize: targetSize, + waitTimeStep: wait, + deleteBatchingDelay: deleteBatching, + } + return ng +} + +// IncreaseSize increases the number of nodes by replacing the cluster's node_count. +// +// Takes precautions so that the cluster is not modified while in an UPDATE_IN_PROGRESS state. +// Blocks until the cluster has reached UPDATE_COMPLETE. +func (ng *cherryNodeGroup) IncreaseSize(delta int) error { + ng.clusterUpdateMutex.Lock() + defer ng.clusterUpdateMutex.Unlock() + + if delta <= 0 { + return fmt.Errorf("size increase must be positive") + } + + size, err := ng.cherryManager.nodeGroupSize(ng.id) + if err != nil { + return fmt.Errorf("could not check current nodegroup size: %v", err) + } + if size+delta > ng.MaxSize() { + return fmt.Errorf("size increase too large, desired:%d max:%d", size+delta, ng.MaxSize()) + } + + klog.V(0).Infof("Increasing size by %d, %d->%d", delta, ng.targetSize, ng.targetSize+delta) + ng.targetSize += delta + + err = ng.cherryManager.createNodes(ng.id, delta) + if err != nil { + return fmt.Errorf("could not increase cluster size: %v", err) + } + + return nil +} + +// DeleteNodes deletes a set of nodes chosen by the autoscaler. +func (ng *cherryNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { + // Batch simultaneous deletes on individual nodes + if err := ng.addNodesToDelete(nodes); err != nil { + return err + } + cachedSize := ng.deleteNodesCachedSize + + // The first of the parallel delete calls to obtain this lock will be the one to actually perform the deletion + ng.clusterUpdateMutex.Lock() + defer ng.clusterUpdateMutex.Unlock() + + // This goroutine has the clusterUpdateMutex, so will be the one + // to actually delete the nodes. While this goroutine waits, others + // will add their nodes to nodesToDelete and block at acquiring + // the clusterUpdateMutex lock. Once they get it, the deletion will + // already be done and they will return above at the check + // for len(ng.nodesToDelete) == 0. 
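+	// Sleeping for deleteBatchingDelay gives those parallel callers time to add their nodes
+	// to the batch before this goroutine reads it.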
+ time.Sleep(ng.deleteBatchingDelay) + + nodes = ng.getNodesToDelete() + if len(nodes) == 0 { + // Deletion was handled by another goroutine + return nil + } + + var nodeNames []string + for _, node := range nodes { + nodeNames = append(nodeNames, node.Name) + } + + // Double check that the total number of batched nodes for deletion will not take the node group below its minimum size + if cachedSize-len(nodes) < ng.MinSize() { + return fmt.Errorf("size decrease too large, desired:%d min:%d", cachedSize-len(nodes), ng.MinSize()) + } + klog.V(0).Infof("Deleting nodes: %v", nodeNames) + + var nodeRefs []NodeRef + for _, node := range nodes { + + // Find node IPs, can be multiple (IPv4 and IPv6) + var IPs []string + for _, addr := range node.Status.Addresses { + if addr.Type == apiv1.NodeInternalIP { + IPs = append(IPs, addr.Address) + } + } + nodeRefs = append(nodeRefs, NodeRef{ + Name: node.Name, + MachineID: node.Status.NodeInfo.MachineID, + ProviderID: node.Spec.ProviderID, + IPs: IPs, + }) + } + + if err := ng.cherryManager.deleteNodes(ng.id, nodeRefs, cachedSize-len(nodes)); err != nil { + return fmt.Errorf("manager error deleting nodes: %v", err) + } + + // Check the new node group size and store that as the new target + newSize, err := ng.cherryManager.nodeGroupSize(ng.id) + if err != nil { + // Set to the expected size as a fallback + ng.targetSize = cachedSize - len(nodes) + return fmt.Errorf("could not check new cluster size after scale down: %v", err) + } + ng.targetSize = newSize + + return nil +} + +// getNodesToDelete safely gets all of the nodes added to the delete queue. +// "safely", as in it locks, gets and then releases the queue. +func (ng *cherryNodeGroup) getNodesToDelete() []*apiv1.Node { + ng.nodesToDeleteMutex.Lock() + defer ng.nodesToDeleteMutex.Unlock() + nodes := make([]*apiv1.Node, len(ng.nodesToDelete)) + copy(nodes, ng.nodesToDelete) + ng.nodesToDelete = nil + return nodes +} + +// addNodesToDelete safely adds nodes to the delete queue. +// "safely", as in it locks, adds, and then releases the queue. +func (ng *cherryNodeGroup) addNodesToDelete(nodes []*v1.Node) error { + // Batch simultaneous deletes on individual nodes + ng.nodesToDeleteMutex.Lock() + defer ng.nodesToDeleteMutex.Unlock() + + // First get the node group size and store the value, so that any other parallel delete calls can use it + // without having to make the get request themselves. + // cachedSize keeps a local copy for this goroutine, so that ng.deleteNodesCachedSize is used + // only within the ng.nodesToDeleteMutex. + var ( + cachedSize int = ng.deleteNodesCachedSize + err error + ) + // if the cache is more than 10 seconds old, refresh it + if time.Since(ng.deleteNodesCachedSizeAt) > time.Second*10 { + cachedSize, err = ng.cherryManager.nodeGroupSize(ng.id) + if err != nil { + return fmt.Errorf("could not get current node count: %v", err) + } + ng.deleteNodesCachedSize = cachedSize + ng.deleteNodesCachedSizeAt = time.Now() + } + + // Check that these nodes would not make the batch delete more nodes than the minimum would allow + if cachedSize-len(ng.nodesToDelete)-len(nodes) < ng.MinSize() { + return fmt.Errorf("deleting nodes would take nodegroup below minimum size %d", ng.minSize) + } + // otherwise, add the nodes to the batch and release the lock + ng.nodesToDelete = append(ng.nodesToDelete, nodes...) + + return nil +} + +// DecreaseTargetSize decreases the cluster node_count in Cherry Servers. 
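+// The delta must be negative. In the current implementation only the in-memory
+// targetSize is adjusted and an error is always returned, since the
+// manager-side node count update is not wired up yet.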
+func (ng *cherryNodeGroup) DecreaseTargetSize(delta int) error { + if delta >= 0 { + return fmt.Errorf("size decrease must be negative") + } + klog.V(0).Infof("Decreasing target size by %d, %d->%d", delta, ng.targetSize, ng.targetSize+delta) + ng.targetSize += delta + return fmt.Errorf("could not decrease target size") /*ng.cherryManager.updateNodeCount(ng.id, ng.targetSize)*/ +} + +// Id returns the node group ID +func (ng *cherryNodeGroup) Id() string { + return ng.id +} + +// Debug returns a string formatted with the node group's min, max and target sizes. +func (ng *cherryNodeGroup) Debug() string { + return fmt.Sprintf("%s min=%d max=%d target=%d", ng.id, ng.minSize, ng.maxSize, ng.targetSize) +} + +// Nodes returns a list of nodes that belong to this node group. +func (ng *cherryNodeGroup) Nodes() ([]cloudprovider.Instance, error) { + nodes, err := ng.cherryManager.getNodes(ng.id) + if err != nil { + return nil, fmt.Errorf("could not get nodes: %v", err) + } + var instances []cloudprovider.Instance + for _, node := range nodes { + instances = append(instances, cloudprovider.Instance{Id: node}) + } + return instances, nil +} + +// TemplateNodeInfo returns a node template for this node group. +func (ng *cherryNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) { + return ng.cherryManager.templateNodeInfo(ng.id) +} + +// Exist returns if this node group exists. +// Currently always returns true. +func (ng *cherryNodeGroup) Exist() bool { + return true +} + +// Create creates the node group on the cloud provider side. +func (ng *cherryNodeGroup) Create() (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrAlreadyExist +} + +// Delete deletes the node group on the cloud provider side. +func (ng *cherryNodeGroup) Delete() error { + return cloudprovider.ErrNotImplemented +} + +// Autoprovisioned returns if the nodegroup is autoprovisioned. +func (ng *cherryNodeGroup) Autoprovisioned() bool { + return false +} + +// MaxSize returns the maximum allowed size of the node group. +func (ng *cherryNodeGroup) MaxSize() int { + return ng.maxSize +} + +// MinSize returns the minimum allowed size of the node group. +func (ng *cherryNodeGroup) MinSize() int { + return ng.minSize +} + +// TargetSize returns the target size of the node group. +func (ng *cherryNodeGroup) TargetSize() (int, error) { + return ng.targetSize, nil +} + +// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular +// NodeGroup. Returning a nil will result in using default options. +func (ng *cherryNodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group_test.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group_test.go new file mode 100644 index 0000000000..f36f6fbdce --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_node_group_test.go @@ -0,0 +1,267 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cherryservers + +import ( + "encoding/json" + "fmt" + "io" + "math/rand" + "net/http" + "net/http/httptest" + "strconv" + "strings" + "testing" + "time" + + apiv1 "k8s.io/api/core/v1" + . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" +) + +const ( + createCherryServerResponsePool2 = `` + deleteCherryServerResponsePool2 = `` + createCherryServerResponsePool3 = `` + deleteCherryServerResponsePool3 = `` +) + +func TestIncreaseDecreaseSize(t *testing.T) { + var m *cherryManagerRest + memServers := []Server{ + {ID: 1000, Name: "server-1000", Hostname: "k8s-cluster2-pool3-gndxdmmw", State: "active", Tags: map[string]string{"k8s-cluster": "cluster2", "k8s-nodepool": "pool3"}}, + {ID: 1001, Name: "server-1001", Hostname: "k8s-cluster2-master", State: "active", Tags: map[string]string{"k8s-cluster": "cluster2"}}, + } + mux := http.NewServeMux() + server := httptest.NewServer(mux) + defer server.Close() + assert.Equal(t, true, true) + if useRealEndpoint { + // If auth token set in env, hit the actual Cherry API + m = newTestCherryManagerRest(t, "") + } else { + // Set up a mock Cherry API + m = newTestCherryManagerRest(t, server.URL) + // the flow needs to match our actual calls below + mux.HandleFunc(fmt.Sprintf("/projects/%d/servers", m.nodePools["default"].projectID), func(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case "GET": + b, _ := json.Marshal(memServers) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + w.Write(b) + return + case "POST": + b, err := io.ReadAll(r.Body) + if err != nil { + w.WriteHeader(500) + w.Write([]byte("could not read request body")) + return + } + var createRequest CreateServer + if err := json.Unmarshal(b, &createRequest); err != nil { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + w.Write([]byte(`{"error": "invalid body"}`)) + return + } + planID := createRequest.PlanID + if err != nil { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + w.Write([]byte(`{"error": "invalid plan ID"}`)) + return + } + if createRequest.ProjectID != m.nodePools["default"].projectID { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + w.Write([]byte(`{"error": "mismatched project ID in body and path"}`)) + return + } + projectID := createRequest.ProjectID + if err != nil { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + w.Write([]byte(`{"error": "invalid project ID"}`)) + return + } + server := Server{ + ID: rand.Intn(10000), + Name: createRequest.Hostname, + Hostname: createRequest.Hostname, + Plan: Plan{ID: planID}, + Project: Project{ID: projectID}, + Image: createRequest.Image, + Tags: *createRequest.Tags, + //UserData: createRequest.UserData, + } + memServers = append(memServers, server) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(201) + b, _ = json.Marshal(server) + w.Write(b) + return + } + }) + mux.HandleFunc("/servers/", func(w http.ResponseWriter, r *http.Request) { + // extract the ID + serverID := strings.Replace(r.URL.Path, "/servers/", "", 1) + id32, err := strconv.ParseInt(serverID, 10, 32) + if err != nil { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + w.Write([]byte(`{"error": "invalid server ID"}`)) + return + } + var ( + index int = -1 + ) + for i, s := range memServers { + if s.ID == int(id32) { + index = 
i + } + } + + switch r.Method { + case "GET": + if index >= 0 { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + b, _ := json.Marshal(memServers[index]) + w.Write(b) + return + } + w.WriteHeader(404) + case "DELETE": + memServers = append(memServers[:index], memServers[index+1:]...) + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(204) + w.Write([]byte("{}")) + } + }) + } + ngPool2 := newCherryNodeGroup(m, "pool2", 0, 10, 0, 30*time.Second, 2*time.Second) + ngPool3 := newCherryNodeGroup(m, "pool3", 0, 10, 0, 30*time.Second, 2*time.Second) + + // calls: listServers + n1Pool2, err := ngPool2.cherryManager.getNodeNames(ngPool2.id) + assert.NoError(t, err) + assert.Equal(t, int(0), len(n1Pool2)) + + // calls: listServers + n1Pool3, err := ngPool3.cherryManager.getNodeNames(ngPool3.id) + assert.NoError(t, err) + assert.Equal(t, int(1), len(n1Pool3)) + + existingNodesPool2 := make(map[string]bool) + existingNodesPool3 := make(map[string]bool) + + for _, node := range n1Pool2 { + existingNodesPool2[node] = true + } + + for _, node := range n1Pool3 { + existingNodesPool3[node] = true + } + + // Try to increase pool3 with negative size, this should return an error + // calls: (should error before any calls) + err = ngPool3.IncreaseSize(-1) + assert.Error(t, err) + + // Now try to increase the pool3 size by 1, that should work + // calls: listServers, createServer + err = ngPool3.IncreaseSize(1) + assert.NoError(t, err) + + if useRealEndpoint { + // If testing with actual API give it some time until the nodes bootstrap + time.Sleep(420 * time.Second) + } + + // calls: listServers + n2Pool3, err := ngPool3.cherryManager.getNodeNames(ngPool3.id) + assert.NoError(t, err) + // Assert that the nodepool3 size is now 2 + assert.Equal(t, int(2), len(n2Pool3)) + // calls: listServers + n2Pool3providers, err := ngPool3.cherryManager.getNodes(ngPool3.id) + assert.NoError(t, err) + // Asset that provider ID lengths matches names length + assert.Equal(t, len(n2Pool3providers), len(n2Pool3)) + + // Now try to increase the pool2 size by 1, that should work + // calls: listServers, createServer + err = ngPool2.IncreaseSize(1) + assert.NoError(t, err) + + if useRealEndpoint { + // If testing with actual API give it some time until the nodes bootstrap + time.Sleep(420 * time.Second) + } + + // calls: listServers + n2Pool2, err := ngPool2.cherryManager.getNodeNames(ngPool2.id) + assert.NoError(t, err) + // Assert that the nodepool2 size is now 1 + assert.Equal(t, int(1), len(n2Pool2)) + // calls: listServers + n2Pool2providers, err := ngPool2.cherryManager.getNodes(ngPool2.id) + assert.NoError(t, err) + // Asset that provider ID lengths matches names length + assert.Equal(t, len(n2Pool2providers), len(n2Pool2)) + + // Let's try to delete the new nodes + nodesPool2 := []*apiv1.Node{} + nodesPool3 := []*apiv1.Node{} + for i, node := range n2Pool2 { + if _, ok := existingNodesPool2[node]; !ok { + testNode := BuildTestNode(node, 1000, 1000) + testNode.Spec.ProviderID = n2Pool2providers[i] + nodesPool2 = append(nodesPool2, testNode) + } + } + for i, node := range n2Pool3 { + if _, ok := existingNodesPool3[node]; !ok { + testNode := BuildTestNode(node, 1000, 1000) + testNode.Spec.ProviderID = n2Pool3providers[i] + nodesPool3 = append(nodesPool3, testNode) + } + } + + err = ngPool2.DeleteNodes(nodesPool2) + assert.NoError(t, err) + + err = ngPool3.DeleteNodes(nodesPool3) + assert.NoError(t, err) + + // Wait a few seconds if talking to the actual Cherry API + if useRealEndpoint { 
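+		// brief pause so the deletions are reflected by the real API before the
+		// final getNodeNames assertions below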
+ time.Sleep(10 * time.Second) + } + + // Make sure that there were no errors and the nodepool2 size is once again 0 + n3Pool2, err := ngPool2.cherryManager.getNodeNames(ngPool2.id) + assert.NoError(t, err) + assert.Equal(t, int(0), len(n3Pool2)) + + // Make sure that there were no errors and the nodepool3 size is once again 1 + n3Pool3, err := ngPool3.cherryManager.getNodeNames(ngPool3.id) + assert.NoError(t, err) + assert.Equal(t, int(1), len(n3Pool3)) +} diff --git a/cluster-autoscaler/cloudprovider/cherryservers/cherry_types.go b/cluster-autoscaler/cloudprovider/cherryservers/cherry_types.go new file mode 100644 index 0000000000..f0875bc341 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/cherry_types.go @@ -0,0 +1,264 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cherryservers + +// BGPRoute single server BGP route +type BGPRoute struct { + Subnet string `json:"subnet,omitempty"` + Active bool `json:"active,omitempty"` + Router string `json:"router,omitempty"` + Age string `json:"age,omitempty"` + Updated string `json:"updated,omitempty"` +} + +// ServerBGP status of BGP on a server +type ServerBGP struct { + Enabled bool `json:"enabled"` + Available bool `json:"available,omitempty"` + Status string `json:"status,omitempty"` + Routers int `json:"routers,omitempty"` + Connected int `json:"connected,omitempty"` + Limit int `json:"limit,omitempty"` + Active int `json:"active,omitempty"` + Routes []BGPRoute `json:"routes,omitempty"` + Updated string `json:"updated,omitempty"` +} + +// Project a CherryServers project +type Project struct { + ID int `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Bgp ProjectBGP `json:"bgp,omitempty"` + Href string `json:"href,omitempty"` +} + +// Region a CherryServers region +type Region struct { + ID int `json:"id,omitempty"` + Name string `json:"name,omitempty"` + RegionIso2 string `json:"region_iso_2,omitempty"` + BGP RegionBGP `json:"bgp,omitempty"` + Href string `json:"href,omitempty"` +} + +// RegionBGP information about BGP in a region +type RegionBGP struct { + Hosts []string `json:"hosts,omitempty"` + Asn int `json:"asn,omitempty"` +} + +// ProjectBGP information about BGP on an individual project +type ProjectBGP struct { + Enabled bool `json:"enabled,omitempty"` + LocalASN int `json:"local_asn,omitempty"` +} + +// Plan a server plan +type Plan struct { + ID int `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Custom bool `json:"custom,omitempty"` + Specs Specs `json:"specs,omitempty"` + Pricing []Pricing `json:"pricing,omitempty"` + AvailableRegions []AvailableRegions `json:"available_regions,omitempty"` +} + +// Plans represents a list of Cherry Servers plans +type Plans []Plan + +// Pricing price for a specific plan +type Pricing struct { + Price float32 `json:"price,omitempty"` + Taxed bool `json:"taxed,omitempty"` + Currency string `json:"currency,omitempty"` + Unit string `json:"unit,omitempty"` +} + +// AvailableRegions regions that are available to the 
user +type AvailableRegions struct { + ID int `json:"id,omitempty"` + Name string `json:"name,omitempty"` + RegionIso2 string `json:"region_iso_2,omitempty"` + StockQty int `json:"stock_qty,omitempty"` +} + +// AttachedTo what a resource is attached to +type AttachedTo struct { + Href string `json:"href"` +} + +// BlockStorage cloud block storage +type BlockStorage struct { + ID int `json:"id"` + Name string `json:"name"` + Href string `json:"href"` + Size int `json:"size"` + AllowEditSize bool `json:"allow_edit_size"` + Unit string `json:"unit"` + Description string `json:"description,omitempty"` + AttachedTo AttachedTo `json:"attached_to,omitempty"` + VlanID string `json:"vlan_id"` + VlanIP string `json:"vlan_ip"` + Initiator string `json:"initiator"` + DiscoveryIP string `json:"discovery_ip"` +} + +// AssignedTo assignment of a network floating IP to a server +type AssignedTo struct { + ID int `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Href string `json:"href,omitempty"` + Hostname string `json:"hostname,omitempty"` + Image string `json:"image,omitempty"` + Region Region `json:"region,omitempty"` + State string `json:"state,omitempty"` + Pricing Pricing `json:"pricing,omitempty"` +} + +// RoutedTo routing of a floating IP to an underlying IP +type RoutedTo struct { + ID string `json:"id,omitempty"` + Address string `json:"address,omitempty"` + AddressFamily int `json:"address_family,omitempty"` + Cidr string `json:"cidr,omitempty"` + Gateway string `json:"gateway,omitempty"` + Type string `json:"type,omitempty"` + Region Region `json:"region,omitempty"` +} + +// IPAddresses individual IP address +type IPAddresses struct { + ID string `json:"id,omitempty"` + Address string `json:"address,omitempty"` + AddressFamily int `json:"address_family,omitempty"` + Cidr string `json:"cidr,omitempty"` + Gateway string `json:"gateway,omitempty"` + Type string `json:"type,omitempty"` + Region Region `json:"region,omitempty"` + RoutedTo RoutedTo `json:"routed_to,omitempty"` + AssignedTo AssignedTo `json:"assigned_to,omitempty"` + TargetedTo AssignedTo `json:"targeted_to,omitempty"` + Project Project `json:"project,omitempty"` + PtrRecord string `json:"ptr_record,omitempty"` + ARecord string `json:"a_record,omitempty"` + Tags map[string]string `json:"tags,omitempty"` + Href string `json:"href,omitempty"` +} + +// Server represents a Cherry Servers server +type Server struct { + ID int `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Href string `json:"href,omitempty"` + Hostname string `json:"hostname,omitempty"` + Image string `json:"image,omitempty"` + SpotInstance bool `json:"spot_instance"` + BGP ServerBGP `json:"bgp,omitempty"` + Project Project `json:"project,omitempty"` + Region Region `json:"region,omitempty"` + State string `json:"state,omitempty"` + Plan Plan `json:"plan,omitempty"` + AvailableRegions AvailableRegions `json:"availableregions,omitempty"` + Pricing Pricing `json:"pricing,omitempty"` + IPAddresses []IPAddresses `json:"ip_addresses,omitempty"` + SSHKeys []SSHKeys `json:"ssh_keys,omitempty"` + Tags map[string]string `json:"tags,omitempty"` + Storage BlockStorage `json:"storage,omitempty"` + Created string `json:"created_at,omitempty"` + TerminationDate string `json:"termination_date,omitempty"` +} + +// SSHKeys an ssh key +type SSHKeys struct { + ID int `json:"id,omitempty"` + Label string `json:"label,omitempty"` + Key string `json:"key,omitempty"` + Fingerprint string `json:"fingerprint,omitempty"` + Updated string `json:"updated,omitempty"` + 
Created string `json:"created,omitempty"` + Href string `json:"href,omitempty"` +} + +// Cpus cpu information for a server +type Cpus struct { + Count int `json:"count,omitempty"` + Name string `json:"name,omitempty"` + Cores int `json:"cores,omitempty"` + Frequency float32 `json:"frequency,omitempty"` + Unit string `json:"unit,omitempty"` +} + +// Memory cpu information for a server +type Memory struct { + Count int `json:"count,omitempty"` + Total int `json:"total,omitempty"` + Unit string `json:"unit,omitempty"` + Name string `json:"name,omitempty"` +} + +// Nics network interface information for a server +type Nics struct { + Name string `json:"name,omitempty"` +} + +// Raid raid for block storage on a server +type Raid struct { + Name string `json:"name,omitempty"` +} + +// Storage amount of storage +type Storage struct { + Count int `json:"count,omitempty"` + Name string `json:"name,omitempty"` + Size float32 `json:"size,omitempty"` + Unit string `json:"unit,omitempty"` +} + +// Bandwidth total bandwidth available +type Bandwidth struct { + Name string `json:"name,omitempty"` +} + +// Specs aggregated specs for a server +type Specs struct { + Cpus Cpus `json:"cpus,omitempty"` + Memory Memory `json:"memory,omitempty"` + Storage []Storage `json:"storage,omitempty"` + Raid Raid `json:"raid,omitempty"` + Nics Nics `json:"nics,omitempty"` + Bandwidth Bandwidth `json:"bandwidth,omitempty"` +} + +// IPAddressCreateRequest represents a request to create a new IP address within a CreateServer request +type IPAddressCreateRequest struct { + AddressFamily int `json:"address_family"` + Public bool `json:"public"` +} + +// CreateServer represents a request to create a new Cherry Servers server. Used by createNodes +type CreateServer struct { + ProjectID int `json:"project_id,omitempty"` + PlanID int `json:"plan_id,omitempty"` + Hostname string `json:"hostname,omitempty"` + Image string `json:"image,omitempty"` + Region string `json:"region,omitempty"` + SSHKeys []int `json:"ssh_keys"` + IPAddresses []string `json:"ip_addresses"` + UserData string `json:"user_data,omitempty"` + Tags *map[string]string `json:"tags,omitempty"` + SpotInstance int `json:"spot_market,omitempty"` +} diff --git a/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-deployment.yaml b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-deployment.yaml new file mode 100644 index 0000000000..a071b31e5a --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-deployment.yaml @@ -0,0 +1,77 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + namespace: kube-system + labels: + app: cluster-autoscaler + spec: + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/master + # Node affinity is used to force cluster-autoscaler to stick + # to the master node. This allows the cluster to reliably downscale + # to zero worker nodes when needed. 
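+      # On clusters where control-plane nodes carry the newer
+      # node-role.kubernetes.io/control-plane label and taint, the toleration
+      # key above and the matchExpressions key below may need to be adjusted.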
+ affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/master + operator: Exists + serviceAccountName: cluster-autoscaler + containers: + - name: cluster-autoscaler + image: k8s.gcr.io/autoscaling/cluster-autoscaler:latest + imagePullPolicy: Always + env: + - name: BOOTSTRAP_TOKEN_ID + valueFrom: + secretKeyRef: + name: bootstrap-token-cluster-autoscaler-cherry + key: token-id + - name: BOOTSTRAP_TOKEN_SECRET + valueFrom: + secretKeyRef: + name: bootstrap-token-cluster-autoscaler-cherry + key: token-secret + - name: CHERRY_AUTH_TOKEN + valueFrom: + secretKeyRef: + name: cluster-autoscaler-cherry + key: authtoken + # You can take advantage of multiple nodepools by adding + # extra arguments on the cluster-autoscaler command. + # e.g. for pool1, pool2 + # --nodes=0:10:pool1 + # --nodes=0:10:pool2 + command: + - ./cluster-autoscaler + - --alsologtostderr + - --cluster-name=cluster1 + - --cloud-config=/config/cloud-config + - --cloud-provider=cherryservers + - --nodes=0:10:pool1 + - --nodes=0:10:pool2 + - --scale-down-unneeded-time=1m0s + - --scale-down-delay-after-add=1m0s + - --scale-down-unready-time=1m0s + - --v=2 + volumeMounts: + - name: cloud-config + mountPath: /config + readOnly: true + volumes: + - name: cloud-config + secret: + secretName: cluster-autoscaler-cloud-config diff --git a/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-rbac.yaml b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-rbac.yaml new file mode 100644 index 0000000000..238314faf9 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-rbac.yaml @@ -0,0 +1,113 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler + name: cluster-autoscaler + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["extensions"] + resources: ["replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["statefulsets", "replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes"] + verbs: ["watch", "list", "get"] + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "patch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["*"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: 
cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system diff --git a/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-secret.yaml b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-secret.yaml new file mode 100644 index 0000000000..33a80c70e7 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-secret.yaml @@ -0,0 +1,68 @@ +--- +apiVersion: v1 +data: + authtoken: YOUR_CHERRY_AUTHTOKEN +kind: Secret +metadata: + name: cluster-autoscaler-cherry + namespace: kube-system +type: Opaque +--- +apiVersion: v1 +kind: Secret +metadata: + name: cluster-autoscaler-cloud-config + namespace: kube-system +type: Opaque +stringData: + # kubeadm, kubelet, kubectl are pinned to version 1.23.1 + # The version can be altered by decoding the cloudinit and updating it to + # the desired version + # In the cloud-config you must always have a valid default nodegroup + cloud-config: |- + [nodegroupdef "default"] + project-id=YOUR_CHERRYSERVERS_PROJECT_ID + api-server-endpoint=YOUR_KUBERNETES_API_IP_ADDRESS:YOUR_KUBERNETES_API_PORT + region=EU-Nord-1 + os=ubuntu_18_04 + plan=113 + 
cloudinit=IyEvYmluL2Jhc2gKZXhwb3J0IERFQklBTl9GUk9OVEVORD1ub25pbnRlcmFjdGl2ZQpleHBvcnQgSzhTX1ZFUlNJT049MS4yMy4xCmFwdC1nZXQgdXBkYXRlICYmIGFwdC1nZXQgaW5zdGFsbCAteSBhcHQtdHJhbnNwb3J0LWh0dHBzIGNhLWNlcnRpZmljYXRlcyBjdXJsIHNvZnR3YXJlLXByb3BlcnRpZXMtY29tbW9uCmN1cmwgLWZzU0wgaHR0cHM6Ly9kb3dubG9hZC5kb2NrZXIuY29tL2xpbnV4L3VidW50dS9ncGcgfCBhcHQta2V5IGFkZCAtCmN1cmwgLXMgaHR0cHM6Ly9wYWNrYWdlcy5jbG91ZC5nb29nbGUuY29tL2FwdC9kb2MvYXB0LWtleS5ncGcgfCBhcHQta2V5IGFkZCAtCmNhdCA8PEVPRiA+L2V0Yy9hcHQvc291cmNlcy5saXN0LmQva3ViZXJuZXRlcy5saXN0CmRlYiBodHRwczovL2FwdC5rdWJlcm5ldGVzLmlvLyBrdWJlcm5ldGVzLXhlbmlhbCBtYWluCkVPRgphZGQtYXB0LXJlcG9zaXRvcnkgICAiZGViIFthcmNoPWFtZDY0XSBodHRwczovL2Rvd25sb2FkLmRvY2tlci5jb20vbGludXgvdWJ1bnR1ICAgJChsc2JfcmVsZWFzZSAtY3MpICAgc3RhYmxlIgphcHQtZ2V0IHVwZGF0ZQphcHQtZ2V0IHVwZ3JhZGUgLXkKYXB0LWdldCBpbnN0YWxsIC15IGt1YmVsZXQ9JHtLOHNfVkVSU0lPTn0tMDAga3ViZWFkbT0ke0s4c19WRVJTSU9OfS0wMCBrdWJlY3RsPSR7SzhzX1ZFUlNJT059LTAwCmFwdC1tYXJrIGhvbGQga3ViZWxldCBrdWJlYWRtIGt1YmVjdGwKY3VybCAtZnNTTCBodHRwczovL2Rvd25sb2FkLmRvY2tlci5jb20vbGludXgvdWJ1bnR1L2dwZyB8IGFwdC1rZXkgYWRkIC0KYWRkLWFwdC1yZXBvc2l0b3J5ICJkZWIgW2FyY2g9YW1kNjRdIGh0dHBzOi8vZG93bmxvYWQuZG9ja2VyLmNvbS9saW51eC91YnVudHUgYmlvbmljIHN0YWJsZSIKYXB0IHVwZGF0ZQphcHQgaW5zdGFsbCAteSBkb2NrZXItY2U9MTguMDYuMn5jZX4zLTB+dWJ1bnR1CmNhdCA+IC9ldGMvZG9ja2VyL2RhZW1vbi5qc29uIDw8RU9GCnsKICAiZXhlYy1vcHRzIjogWyJuYXRpdmUuY2dyb3VwZHJpdmVyPXN5c3RlbWQiXSwKICAibG9nLWRyaXZlciI6ICJqc29uLWZpbGUiLAogICJsb2ctb3B0cyI6IHsKICAgICJtYXgtc2l6ZSI6ICIxMDBtIgogIH0sCiAgInN0b3JhZ2UtZHJpdmVyIjogIm92ZXJsYXkyIgp9CkVPRgpta2RpciAtcCAvZXRjL3N5c3RlbWQvc3lzdGVtL2RvY2tlci5zZXJ2aWNlLmQKc3lzdGVtY3RsIGRhZW1vbi1yZWxvYWQKc3lzdGVtY3RsIHJlc3RhcnQgZG9ja2VyCnN3YXBvZmYgLWEKbXYgL2V0Yy9mc3RhYiAvZXRjL2ZzdGFiLm9sZCAmJiBncmVwIC12IHN3YXAgL2V0Yy9mc3RhYi5vbGQgPiAvZXRjL2ZzdGFiCmNhdCA8PEVPRiB8IHRlZSAvZXRjL2RlZmF1bHQva3ViZWxldApLVUJFTEVUX0VYVFJBX0FSR1M9LS1jbG91ZC1wcm92aWRlcj1leHRlcm5hbApFT0YKa3ViZWFkbSBqb2luIC0tZGlzY292ZXJ5LXRva2VuLXVuc2FmZS1za2lwLWNhLXZlcmlmaWNhdGlvbiAtLXRva2VuIHt7LkJvb3RzdHJhcFRva2VuSUR9fS57ey5Cb290c3RyYXBUb2tlblNlY3JldH19IHt7LkFQSVNlcnZlckVuZHBvaW50fX0K + hostname-pattern=k8s-{{.ClusterName}}-{{.NodeGroup}}-{{.RandString8}} + + [nodegroupdef "pool2"] + project-id=YOUR_CHERRYSERVERS_PROJECT_ID + api-server-endpoint=YOUR_KUBERNETES_API_IP_ADDRESS:YOUR_KUBERNETES_API_PORT + region=EU-Nord-1 + os=ubuntu_18_04 + plan=113 + 
cloudinit=IyEvYmluL2Jhc2gKZXhwb3J0IERFQklBTl9GUk9OVEVORD1ub25pbnRlcmFjdGl2ZQpleHBvcnQgSzhTX1ZFUlNJT049MS4yMy4xCmFwdC1nZXQgdXBkYXRlICYmIGFwdC1nZXQgaW5zdGFsbCAteSBhcHQtdHJhbnNwb3J0LWh0dHBzIGNhLWNlcnRpZmljYXRlcyBjdXJsIHNvZnR3YXJlLXByb3BlcnRpZXMtY29tbW9uCmN1cmwgLWZzU0wgaHR0cHM6Ly9kb3dubG9hZC5kb2NrZXIuY29tL2xpbnV4L3VidW50dS9ncGcgfCBhcHQta2V5IGFkZCAtCmN1cmwgLXMgaHR0cHM6Ly9wYWNrYWdlcy5jbG91ZC5nb29nbGUuY29tL2FwdC9kb2MvYXB0LWtleS5ncGcgfCBhcHQta2V5IGFkZCAtCmNhdCA8PEVPRiA+L2V0Yy9hcHQvc291cmNlcy5saXN0LmQva3ViZXJuZXRlcy5saXN0CmRlYiBodHRwczovL2FwdC5rdWJlcm5ldGVzLmlvLyBrdWJlcm5ldGVzLXhlbmlhbCBtYWluCkVPRgphZGQtYXB0LXJlcG9zaXRvcnkgICAiZGViIFthcmNoPWFtZDY0XSBodHRwczovL2Rvd25sb2FkLmRvY2tlci5jb20vbGludXgvdWJ1bnR1ICAgJChsc2JfcmVsZWFzZSAtY3MpICAgc3RhYmxlIgphcHQtZ2V0IHVwZGF0ZQphcHQtZ2V0IHVwZ3JhZGUgLXkKYXB0LWdldCBpbnN0YWxsIC15IGt1YmVsZXQ9JHtLOHNfVkVSU0lPTn0tMDAga3ViZWFkbT0ke0s4c19WRVJTSU9OfS0wMCBrdWJlY3RsPSR7SzhzX1ZFUlNJT059LTAwCmFwdC1tYXJrIGhvbGQga3ViZWxldCBrdWJlYWRtIGt1YmVjdGwKY3VybCAtZnNTTCBodHRwczovL2Rvd25sb2FkLmRvY2tlci5jb20vbGludXgvdWJ1bnR1L2dwZyB8IGFwdC1rZXkgYWRkIC0KYWRkLWFwdC1yZXBvc2l0b3J5ICJkZWIgW2FyY2g9YW1kNjRdIGh0dHBzOi8vZG93bmxvYWQuZG9ja2VyLmNvbS9saW51eC91YnVudHUgYmlvbmljIHN0YWJsZSIKYXB0IHVwZGF0ZQphcHQgaW5zdGFsbCAteSBkb2NrZXItY2U9MTguMDYuMn5jZX4zLTB+dWJ1bnR1CmNhdCA+IC9ldGMvZG9ja2VyL2RhZW1vbi5qc29uIDw8RU9GCnsKICAiZXhlYy1vcHRzIjogWyJuYXRpdmUuY2dyb3VwZHJpdmVyPXN5c3RlbWQiXSwKICAibG9nLWRyaXZlciI6ICJqc29uLWZpbGUiLAogICJsb2ctb3B0cyI6IHsKICAgICJtYXgtc2l6ZSI6ICIxMDBtIgogIH0sCiAgInN0b3JhZ2UtZHJpdmVyIjogIm92ZXJsYXkyIgp9CkVPRgpta2RpciAtcCAvZXRjL3N5c3RlbWQvc3lzdGVtL2RvY2tlci5zZXJ2aWNlLmQKc3lzdGVtY3RsIGRhZW1vbi1yZWxvYWQKc3lzdGVtY3RsIHJlc3RhcnQgZG9ja2VyCnN3YXBvZmYgLWEKbXYgL2V0Yy9mc3RhYiAvZXRjL2ZzdGFiLm9sZCAmJiBncmVwIC12IHN3YXAgL2V0Yy9mc3RhYi5vbGQgPiAvZXRjL2ZzdGFiCmNhdCA8PEVPRiB8IHRlZSAvZXRjL2RlZmF1bHQva3ViZWxldApLVUJFTEVUX0VYVFJBX0FSR1M9LS1jbG91ZC1wcm92aWRlcj1leHRlcm5hbApFT0YKa3ViZWFkbSBqb2luIC0tZGlzY292ZXJ5LXRva2VuLXVuc2FmZS1za2lwLWNhLXZlcmlmaWNhdGlvbiAtLXRva2VuIHt7LkJvb3RzdHJhcFRva2VuSUR9fS57ey5Cb290c3RyYXBUb2tlblNlY3JldH19IHt7LkFQSVNlcnZlckVuZHBvaW50fX0K + hostname-pattern=k8s-{{.ClusterName}}-{{.NodeGroup}}-{{.RandString8}} +--- +# The following secret is only required when using bootstrap tokens in cloudinit +# like in the above example. For more info on bootstrap tokens check +# https://kubernetes.io/docs/reference/access-authn-authz/bootstrap-tokens/ +# IMPORTANT: change the token-id & token-secret values below before applying +apiVersion: v1 +kind: Secret +type: bootstrap.kubernetes.io/token +metadata: + name: bootstrap-token-cluster-autoscaler-cherry + namespace: kube-system +stringData: + description: "The default bootstrap token used by cluster-autoscaler on Cherry Servers." + + # Token ID and secret. Required if using bootstrap tokens in cloudinit (e.g. with kubeadm). + # token-id must match the regular expression [a-z0-9]{6} + token-id: YOUR_TOKEN_ID + # token-secret must match the regular expression [a-z0-9]{16} + token-secret: YOUR_TOKEN_SECRET + + # Expiration. Optional. + # expiration: 2020-03-10T03:22:11Z + + # Allowed usages. + usage-bootstrap-authentication: "true" + usage-bootstrap-signing: "true" + + # Extra groups to authenticate the token as. 
Must start with "system:bootstrappers:" + auth-extra-groups: system:bootstrappers:kubeadm:default-node-token,system:bootstrappers:worker,system:bootstrappers:ingress diff --git a/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-svcaccount.yaml b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-svcaccount.yaml new file mode 100644 index 0000000000..b656fc5eb7 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/cherryservers/examples/cluster-autoscaler-svcaccount.yaml @@ -0,0 +1,77 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler-role +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "namespaces" + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["daemonsets", "replicasets", "statefulsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes"] + verbs: ["watch", "list", "get"] + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler-rolebinding + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler-role +subjects: + - kind: ServiceAccount + name: cluster-autoscaler-account + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cluster-autoscaler-account + namespace: kube-system diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index 8f2539d3d5..372bd76809 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -40,6 +40,8 @@ const ( BizflyCloudProviderName = "bizflycloud" // BrightboxProviderName gets the provider name of brightbox BrightboxProviderName = "brightbox" + // CherryServersProviderName gets the provider name of cherry servers + CherryServersProviderName = "cherryservers" // CloudStackProviderName gets the provider name of cloudstack CloudStackProviderName = "cloudstack" // ClusterAPIProviderName gets the provider name of clusterapi